nsz Git - libfirm/blob - ir/opt/opt_osr.c

   1 /**
   2  * Project:     libFIRM
   3  * File name:   ir/opt/opt_osr.c
   4  * Purpose:     Operator Strength Reduction,
   5  *              Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
   6  * Author:      Michael Beck
   7  * Modified by:
   8  * Created:     12.5.2006
   9  * CVS-ID:      $Id$
  10  * Copyright:   (c) 2006 Universität Karlsruhe
  11  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  12  */
  13 #ifdef HAVE_CONFIG_H
  14 #include "config.h"
  15 #endif
  16
  17 #ifdef HAVE_MALLOC_H
  18 #include <malloc.h>
  19 #endif
  20 #ifdef HAVE_ALLOCA_H
  21 #include <alloca.h>
  22 #endif
  23
  24 #include "opt_osr.h"
  25 #include "irgraph.h"
  26 #include "ircons.h"
  27 #include "irop_t.h"
  28 #include "irloop.h"
  29 #include "irdom.h"
  30 #include "irgmod.h"
  31 #include "irflag_t.h"
  32 #include "irgwalk.h"
  33 #include "irouts.h"
  34 #include "debug.h"
  35 #include "obst.h"
  36 #include "set.h"
  37 #include "tv.h"
  38 #include "hashptr.h"
  39 #include "irtools.h"
  40 #include "array.h"
  41 #include "firmstat.h"
  42
/** The debug module handle; it is registered as "firm.opt.osr" in opt_osr(). */
DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  45
/**
 * A strongly connected component (SCC).
 * The members form a singly linked list that starts at head and is
 * chained through node_entry::next.
 */
typedef struct scc {
	ir_node *head;          /**< the head of the member list */
} scc;
  50
/**
 * Per-node bookkeeping data. An entry is attached to a node through
 * its link field and created on demand by get_irn_ne().
 */
typedef struct node_entry {
	unsigned DFSnum;    /**< the DFS number of this node */
	unsigned low;       /**< the low number of this node */
	ir_node  *header;   /**< the header block of the induction variable this node belongs to, NULL if it is no IV */
	int      in_stack;  /**< flag, set if the node is on the stack */
	ir_node  *next;     /**< link to the next node in the same scc */
	scc      *pscc;     /**< the scc of this node */
	unsigned POnum;     /**< the post order number for blocks */
} node_entry;
  61
/**
 * The environment shared by all phases of one opt_osr() run.
 */
typedef struct iv_env {
	struct obstack obst;    /**< an obstack for allocations (node entries, SCCs) */
	ir_node  **stack;       /**< the node stack used by the SCC search */
	int      tos;           /**< tos index: the next free slot of the stack */
	unsigned nextDFSnum;    /**< the current DFS number */
	unsigned POnum;         /**< current post order number */
	set      *quad_map;     /**< a map from (op, iv, rc) to node */
	set      *lftr_edges;   /**< the set of lftr edges */
	unsigned replaced;      /**< number of replaced ops */
	unsigned lftr_replaced; /**< number of applied linear function test replacements */
	unsigned flags;         /**< additional flags, as passed to opt_osr() */
} iv_env;
  75
/**
 * An entry in the (op, node, node) -> node map.
 * The key consists of code, op1 and op2 (see quad_cmp()); res is the
 * stored value.
 */
typedef struct quad_t {
	opcode  code;  /**< the opcode of the reduced operation */
	ir_node *op1;  /**< the first operand of the reduced operation */
	ir_node *op2;  /**< the second operand of the reduced operation */

	ir_node *res; /**< the reduced operation */
} quad_t;
  86
/**
 * A LFTR (linear function test replacement) edge.
 * It records that dst was created from src by applying "code rc".
 * Edges are identified by their source node only (see LFTR_cmp()).
 */
typedef struct LFTR_edge {
	ir_node *src;   /**< the source node */
	ir_node *dst;   /**< the destination node */
	opcode  code;   /**< the opcode that must be applied */
	ir_node *rc;    /**< the region const that must be applied */
} LFTR_edge;
  96
/* forward declaration: apply() and reduce() call each other */
static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
  99
 100 /**
 101  * Compare two LFTR edges.
 102  */
 103 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 104         const LFTR_edge *l1 = e1;
 105         const LFTR_edge *l2 = e2;
 106
 107         return l1->src != l2->src;
 108 }
 109
 110 /**
 111  * Find a LFTR edge.
 112  */
 113 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 114         LFTR_edge key;
 115
 116         key.src  = src;
 117
 118         return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 119 }
 120
 121 /**
 122  * Add a LFTR edge.
 123  */
 124 static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
 125         LFTR_edge key;
 126
 127         key.src  = src;
 128         key.dst  = dst;
 129         key.code = code;
 130         key.rc   = rc;
 131
 132         /*
 133          * There might be more than one edge here. This is rather bad
 134          * because we currently store only one.
 135          */
 136 //      assert(LFTR_find(src, env) == NULL);
 137         set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 138 }
 139
 140 /**
 141  * Gets the node_entry of a node
 142  */
 143 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 144         node_entry *e = get_irn_link(irn);
 145
 146         if (! e) {
 147                 e = obstack_alloc(&env->obst, sizeof(*e));
 148                 memset(e, 0, sizeof(*e));
 149                 set_irn_link(irn, e);
 150         }
 151         return e;
 152 }
 153
 154 /**
 155  * Check if irn is an IV.
 156  *
 157  * @param irn  the node to check
 158  * @param env  the environment
 159  *
 160  * @returns the header if it is one, NULL else
 161  */
 162 static ir_node *is_iv(ir_node *irn, iv_env *env) {
 163         return get_irn_ne(irn, env)->header;
 164 }
 165
 166 /**
 167  * Check if irn is a region constant.
 168  * The block or irn must strictly dominate the header block.
 169  *
 170  * @param irn           the node to check
 171  * @param header_block  the header block of the induction variable
 172  */
 173 static int is_rc(ir_node *irn, ir_node *header_block) {
 174         ir_node *block = get_nodes_block(irn);
 175
 176         return (block != header_block) && block_dominates(block, header_block);
 177 }
 178
 179 /**
 180  * Set compare function for the quad set.
 181  */
 182 static int quad_cmp(const void *e1, const void *e2, size_t size) {
 183         const quad_t *c1 = e1;
 184         const quad_t *c2 = e2;
 185
 186         return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 187 }
 188
 189 /**
 190  * Check if an reduced operation was already calculated.
 191  *
 192  * @param code  the opcode of the operation
 193  * @param op1   the first operand of the operation
 194  * @param op2   the second operand of the operation
 195  * @param env   the environment
 196  *
 197  * @return the already reduced node or NULL if this operation is not yet reduced
 198  */
 199 static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 200         quad_t key, *entry;
 201
 202         key.code = code;
 203         key.op1 = op1;
 204         key.op2 = op2;
 205
 206         entry = set_find(env->quad_map, &key, sizeof(key),
 207                          (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 208         if (entry)
 209                 return entry->res;
 210         return NULL;
 211 }
 212
 213 /**
 214  * Add an reduced operation.
 215  *
 216  * @param code    the opcode of the operation
 217  * @param op1     the first operand of the operation
 218  * @param op2     the second operand of the operation
 219  * @param result  the result of the reduced operation
 220  * @param env     the environment
 221  */
 222 static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
 223         quad_t key;
 224
 225         key.code = code;
 226         key.op1  = op1;
 227         key.op2  = op2;
 228         key.res  = result;
 229
 230         set_insert(env->quad_map, &key, sizeof(key),
 231                    (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 232 }
 233
 234 /**
 235  * Find a location where to place a bin-op whose operands are in
 236  * block1 and block2.
 237  *
 238  * @param block1  the block of the first operand
 239  * @param block2  the block of the second operand
 240  *
 241  * Note that we know here that such a place must exists. Moreover, this means
 242  * that either block1 dominates block2 or vice versa. So, just return
 243  * the "smaller" one.
 244  */
 245 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 246         if (block_dominates(block1, block2))
 247                 return block2;
 248         assert(block_dominates(block2, block1));
 249         return block1;
 250 }
 251
 252 /**
 253  * Create a node that executes an op1 code op1 operation.
 254  *
 255  * @param code   the opcode to execute
 256  * @param db     debug info to add to the new node
 257  * @param op1    the first operand
 258  * @param op2    the second operand
 259  * @param mode   the mode of the new operation
 260  *
 261  * @return the newly created node
 262  */
 263 static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 264         ir_graph *irg = current_ir_graph;
 265         ir_node *result;
 266         ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
 267
 268         switch (code) {
 269         case iro_Mul:
 270                 result = new_rd_Mul(db, irg, block, op1, op2, mode);
 271                 break;
 272         case iro_Add:
 273                 result = new_rd_Add(db, irg, block, op1, op2, mode);
 274                 break;
 275         case iro_Sub:
 276                 result = new_rd_Sub(db, irg, block, op1, op2, mode);
 277                 break;
 278         default:
 279                 assert(0);
 280                 result = NULL;
 281         }
 282         return result;
 283 }
 284
 285 /**
 286  * The Apply operation.
 287  *
 288  * @param orig   the node that represent the original operation and determines
 289  *               the opcode, debug-info and mode of a newly created one
 290  * @param op1    the first operand
 291  * @param op2    the second operand
 292  * @param env     the environment
 293  *
 294  * @return the newly created node
 295  */
 296 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 297         opcode code = get_irn_opcode(orig);
 298         ir_node *result = search(code, op1, op2, env);
 299
 300         if (! result) {
 301                 dbg_info *db = get_irn_dbg_info(orig);
 302                 ir_node *op1_header = get_irn_ne(op1, env)->header;
 303                 ir_node *op2_header = get_irn_ne(op2, env)->header;
 304
 305                 if (op1_header != NULL && is_rc(op2, op1_header)) {
 306                         result = reduce(orig, op1, op2, env);
 307                 }
 308                 else if (op2_header != NULL && is_rc(op1, op2_header)) {
 309                         result = reduce(orig, op2, op1, env);
 310                 }
 311                 else {
 312                         result = do_apply(code, db, op1, op2, get_irn_mode(orig));
 313                         get_irn_ne(result, env)->header = NULL;
 314                 }
 315         }
 316         return result;
 317 }
 318
 319 /**
 320  * The Reduce operation.
 321  *
 322  * @param orig   the node that represent the original operation and determines
 323  *               the opcode, debug-info and mode of a newly created one
 324  * @param iv     the induction variable
 325  * @param rc     the region constant
 326  * @param env    the environment
 327  *
 328  * @return the reduced node
 329  */
 330 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 331         opcode code = get_irn_opcode(orig);
 332         ir_node *result = search(code, iv, rc, env);
 333
 334         if (! result) {
 335                 node_entry *e, *iv_e;
 336                 int i, n;
 337                 ir_mode *mode = get_irn_mode(orig);
 338
 339                 result = exact_copy(iv);
 340
 341                 /* Beware: we must always create a new nduction variable with the same mode
 342                    as the node we are replacing. Espicially this means the mode might be changed
 343                    from P to I and back. This is always possible, because we have only Phi, Add
 344                    and Sub nodes. */
 345                 set_irn_mode(result, mode);
 346                 add(code, iv, rc, result, env);
 347                 DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 348                         get_irn_opname(orig), rc));
 349
 350                 iv_e = get_irn_ne(iv, env);
 351                 e    = get_irn_ne(result, env);
 352                 e->header = iv_e->header;
 353
 354                 /* create the LFTR edge */
 355                 LFTR_add(iv, result, code, rc, env);
 356
 357                 n = get_irn_arity(result);
 358                 for (i = 0; i < n; ++i) {
 359                         ir_node *o = get_irn_n(result, i);
 360
 361                         e = get_irn_ne(o, env);
 362                         if (e->header == iv_e->header)
 363                                 o = reduce(orig, o, rc, env);
 364                         else if (is_Phi(result))
 365                                 o = apply(orig, o, rc, env);
 366                         else {
 367                                 if (code == iro_Mul)
 368                                         o = apply(orig, o, rc, env);
 369                         }
 370                         set_irn_n(result, i, o);
 371                 }
 372         }
 373         else {
 374                 DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
 375                         get_irn_opname(orig), rc));
 376         }
 377         return result;
 378 }
 379
 380 /**
 381  * The Replace operation.
 382  *
 383  * @param irn   the node that will be replaced
 384  * @param iv    the induction variable
 385  * @param rc    the region constant
 386  * @param env   the environment
 387  */
 388 static void replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 389         ir_node *result;
 390
 391         DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
 392
 393         result = reduce(irn, iv, rc, env);
 394         if (result != irn) {
 395                 node_entry *e, *iv_e;
 396
 397                 hook_strength_red(current_ir_graph, irn);
 398                 exchange(irn, result);
 399                 e = get_irn_ne(result, env);
 400                 iv_e = get_irn_ne(iv, env);
 401                 e->header = iv_e->header;
 402         }
 403 }
 404
 405 /**
 406  * Check if a node can be replaced (+, -, *).
 407  *
 408  * @param irn   the node to check
 409  * @param env   the environment
 410  *
 411  * @return non-zero if irn should be Replace'd
 412  */
 413 static int check_replace(ir_node *irn, iv_env *env) {
 414         ir_node *left, *right, *iv, *rc;
 415         ir_op   *op  = get_irn_op(irn);
 416         opcode  code = get_op_code(op);
 417         ir_node *liv, *riv;
 418
 419         switch (code) {
 420         case iro_Mul:
 421         case iro_Add:
 422         case iro_Sub:
 423                 iv = rc = NULL;
 424
 425                 left  = get_binop_left(irn);
 426                 right = get_binop_right(irn);
 427
 428                 liv = is_iv(left, env);
 429                 riv = is_iv(right, env);
 430                 if (liv && is_rc(right, liv)) {
 431                         iv = left; rc = right;
 432                 }
 433                 else if (riv && is_op_commutative(op) &&
 434                                     is_rc(left, riv)) {
 435                         iv = right; rc = left;
 436                 }
 437
 438                 if (iv) {
 439                         replace(irn, iv, rc, env);
 440                         ++env->replaced;
 441                         return 1;
 442                 }
 443                 break;
 444         default:
 445                 break;
 446         }
 447         return 0;
 448 }
 449
 450 /**
 451  * Check which SCC's are induction variables.
 452  *
 453  * @param pscc  a SCC
 454  * @param env   the environment
 455  */
 456 static void classify_iv(scc *pscc, iv_env *env) {
 457         ir_node *irn, *next, *header = NULL;
 458         node_entry *b, *h = NULL;
 459         int j, only_phi, num_outside;
 460         ir_node *out_rc;
 461
 462         /* find the header block for this scc */
 463         for (irn = pscc->head; irn; irn = next) {
 464                 node_entry *e = get_irn_link(irn);
 465                 ir_node *block = get_nodes_block(irn);
 466
 467                 next = e->next;
 468                 b = get_irn_ne(block, env);
 469
 470                 if (header) {
 471                         if (h->POnum < b->POnum) {
 472                                 header = block;
 473                                 h      = b;
 474                         }
 475                 }
 476                 else {
 477                         header = block;
 478                         h      = b;
 479                 }
 480         }
 481
 482         /* check if this scc contains only Phi, Add or Sub nodes */
 483         only_phi    = 1;
 484         num_outside = 0;
 485         out_rc      = NULL;
 486         for (irn = pscc->head; irn; irn = next) {
 487                 node_entry *e = get_irn_ne(irn, env);
 488
 489                 next = e->next;
 490                 switch (get_irn_opcode(irn)) {
 491                 case iro_Add:
 492                 case iro_Sub:
 493                         only_phi = 0;
 494                         /* fall through */
 495                 case iro_Phi:
 496                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 497                                 ir_node *pred  = get_irn_n(irn, j);
 498                                 node_entry *pe = get_irn_ne(pred, env);
 499
 500                                 if (pe->pscc != e->pscc) {
 501                                         /* not in the same SCC, must be a region const */
 502                                         if (! is_rc(pred, header)) {
 503                                                 /* not an induction variable */
 504                                                 goto fail;
 505                                         }
 506                                         if (! out_rc) {
 507                                                 out_rc = pred;
 508                                                 ++num_outside;
 509                                         }
 510                                         else if (out_rc != pred)
 511                                                 ++num_outside;
 512                                 }
 513                         }
 514                         break;
 515                 default:
 516                         /* not an induction variable */
 517                         goto fail;
 518                 }
 519         }
 520         /* found an induction variable */
 521         DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
 522         if (only_phi && num_outside == 1) {
 523                 DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 524         }
 525
 526         /* set the header for every node in this scc */
 527         for (irn = pscc->head; irn; irn = next) {
 528                 node_entry *e = get_irn_ne(irn, env);
 529                 e->header = header;
 530                 next = e->next;
 531                 DB((dbg, LEVEL_2, " %+F,", irn));
 532         }
 533         DB((dbg, LEVEL_2, "\n"));
 534         return;
 535
 536 fail:
 537         for (irn = pscc->head; irn; irn = next) {
 538                 node_entry *e = get_irn_ne(irn, env);
 539
 540                 next = e->next;
 541                 if (! check_replace(irn, env))
 542                         e->header = NULL;
 543         }
 544 }
 545
 546 /**
 547  * Process a SCC.
 548  *
 549  * @param pscc  the SCC
 550  * @param env   the environment
 551  */
 552 static void process_scc(scc *pscc, iv_env *env) {
 553         ir_node *head = pscc->head;
 554         node_entry *e = get_irn_link(head);
 555
 556 #ifdef DEBUG_libfirm
 557         {
 558                 ir_node *irn, *next;
 559
 560                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 561                 for (irn = pscc->head; irn; irn = next) {
 562                         node_entry *e = get_irn_link(irn);
 563
 564                         next = e->next;
 565
 566                         DB((dbg, LEVEL_4, " %+F,", irn));
 567                 }
 568                 DB((dbg, LEVEL_4, "\n"));
 569         }
 570 #endif
 571
 572         if (e->next == NULL) {
 573                 /* this SCC has only a single member */
 574                 check_replace(head, env);
 575         }
 576         else {
 577                 classify_iv(pscc, env);
 578         }
 579 }
 580
 581 /**
 582  * Push a node onto the stack.
 583  *
 584  * @param env   the environment
 585  * @param n     the node to push
 586  */
 587 static void push(iv_env *env, ir_node *n) {
 588         node_entry *e;
 589
 590         if (env->tos == ARR_LEN(env->stack)) {
 591                 int nlen = ARR_LEN(env->stack) * 2;
 592                 ARR_RESIZE(ir_node *, env->stack, nlen);
 593         }
 594         env->stack[env->tos++] = n;
 595         e = get_irn_ne(n, env);
 596         e->in_stack = 1;
 597 }
 598
 599 /**
 600  * pop a node from the stack
 601  *
 602  * @param env   the environment
 603  *
 604  * @return  The topmost node
 605  */
 606 static ir_node *pop(iv_env *env)
 607 {
 608   ir_node *n = env->stack[--env->tos];
 609   node_entry *e = get_irn_ne(n, env);
 610
 611   e->in_stack = 0;
 612   return n;
 613 }
 614
 615 /**
 616  * Do Tarjan's SCC algorithm and drive OSR.
 617  *
 618  * @param irn  start at this node
 619  * @param env  the environment
 620  */
 621 static void dfs(ir_node *irn, iv_env *env)
 622 {
 623         int i, n;
 624         node_entry *node = get_irn_ne(irn, env);
 625
 626         mark_irn_visited(irn);
 627
 628         /* do not put blocks into the scc */
 629         if (is_Block(irn)) {
 630                 n = get_irn_arity(irn);
 631                 for (i = 0; i < n; ++i) {
 632                         ir_node *pred = get_irn_n(irn, i);
 633
 634                         if (irn_not_visited(pred))
 635                                 dfs(pred, env);
 636                 }
 637         }
 638         else {
 639                 ir_node *block = get_nodes_block(irn);
 640
 641                 node->DFSnum = env->nextDFSnum++;
 642                 node->low    = node->DFSnum;
 643                 push(env, irn);
 644
 645                 /* handle the block */
 646                 if (irn_not_visited(block))
 647                         dfs(block, env);
 648
 649                 n = get_irn_arity(irn);
 650                 for (i = 0; i < n; ++i) {
 651                         ir_node *pred = get_irn_n(irn, i);
 652                         node_entry *o = get_irn_ne(pred, env);
 653
 654                         if (irn_not_visited(pred)) {
 655                                 dfs(pred, env);
 656                                 node->low = MIN(node->low, o->low);
 657                         }
 658                         if (o->DFSnum < node->DFSnum && o->in_stack)
 659                                 node->low = MIN(o->DFSnum, node->low);
 660                 }
 661                 if (node->low == node->DFSnum) {
 662                         scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
 663                         ir_node *x;
 664
 665                         pscc->head = NULL;
 666                         do {
 667                                 node_entry *e;
 668
 669                                 x = pop(env);
 670                                 e = get_irn_ne(x, env);
 671                                 e->pscc    = pscc;
 672                                 e->next    = pscc->head;
 673                                 pscc->head = x;
 674                         } while (x != irn);
 675
 676                         process_scc(pscc, env);
 677                 }
 678         }
 679 }
 680
 681 /**
 682  * Do the DFS by starting at the End node of a graph.
 683  *
 684  * @param irg  the graph to process
 685  * @param env  the environment
 686  */
 687 static void do_dfs(ir_graph *irg, iv_env *env) {
 688         ir_graph *rem = current_ir_graph;
 689         ir_node *end = get_irg_end(irg);
 690         int i, n;
 691
 692         current_ir_graph = irg;
 693         inc_irg_visited(irg);
 694
 695         /* visit all visible nodes */
 696         dfs(end, env);
 697
 698         /* visit the keep-alives */
 699         n = get_End_n_keepalives(end);
 700         for (i = 0; i < n; ++i) {
 701                 ir_node *ka = get_End_keepalive(end, i);
 702
 703                 if (irn_not_visited(ka))
 704                         dfs(ka, env);
 705         }
 706
 707         current_ir_graph = rem;
 708 }
 709
 710 /**
 711  * Post-block-walker: assign the post-order number.
 712  */
 713 static void assign_po(ir_node *block, void *ctx) {
 714         iv_env *env = ctx;
 715         node_entry *e = get_irn_ne(block, env);
 716
 717         e->POnum = env->POnum++;
 718 }
 719
 720 /**
 721  * Follows the LFTR edges and return the last node in the chain.
 722  *
 723  * @param irn  the node that should be followed
 724  * @param env  the IV environment
 725  *
 726  * @note
 727  * In the current implementation only the last edge is stored, so
 728  * only one chain exists. That's why we might miss some opportunities.
 729  */
 730 static ir_node *followEdges(ir_node *irn, iv_env *env) {
 731         for (;;) {
 732                 LFTR_edge *e = LFTR_find(irn, env);
 733                 if (e)
 734                         irn = e->dst;
 735                 else
 736                         return irn;
 737         }
 738 }
 739
 740 /**
 741  * Apply one LFTR edge operation.
 742  * Return NULL if the transformation cannot be done safely without
 743  * an Overflow.
 744  *
 745  * @param rc   the IV node that should be translated
 746  * @param e    the LFTR edge
 747  * @param env  the IV environment
 748  *
 749  * @return the translated region constant or NULL
 750  *         if the translation was not possible
 751  *
 752  * @note
 753  * In the current implementation only the last edge is stored, so
 754  * only one chain exists. That's why we might miss some opportunities.
 755  */
 756 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
 757         if (env->flags & osr_flag_lftr_with_ov_check) {
 758                 tarval *tv_l, *tv_r, *tv;
 759                 tarval_int_overflow_mode_t ovmode;
 760
 761                 /* overflow can only be decided for Consts */
 762                 if (! is_Const(e->rc)) {
 763                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
 764                         return NULL;
 765                 }
 766
 767                 tv_l = get_Const_tarval(rc);
 768                 tv_r = get_Const_tarval(e->rc);
 769
 770                 ovmode = tarval_get_integer_overflow_mode();
 771                 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
 772
 773                 switch (e->code) {
 774                 case iro_Mul:
 775                         tv = tarval_mul(tv_l, tv_r);
 776                         DB((dbg, LEVEL_4, " * %+F", tv_r));
 777                         break;
 778                 case iro_Add:
 779                         tv = tarval_add(tv_l, tv_r);
 780                         DB((dbg, LEVEL_4, " + %+F", tv_r));
 781                         break;
 782                 case iro_Sub:
 783                         tv = tarval_sub(tv_l, tv_r);
 784                         DB((dbg, LEVEL_4, " - %+F", tv_r));
 785                         break;
 786                 default:
 787                         assert(0);
 788                         tv = tarval_bad;
 789                 }
 790                 tarval_set_integer_overflow_mode(ovmode);
 791
 792                 if (tv == tarval_bad) {
 793                         DB((dbg, LEVEL_4, " = OVERFLOW"));
 794                         return NULL;
 795                 }
 796                 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
 797         }
 798         return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
 799 }
 800
 801 /**
 802  * Applies the operations represented by the LFTR edges to a
 803  * region constant and returns the value.
 804  * Return NULL if the transformation cannot be done safely without
 805  * an Overflow.
 806  *
 807  * @param iv   the IV node that starts the LFTR edge chain
 808  * @param rc   the region constant that should be translated
 809  * @param env  the IV environment
 810  *
 811  * @return the translated region constant or NULL
 812  *         if the translation was not possible
 813  */
 814 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
 815         ir_node *irn = iv;
 816
 817         if (env->flags & osr_flag_lftr_with_ov_check) {
 818                 /* overflow can only be decided for Consts */
 819                 if (! is_Const(rc)) {
 820                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
 821                         return NULL;
 822                 }
 823                 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
 824         }
 825
 826         for (irn = iv; rc;) {
 827                 LFTR_edge *e = LFTR_find(irn, env);
 828                 if (e) {
 829                         rc = applyOneEdge(rc, e, env);
 830                         irn = e->dst;
 831                 }
 832                 else
 833                         break;
 834         }
 835         DB((dbg, LEVEL_3, "\n"));
 836         return rc;
 837 }
 838
 839 /**
 840  * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
 841  * and tries to optimize them.
 842  */
 843 static void do_lftr(ir_node *cmp, void *ctx) {
 844         iv_env *env = ctx;
 845         ir_node *left, *right, *liv, *riv;
 846         ir_node *iv, *rc;
 847         ir_node *nleft = NULL, *nright = NULL;
 848
 849         if (get_irn_op(cmp) != op_Cmp)
 850                 return;
 851
 852         left  = get_Cmp_left(cmp);
 853         right = get_Cmp_right(cmp);
 854
 855         liv = is_iv(left, env);
 856         riv = is_iv(right, env);
 857         if (liv && is_rc(right, liv)) {
 858                 iv = left; rc = right;
 859
 860                 nright = applyEdges(iv, rc, env);
 861                 if (nright) {
 862                         nleft = followEdges(iv, env);
 863                 }
 864         }
 865         else if (riv && is_rc(left, riv)) {
 866                 iv = right; rc = left;
 867
 868                 nleft = applyEdges(iv, rc, env);
 869                 if (nleft) {
 870                         nright = followEdges(iv, env);
 871                 }
 872         }
 873
 874         if (nleft && nright) {
 875                 DB((dbg, LEVEL_2, "  LFTR for %+F\n", cmp));
 876                 set_Cmp_left(cmp, nleft);
 877                 set_Cmp_right(cmp, nright);
 878                 ++env->lftr_replaced;
 879         }
 880 }
 881
 882 /**
 883  * do linear function test replacement.
 884  *
 885  * @param irg   the graph that should be optimized
 886  * @param env   the IV environment
 887  */
 888 static void lftr(ir_graph *irg, iv_env *env) {
 889         irg_walk_graph(irg, NULL, do_lftr, env);
 890 }
 891
 892 /**
 893  * Pre-walker: set all node links to NULL and fix the
 894  * block of Proj nodes.
 895  */
 896 static void clear_and_fix(ir_node *irn, void *env)
 897 {
 898         set_irn_link(irn, NULL);
 899
 900         if (is_Proj(irn)) {
 901                 ir_node *pred = get_Proj_pred(irn);
 902                 set_irn_n(irn, -1, get_irn_n(pred, -1));
 903         }
 904 }
 905
 906 /* Performs Operator Strength Reduction for the passed graph. */
 907 void opt_osr(ir_graph *irg, unsigned flags) {
 908         iv_env env;
 909
 910         if (! get_opt_strength_red())
 911                 return;
 912
 913         FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
 914 //      firm_dbg_set_mask(dbg, SET_LEVEL_3);
 915
 916         DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
 917
 918         obstack_init(&env.obst);
 919         env.stack         = NEW_ARR_F(ir_node *, 128);
 920         env.tos           = 0;
 921         env.nextDFSnum    = 0;
 922         env.POnum         = 0;
 923         env.quad_map      = new_set(quad_cmp, 64);
 924         env.lftr_edges    = new_set(LFTR_cmp, 64);
 925         env.replaced      = 0;
 926         env.lftr_replaced = 0;
 927         env.flags         = flags;
 928
 929         /* Clear all links and move Proj nodes into the
 930            the same block as it's predecessors.
 931            This can improve the placement of new nodes.
 932          */
 933         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
 934
 935         /* we need dominance */
 936         assure_doms(irg);
 937         assure_irg_outs(irg);
 938
 939         /* calculate the post order number for blocks. */
 940         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
 941
 942         /* calculate the SCC's and drive OSR. */
 943         do_dfs(irg, &env);
 944
 945         if (env.replaced) {
 946                 /* try linear function test replacements */
 947                 lftr(irg, &env);
 948
 949                 set_irg_outs_inconsistent(irg);
 950                 set_irg_loopinfo_inconsistent(irg);
 951
 952                 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
 953         }
 954
 955         del_set(env.lftr_edges);
 956         del_set(env.quad_map);
 957         DEL_ARR_F(env.stack);
 958         obstack_free(&env.obst, NULL);
 959 }