nsz Git - libfirm/blob - ir/opt/opt_osr.c

   1 /**
   2  * Project:     libFIRM
   3  * File name:   ir/opt/opt_osr.
   4  * Purpose:     Operator Strength Reduction,
   5  *              Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
   6  * Author:      Michael Beck
   7  * Modified by:
   8  * Created:     12.5.2006
   9  * CVS-ID:      $Id$
  10  * Copyright:   (c) 2006 Universität Karlsruhe
  11  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  12  */
  13 #ifdef HAVE_CONFIG_H
  14 #include "config.h"
  15 #endif
  16
  17 #ifdef HAVE_MALLOC_H
  18 #include <malloc.h>
  19 #endif
  20 #ifdef HAVE_ALLOCA_H
  21 #include <alloca.h>
  22 #endif
  23
  24 #include "opt_osr.h"
  25 #include "irgraph.h"
  26 #include "ircons.h"
  27 #include "irop_t.h"
  28 #include "irloop.h"
  29 #include "irdom.h"
  30 #include "irgmod.h"
  31 #include "irflag_t.h"
  32 #include "irgwalk.h"
  33 #include "irouts.h"
  34 #include "debug.h"
  35 #include "obst.h"
  36 #include "set.h"
  37 #include "tv.h"
  38 #include "hashptr.h"
  39 #include "irtools.h"
  40 #include "array.h"
  41
  42 /** The debug handle. */
  43 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  44
  45 /** A scc. */
  46 typedef struct scc {
  47         ir_node *head;          /**< the head of the list */
  48 } scc;
  49
  50 /** A node entry */
  51 typedef struct node_entry {
  52         unsigned DFSnum;    /**< the DFS number of this node */
  53         unsigned low;       /**< the low number of this node */
  54         ir_node  *header;   /**< the header of this node */
  55         int      in_stack;  /**< flag, set if the node is on the stack */
  56         ir_node  *next;     /**< link to the next node the the same scc */
  57         scc      *pscc;     /**< the scc of this node */
  58         unsigned POnum;     /**< the post order number for blocks */
  59 } node_entry;
  60
  61 /** The environment. */
  62 typedef struct iv_env {
  63         struct obstack obst;    /**< an obstack for allocations */
  64         ir_node  **stack;       /**< the node stack */
  65         int      tos;           /**< tos index */
  66         unsigned nextDFSnum;    /**< the current DFS number */
  67         unsigned POnum;         /**< current post order number */
  68         set      *quad_map;     /**< a map from (op, iv, rc) to node */
  69         set      *lftr_edges;   /**< the set of lftr edges */
  70         unsigned replaced;      /**< number of replaced ops */
  71         unsigned lftr_replaced; /**< number of applied linear function test replacements */
  72         unsigned flags;         /**< additional flags */
  73 } iv_env;
  74
  75 /**
  76  * An entry in the (op, node, node) -> node map.
  77  */
  78 typedef struct quad_t {
  79         opcode  code;  /**< the opcode of the reduced operation */
  80         ir_node *op1;  /**< the first operand the reduced operation */
  81         ir_node *op2;  /**< the second operand of the reduced operation */
  82
  83         ir_node *res; /**< the reduced operation */
  84 } quad_t;
  85
  86 /**
  87  * A LFTR edge.
  88  */
  89 typedef struct LFTR_edge {
  90         ir_node *src;   /**< the source node */
  91         ir_node *dst;   /**< the destination node */
  92         opcode  code;   /**< the opcode that must be applied */
  93         ir_node *rc;    /**< the region const that must be applied */
  94 } LFTR_edge;
  95
  96 /* forward */
  97 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
  98
  99 /**
 100  * Compare two LFTR edges.
 101  */
 102 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 103         const LFTR_edge *l1 = e1;
 104         const LFTR_edge *l2 = e2;
 105
 106         return l1->src != l2->src;
 107 }
 108
 109 /**
 110  * Find a LFTR edge.
 111  */
 112 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 113         LFTR_edge key;
 114
 115         key.src  = src;
 116
 117         return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 118 }
 119
 120 /**
 121  * Add a LFTR edge.
 122  */
 123 static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
 124         LFTR_edge key;
 125
 126         key.src  = src;
 127         key.dst  = dst;
 128         key.code = code;
 129         key.rc   = rc;
 130
 131         /*
 132          * There might be more than one edge here. This is rather bad
 133          * because we currently store only one.
 134          */
 135 //      assert(LFTR_find(src, env) == NULL);
 136         set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 137 }
 138
 139 /**
 140  * Gets the node_entry of a node
 141  */
 142 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 143         node_entry *e = get_irn_link(irn);
 144
 145         if (! e) {
 146                 e = obstack_alloc(&env->obst, sizeof(*e));
 147                 memset(e, 0, sizeof(*e));
 148                 set_irn_link(irn, e);
 149         }
 150         return e;
 151 }
 152
 153 /**
 154  * Check if irn is an IV.
 155  *
 156  * @param irn  the node to check
 157  * @param env  the environment
 158  *
 159  * @returns the header if it is one, NULL else
 160  */
 161 static ir_node *is_iv(ir_node *irn, iv_env *env) {
 162         return get_irn_ne(irn, env)->header;
 163 }
 164
 165 /**
 166  * Check if irn is a region constant.
 167  * The block or irn must strictly dominate the header block.
 168  *
 169  * @param irn           the node to check
 170  * @param header_block  the header block of the induction variable
 171  */
 172 static int is_rc(ir_node *irn, ir_node *header_block) {
 173         ir_node *block = get_nodes_block(irn);
 174
 175         return (block != header_block) && block_dominates(block, header_block);
 176 }
 177
 178 /**
 179  * Set compare function for the quad set.
 180  */
 181 static int quad_cmp(const void *e1, const void *e2, size_t size) {
 182         const quad_t *c1 = e1;
 183         const quad_t *c2 = e2;
 184
 185         return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 186 }
 187
 188 /**
 189  * Check if an reduced operation was already calculated.
 190  *
 191  * @param code  the opcode of the operation
 192  * @param op1   the first operand of the operation
 193  * @param op2   the second operand of the operation
 194  * @param env   the environment
 195  *
 196  * @return the already reduced node or NULL if this operation is not yet reduced
 197  */
 198 static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 199         quad_t key, *entry;
 200
 201         key.code = code;
 202         key.op1 = op1;
 203         key.op2 = op2;
 204
 205         entry = set_find(env->quad_map, &key, sizeof(key),
 206                          (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 207         if (entry)
 208                 return entry->res;
 209         return NULL;
 210 }
 211
 212 /**
 213  * Add an reduced operation.
 214  *
 215  * @param code    the opcode of the operation
 216  * @param op1     the first operand of the operation
 217  * @param op2     the second operand of the operation
 218  * @param result  the result of the reduced operation
 219  * @param env     the environment
 220  */
 221 static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
 222         quad_t key;
 223
 224         key.code = code;
 225         key.op1  = op1;
 226         key.op2  = op2;
 227         key.res  = result;
 228
 229         set_insert(env->quad_map, &key, sizeof(key),
 230                    (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 231 }
 232
 233 /**
 234  * Find a location where to place a bin-op whose operands are in
 235  * block1 and block2.
 236  *
 237  * @param block1  the block of the first operand
 238  * @param block2  the block of the second operand
 239  *
 240  * Note that we know here that such a place must exists. Moreover, this means
 241  * that either block1 dominates block2 or vice versa. So, just return
 242  * the "smaller" one.
 243  */
 244 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 245         if (block_dominates(block1, block2))
 246                 return block2;
 247         assert(block_dominates(block2, block1));
 248         return block1;
 249 }
 250
 251 /**
 252  * Create a node that executes an op1 code op1 operation.
 253  *
 254  * @param code   the opcode to execute
 255  * @param db     debug info to add to the new node
 256  * @param op1    the first operand
 257  * @param op2    the second operand
 258  * @param mode   the mode of the new operation
 259  *
 260  * @return the newly created node
 261  */
 262 static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 263         ir_graph *irg = current_ir_graph;
 264         ir_node *result;
 265         ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
 266
 267         switch (code) {
 268         case iro_Mul:
 269                 result = new_rd_Mul(db, irg, block, op1, op2, mode);
 270                 break;
 271         case iro_Add:
 272                 result = new_rd_Add(db, irg, block, op1, op2, mode);
 273                 break;
 274         case iro_Sub:
 275                 result = new_rd_Sub(db, irg, block, op1, op2, mode);
 276                 break;
 277         default:
 278                 assert(0);
 279                 result = NULL;
 280         }
 281         return result;
 282 }
 283
 284 /**
 285  * The Apply operation.
 286  *
 287  * @param orig   the node that represent the original operation and determines
 288  *               the opcode, debug-info and mode of a newly created one
 289  * @param op1    the first operand
 290  * @param op2    the second operand
 291  * @param env     the environment
 292  *
 293  * @return the newly created node
 294  */
 295 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 296         opcode code = get_irn_opcode(orig);
 297         ir_node *result = search(code, op1, op2, env);
 298
 299         if (! result) {
 300                 dbg_info *db = get_irn_dbg_info(orig);
 301                 ir_node *op1_header = get_irn_ne(op1, env)->header;
 302                 ir_node *op2_header = get_irn_ne(op2, env)->header;
 303
 304                 if (op1_header != NULL && is_rc(op2, op1_header)) {
 305                         result = reduce(orig, op1, op2, env);
 306                 }
 307                 else if (op2_header != NULL && is_rc(op1, op2_header)) {
 308                         result = reduce(orig, op2, op1, env);
 309                 }
 310                 else {
 311                         result = do_apply(code, db, op1, op2, get_irn_mode(orig));
 312                         get_irn_ne(result, env)->header = NULL;
 313                 }
 314         }
 315         return result;
 316 }
 317
 318 /**
 319  * The Reduce operation.
 320  *
 321  * @param orig   the node that represent the original operation and determines
 322  *               the opcode, debug-info and mode of a newly created one
 323  * @param iv     the induction variable
 324  * @param rc     the region constant
 325  * @param env    the environment
 326  *
 327  * @return the reduced node
 328  */
 329 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 330         opcode code = get_irn_opcode(orig);
 331         ir_node *result = search(code, iv, rc, env);
 332
 333         if (! result) {
 334                 node_entry *e, *iv_e;
 335                 int i, n;
 336                 ir_mode *mode = get_irn_mode(orig);
 337
 338                 result = exact_copy(iv);
 339                 if (mode_is_reference(mode)) {
 340                         /* bad case: we replace a reference mode calculation.
 341                            assure that the new IV will be a reference one */
 342                         set_irn_mode(result, mode);
 343                 }
 344                 add(code, iv, rc, result, env);
 345                 DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 346                         get_irn_opname(orig), rc));
 347
 348                 iv_e = get_irn_ne(iv, env);
 349                 e    = get_irn_ne(result, env);
 350                 e->header = iv_e->header;
 351
 352                 /* create the LFTR edge */
 353                 LFTR_add(iv, result, code, rc, env);
 354
 355                 n = get_irn_arity(result);
 356                 for (i = 0; i < n; ++i) {
 357                         ir_node *o = get_irn_n(result, i);
 358
 359                         e = get_irn_ne(o, env);
 360                         if (e->header == iv_e->header)
 361                                 o = reduce(orig, o, rc, env);
 362                         else if (is_Phi(result))
 363                                 o = apply(orig, o, rc, env);
 364                         else {
 365                                 switch (code) {
 366                                 case iro_Mul:
 367                                         o = apply(orig, o, rc, env);
 368                                         break;
 369                                 }
 370                         }
 371                         set_irn_n(result, i, o);
 372                 }
 373         }
 374         else {
 375                 DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
 376                         get_irn_opname(orig), rc));
 377         }
 378         return result;
 379 }
 380
 381 /**
 382  * The Replace operation.
 383  *
 384  * @param irn   the node that will be replaced
 385  * @param iv    the induction variable
 386  * @param rc    the region constant
 387  * @param env   the environment
 388  */
 389 static void replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 390         ir_node *result;
 391
 392         DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
 393
 394         result = reduce(irn, iv, rc, env);
 395         if (result != irn) {
 396                 node_entry *e, *iv_e;
 397
 398                 exchange(irn, result);
 399                 e = get_irn_ne(result, env);
 400                 iv_e = get_irn_ne(iv, env);
 401                 e->header = iv_e->header;
 402         }
 403 }
 404
 405 /**
 406  * Check if a node can be replaced (+, -, *).
 407  *
 408  * @param irn   the node to check
 409  * @param env   the environment
 410  *
 411  * @return non-zero if irn should be Replace'd
 412  */
 413 static int check_replace(ir_node *irn, iv_env *env) {
 414         ir_node *left, *right, *iv, *rc;
 415         ir_op   *op  = get_irn_op(irn);
 416         opcode  code = get_op_code(op);
 417         ir_node *liv, *riv;
 418
 419         switch (code) {
 420         case iro_Mul:
 421         case iro_Add:
 422         case iro_Sub:
 423                 iv = rc = NULL;
 424
 425                 left  = get_binop_left(irn);
 426                 right = get_binop_right(irn);
 427
 428                 liv = is_iv(left, env);
 429                 riv = is_iv(right, env);
 430                 if (liv && is_rc(right, liv)) {
 431                         iv = left; rc = right;
 432                 }
 433                 else if (riv && is_op_commutative(op) &&
 434                                     is_rc(left, riv)) {
 435                         iv = right; rc = left;
 436                 }
 437
 438                 if (iv) {
 439                         replace(irn, iv, rc, env);
 440                         ++env->replaced;
 441                         return 1;
 442                 }
 443                 break;
 444         }
 445         return 0;
 446 }
 447
 448 /**
 449  * Check which SCC's are induction variables.
 450  *
 451  * @param pscc  a SCC
 452  * @param env   the environment
 453  */
 454 static void classify_iv(scc *pscc, iv_env *env) {
 455         ir_node *irn, *next, *header = NULL;
 456         node_entry *h, *b;
 457         int j;
 458
 459         /* find the header block for this scc */
 460         for (irn = pscc->head; irn; irn = next) {
 461                 node_entry *e = get_irn_link(irn);
 462                 ir_node *block = get_nodes_block(irn);
 463
 464                 next = e->next;
 465                 b = get_irn_ne(block, env);
 466
 467                 if (header) {
 468                         if (h->POnum < b->POnum) {
 469                                 header = block;
 470                                 h      = b;
 471                         }
 472                 }
 473                 else {
 474                         header = block;
 475                         h      = b;
 476                 }
 477         }
 478
 479         /* check if this scc contains only Phi, Add or Sub nodes */
 480         for (irn = pscc->head; irn; irn = next) {
 481                 node_entry *e = get_irn_ne(irn, env);
 482
 483                 next = e->next;
 484                 switch (get_irn_opcode(irn)) {
 485                 case iro_Add:
 486                 case iro_Sub:
 487                 case iro_Phi:
 488                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 489                                 ir_node *pred  = get_irn_n(irn, j);
 490                                 node_entry *pe = get_irn_ne(pred, env);
 491
 492                                 if (pe->pscc != e->pscc) {
 493                                         /* not in the same SCC, must be a region const */
 494                                         if (! is_rc(pred, header)) {
 495                                                 /* not an induction variable */
 496                                                 goto fail;
 497                                         }
 498                                 }
 499                         }
 500                         break;
 501                 default:
 502                         /* not an induction variable */
 503                         goto fail;
 504                 }
 505         }
 506         /* found an induction variable */
 507         DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
 508
 509         /* set the header for every node in this scc */
 510         for (irn = pscc->head; irn; irn = next) {
 511                 node_entry *e = get_irn_ne(irn, env);
 512                 e->header = header;
 513                 next = e->next;
 514                 DB((dbg, LEVEL_2, " %+F,", irn));
 515         }
 516         DB((dbg, LEVEL_2, "\n"));
 517         return;
 518
 519 fail:
 520         for (irn = pscc->head; irn; irn = next) {
 521                 node_entry *e = get_irn_ne(irn, env);
 522
 523                 next = e->next;
 524                 if (! check_replace(irn, env))
 525                         e->header = NULL;
 526         }
 527 }
 528
 529 /**
 530  * Process a SCC.
 531  *
 532  * @param pscc  the SCC
 533  * @param env   the environment
 534  */
 535 static void process_scc(scc *pscc, iv_env *env) {
 536         ir_node *head = pscc->head;
 537         node_entry *e = get_irn_link(head);
 538
 539 #ifdef DEBUG_libfirm
 540         {
 541                 ir_node *irn, *next;
 542
 543                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 544                 for (irn = pscc->head; irn; irn = next) {
 545                         node_entry *e = get_irn_link(irn);
 546
 547                         next = e->next;
 548
 549                         DB((dbg, LEVEL_4, " %+F,", irn));
 550                 }
 551                 DB((dbg, LEVEL_4, "\n"));
 552         }
 553 #endif
 554
 555         if (e->next == NULL) {
 556                 /* this SCC has only a single member */
 557                 check_replace(head, env);
 558         }
 559         else {
 560                 classify_iv(pscc, env);
 561         }
 562 }
 563
 564 /**
 565  * Push a node onto the stack.
 566  *
 567  * @param env   the environment
 568  * @param n     the node to push
 569  */
 570 static void push(iv_env *env, ir_node *n) {
 571         node_entry *e;
 572
 573         if (env->tos == ARR_LEN(env->stack)) {
 574                 int nlen = ARR_LEN(env->stack) * 2;
 575                 ARR_RESIZE(ir_node *, env->stack, nlen);
 576         }
 577         env->stack[env->tos++] = n;
 578         e = get_irn_ne(n, env);
 579         e->in_stack = 1;
 580 }
 581
 582 /**
 583  * pop a node from the stack
 584  *
 585  * @param env   the environment
 586  *
 587  * @return  The topmost node
 588  */
 589 static ir_node *pop(iv_env *env)
 590 {
 591   ir_node *n = env->stack[--env->tos];
 592   node_entry *e = get_irn_ne(n, env);
 593
 594   e->in_stack = 0;
 595   return n;
 596 }
 597
 598 /**
 599  * Do Tarjan's SCC algorithm and drive OSR.
 600  *
 601  * @param irn  start at this node
 602  * @param env  the environment
 603  */
 604 static void dfs(ir_node *irn, iv_env *env)
 605 {
 606         int i, n;
 607         node_entry *node = get_irn_ne(irn, env);
 608
 609         mark_irn_visited(irn);
 610
 611         /* do not put blocks into the scc */
 612         if (is_Block(irn)) {
 613                 n = get_irn_arity(irn);
 614                 for (i = 0; i < n; ++i) {
 615                         ir_node *pred = get_irn_n(irn, i);
 616
 617                         if (irn_not_visited(pred))
 618                                 dfs(pred, env);
 619                 }
 620         }
 621         else {
 622                 ir_node *block = get_nodes_block(irn);
 623
 624                 node->DFSnum = env->nextDFSnum++;
 625                 node->low    = node->DFSnum;
 626                 push(env, irn);
 627
 628                 /* handle the block */
 629                 if (irn_not_visited(block))
 630                         dfs(block, env);
 631
 632                 n = get_irn_arity(irn);
 633                 for (i = 0; i < n; ++i) {
 634                         ir_node *pred = get_irn_n(irn, i);
 635                         node_entry *o = get_irn_ne(pred, env);
 636
 637                         if (irn_not_visited(pred)) {
 638                                 dfs(pred, env);
 639                                 node->low = MIN(node->low, o->low);
 640                         }
 641                         if (o->DFSnum < node->DFSnum && o->in_stack)
 642                                 node->low = MIN(o->DFSnum, node->low);
 643                 }
 644                 if (node->low == node->DFSnum) {
 645                         scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
 646                         ir_node *x;
 647
 648                         pscc->head = NULL;
 649                         do {
 650                                 node_entry *e;
 651
 652                                 x = pop(env);
 653                                 e = get_irn_ne(x, env);
 654                                 e->pscc    = pscc;
 655                                 e->next    = pscc->head;
 656                                 pscc->head = x;
 657                         } while (x != irn);
 658
 659                         process_scc(pscc, env);
 660                 }
 661         }
 662 }
 663
 664 /**
 665  * Do the DFS by starting at the End node of a graph.
 666  *
 667  * @param irg  the graph to process
 668  * @param env  the environment
 669  */
 670 static void do_dfs(ir_graph *irg, iv_env *env) {
 671         ir_graph *rem = current_ir_graph;
 672         ir_node *end = get_irg_end(irg);
 673         int i, n;
 674
 675         current_ir_graph = irg;
 676         inc_irg_visited(irg);
 677
 678         /* visit all visible nodes */
 679         dfs(end, env);
 680
 681         /* visit the keep-alives */
 682         n = get_End_n_keepalives(end);
 683         for (i = 0; i < n; ++i) {
 684                 ir_node *ka = get_End_keepalive(end, i);
 685
 686                 if (irn_not_visited(ka))
 687                         dfs(ka, env);
 688         }
 689
 690         current_ir_graph = rem;
 691 }
 692
 693 /**
 694  * Post-block-walker: assign the post-order number.
 695  */
 696 static void assign_po(ir_node *block, void *ctx) {
 697         iv_env *env = ctx;
 698         node_entry *e = get_irn_ne(block, env);
 699
 700         e->POnum = env->POnum++;
 701 }
 702
 703 /**
 704  * Follows the LFTR edges and return the last node in the chain.
 705  *
 706  * @param irn  the node that should be followed
 707  * @param env  the IV environment
 708  *
 709  * @note
 710  * In the current implementation only the last edge is stored, so
 711  * only one chain exists. That's why we might miss some opportunities.
 712  */
 713 static ir_node *followEdges(ir_node *irn, iv_env *env) {
 714         for (;;) {
 715                 LFTR_edge *e = LFTR_find(irn, env);
 716                 if (e)
 717                         irn = e->dst;
 718                 else
 719                         return irn;
 720         }
 721 }
 722
 723 /**
 724  * Apply one LFTR edge operation.
 725  * Return NULL if the transformation cannot be done safely without
 726  * an Overflow.
 727  *
 728  * @param rc   the IV node that should be translated
 729  * @param e    the LFTR edge
 730  * @param env  the IV environment
 731  *
 732  * @return the translated region constant or NULL
 733  *         if the translation was not possible
 734  *
 735  * @note
 736  * In the current implementation only the last edge is stored, so
 737  * only one chain exists. That's why we might miss some opportunities.
 738  */
 739 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
 740         if (env->flags & osr_flag_lftr_with_ov_check) {
 741                 tarval *tv_l, *tv_r, *tv;
 742                 tarval_int_overflow_mode_t ovmode;
 743
 744                 /* overflow can only be decided for Consts */
 745                 if (! is_Const(e->rc)) {
 746                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
 747                         return NULL;
 748                 }
 749
 750                 tv_l = get_Const_tarval(rc);
 751                 tv_r = get_Const_tarval(e->rc);
 752
 753                 ovmode = tarval_get_integer_overflow_mode();
 754                 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
 755
 756                 switch (e->code) {
 757                 case iro_Mul:
 758                         tv = tarval_mul(tv_l, tv_r);
 759                         DB((dbg, LEVEL_4, " * %+F", tv_r));
 760                         break;
 761                 case iro_Add:
 762                         tv = tarval_add(tv_l, tv_r);
 763                         DB((dbg, LEVEL_4, " + %+F", tv_r));
 764                         break;
 765                 case iro_Sub:
 766                         tv = tarval_sub(tv_l, tv_r);
 767                         DB((dbg, LEVEL_4, " - %+F", tv_r));
 768                         break;
 769                 default:
 770                         assert(0);
 771                         tv = tarval_bad;
 772                 }
 773                 tarval_set_integer_overflow_mode(ovmode);
 774
 775                 if (tv == tarval_bad) {
 776                         DB((dbg, LEVEL_4, " = OVERFLOW"));
 777                         return NULL;
 778                 }
 779                 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
 780         }
 781         return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
 782 }
 783
 784 /**
 785  * Applies the operations represented by the LFTR edges to a
 786  * region constant and returns the value.
 787  * Return NULL if the transformation cannot be done safely without
 788  * an Overflow.
 789  *
 790  * @param iv   the IV node that starts the LFTR edge chain
 791  * @param rc   the region constant that should be translated
 792  * @param env  the IV environment
 793  *
 794  * @return the translated region constant or NULL
 795  *         if the translation was not possible
 796  */
 797 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
 798         ir_node *irn = iv;
 799
 800         if (env->flags & osr_flag_lftr_with_ov_check) {
 801                 /* overflow can only be decided for Consts */
 802                 if (! is_Const(rc)) {
 803                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
 804                         return NULL;
 805                 }
 806                 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
 807         }
 808
 809         for (irn = iv; rc;) {
 810                 LFTR_edge *e = LFTR_find(irn, env);
 811                 if (e) {
 812                         rc = applyOneEdge(rc, e, env);
 813                         irn = e->dst;
 814                 }
 815                 else
 816                         break;
 817         }
 818         DB((dbg, LEVEL_3, "\n"));
 819         return rc;
 820 }
 821
 822 /**
 823  * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
 824  * and tries to optimize them.
 825  */
 826 static void do_lftr(ir_node *cmp, void *ctx) {
 827         iv_env *env = ctx;
 828         ir_node *left, *right, *liv, *riv;
 829         ir_node *iv, *rc;
 830         ir_node *nleft = NULL, *nright = NULL;
 831
 832         if (get_irn_op(cmp) != op_Cmp)
 833                 return;
 834
 835         left  = get_Cmp_left(cmp);
 836         right = get_Cmp_right(cmp);
 837
 838         liv = is_iv(left, env);
 839         riv = is_iv(right, env);
 840         if (liv && is_rc(right, liv)) {
 841                 iv = left; rc = right;
 842
 843                 nright = applyEdges(iv, rc, env);
 844                 if (nright) {
 845                         nleft = followEdges(iv, env);
 846                 }
 847         }
 848         else if (riv && is_rc(left, riv)) {
 849                 iv = right; rc = left;
 850
 851                 nleft = applyEdges(iv, rc, env);
 852                 if (nleft) {
 853                         nright = followEdges(iv, env);
 854                 }
 855         }
 856
 857         if (nleft && nright) {
 858                 DB((dbg, LEVEL_2, "  LFTR for %+F\n", cmp));
 859                 set_Cmp_left(cmp, nleft);
 860                 set_Cmp_right(cmp, nright);
 861                 ++env->lftr_replaced;
 862         }
 863 }
 864
 865 /**
 866  * do linear function test replacement.
 867  *
 868  * @param irg   the graph that should be optimized
 869  * @param env   the IV environment
 870  */
 871 static void lftr(ir_graph *irg, iv_env *env) {
 872         irg_walk_graph(irg, NULL, do_lftr, env);
 873 }
 874
 875 /**
 876  * Pre-walker: set all node links to NULL and fix the
 877  * block of Proj nodes.
 878  */
 879 static void clear_and_fix(ir_node *irn, void *env)
 880 {
 881         set_irn_link(irn, NULL);
 882
 883         if (is_Proj(irn)) {
 884                 ir_node *pred = get_Proj_pred(irn);
 885                 set_irn_n(irn, -1, get_irn_n(pred, -1));
 886         }
 887 }
 888
 889 /* Performs Operator Strength Reduction for the passed graph. */
 890 void opt_osr(ir_graph *irg, unsigned flags) {
 891         iv_env env;
 892
 893         if (! get_opt_strength_red())
 894                 return;
 895
 896         FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
 897 //      firm_dbg_set_mask(dbg, SET_LEVEL_3);
 898
 899         DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
 900
 901         obstack_init(&env.obst);
 902         env.stack         = NEW_ARR_F(ir_node *, 128);
 903         env.tos           = 0;
 904         env.nextDFSnum    = 0;
 905         env.POnum         = 0;
 906         env.quad_map      = new_set(quad_cmp, 64);
 907         env.lftr_edges    = new_set(LFTR_cmp, 64);
 908         env.replaced      = 0;
 909         env.lftr_replaced = 0;
 910         env.flags         = flags;
 911
 912         /* Clear all links and move Proj nodes into the
 913            the same block as it's predecessors.
 914            This can improve the placement of new nodes.
 915          */
 916         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
 917
 918         /* we need dominance */
 919         assure_doms(irg);
 920         assure_irg_outs(irg);
 921
 922         /* calculate the post order number for blocks. */
 923         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
 924
 925         /* calculate the SCC's and drive OSR. */
 926         do_dfs(irg, &env);
 927
 928         if (env.replaced) {
 929                 /* try linear function test replacements */
 930                 lftr(irg, &env);
 931
 932                 set_irg_outs_inconsistent(irg);
 933                 set_irg_loopinfo_inconsistent(irg);
 934
 935                 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
 936         }
 937
 938         del_set(env.lftr_edges);
 939         del_set(env.quad_map);
 940         DEL_ARR_F(env.stack);
 941         obstack_free(&env.obst, NULL);
 942 }