nsz Git - libfirm/blob - ir/opt/opt_osr.c

   1 /*
   2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Operator Strength Reduction.
  23  * @date    12.5.2006
  24  * @author  Michael Beck
  25  * @version $Id$
  26  * @summary
  27  *  Implementation of the Operator Strength Reduction algorithm
  28  *  by Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick.
  29  *  Extended version.
  30  */
  31 #include "config.h"
  32
  33 #include "adt/pdeq.h"
  34 #include "iroptimize.h"
  35 #include "irgraph.h"
  36 #include "ircons.h"
  37 #include "irop_t.h"
  38 #include "irdom.h"
  39 #include "irgmod.h"
  40 #include "irflag_t.h"
  41 #include "irgwalk.h"
  42 #include "irouts.h"
  43 #include "iredges.h"
  44 #include "debug.h"
  45 #include "obst.h"
  46 #include "set.h"
  47 #include "tv.h"
  48 #include "hashptr.h"
  49 #include "irtools.h"
  50 #include "irloop_t.h"
  51 #include "array.h"
  52 #include "firmstat.h"
  53 #include "error.h"
  54 #include "irpass_t.h"
  55
  56 /** The debug handle. */
  57 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  58
  59 /** A scc. */
  60 typedef struct scc {
  61         ir_node  *head;         /**< the head of the list */
  62         tarval   *init;     /**< the init value iff only one exists. */
  63         tarval   *incr;     /**< the induction variable increment if only a single const exists. */
  64         unsigned code;      /**< == iro_Add if +incr, iro_Sub if -incr, 0 if not analysed, iro_Bad else */
  65 } scc;
  66
  67 /** A node entry */
  68 typedef struct node_entry {
  69         unsigned DFSnum;    /**< the DFS number of this node */
  70         unsigned low;       /**< the low number of this node */
  71         ir_node  *header;   /**< the header of this node */
  72         int      in_stack;  /**< flag, set if the node is on the stack */
  73         ir_node  *next;     /**< link to the next node the the same scc */
  74         scc      *pscc;     /**< the scc of this node */
  75         unsigned POnum;     /**< the post order number for blocks */
  76 } node_entry;
  77
  78 /** The environment. */
  79 typedef struct iv_env {
  80         struct obstack obst;    /**< an obstack for allocations */
  81         ir_node  **stack;       /**< the node stack */
  82         int      tos;           /**< tos index */
  83         unsigned nextDFSnum;    /**< the current DFS number */
  84         unsigned POnum;         /**< current post order number */
  85         set      *quad_map;     /**< a map from (op, iv, rc) to node */
  86         set      *lftr_edges;   /**< the set of lftr edges */
  87         unsigned replaced;      /**< number of replaced ops */
  88         unsigned lftr_replaced; /**< number of applied linear function test replacements */
  89         unsigned osr_flags;     /**< additional flags steering the transformation */
  90         unsigned need_postpass; /**< set, if a post pass is needed to fix Add and Sub nodes */
  91         /** Function called to process a SCC. */
  92         void (*process_scc)(scc *pscc, struct iv_env *env);
  93 } iv_env;
  94
  95 /**
  96  * An entry in the (op, node, node) -> node map.
  97  */
  98 typedef struct quadruple_t {
  99         ir_opcode code;  /**< the opcode of the reduced operation */
 100         ir_node   *op1;  /**< the first operand the reduced operation */
 101         ir_node   *op2;  /**< the second operand of the reduced operation */
 102
 103         ir_node   *res; /**< the reduced operation */
 104 } quadruple_t;
 105
 106 /**
 107  * A LFTR edge.
 108  */
 109 typedef struct LFTR_edge {
 110         ir_node   *src;   /**< the source node */
 111         ir_node   *dst;   /**< the destination node */
 112         ir_opcode code;   /**< the opcode that must be applied */
 113         ir_node   *rc;    /**< the region const that must be applied */
 114 } LFTR_edge;
 115
 116 /* forward */
 117 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
 118
 119 /**
 120  * Compare two LFTR edges.
 121  */
 122 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 123         const LFTR_edge *l1 = e1;
 124         const LFTR_edge *l2 = e2;
 125         (void) size;
 126
 127         return l1->src != l2->src;
 128 }  /* LFTR_cmp */
 129
 130 /**
 131  * Find a LFTR edge.
 132  *
 133  * @param src  the source node of the transition
 134  */
 135 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 136         LFTR_edge key;
 137
 138         key.src  = src;
 139
 140         return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 141 }  /* LFTR_find */
 142
 143 /**
 144  * Add a LFTR edge.
 145  *
 146  * @param src   the source node of the edge
 147  * @param dst   the destination node of the edge
 148  * @param code  the opcode of the transformed transition
 149  * @param rc    the region const used in the transition
 150  * @param env   the environment
 151  */
 152 static void LFTR_add(ir_node *src, ir_node *dst, ir_opcode code, ir_node *rc, iv_env *env) {
 153         LFTR_edge key;
 154
 155         key.src  = src;
 156         key.dst  = dst;
 157         key.code = code;
 158         key.rc   = rc;
 159
 160         /*
 161          * There might be more than one edge here. This is rather bad
 162          * because we currently store only one.
 163          */
 164 //      assert(LFTR_find(src, env) == NULL);
 165         set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 166 }  /* LFTR_add */
 167
 168 /**
 169  * Gets the node_entry of a node.
 170  *
 171  * @param irn  the node
 172  * @param env  the environment
 173  */
 174 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 175         node_entry *e = get_irn_link(irn);
 176
 177         if (e == NULL) {
 178                 e = obstack_alloc(&env->obst, sizeof(*e));
 179                 memset(e, 0, sizeof(*e));
 180                 set_irn_link(irn, e);
 181         }
 182         return e;
 183 }  /* get_irn_ne */
 184
 185 /**
 186  * Gets the scc from an induction variable.
 187  *
 188  * @param iv   any node of the induction variable
 189  * @param env  the environment
 190  */
 191 static scc *get_iv_scc(ir_node *iv, iv_env *env) {
 192         node_entry *e = get_irn_ne(iv, env);
 193         return e->pscc;
 194 }  /* get_iv_scc */
 195
 196 /**
 197  * Check if irn is an IV.
 198  *
 199  * @param irn  the node to check
 200  * @param env  the environment
 201  *
 202  * @returns the header if it is one, NULL else
 203  */
 204 static ir_node *is_iv(ir_node *irn, iv_env *env) {
 205         return get_irn_ne(irn, env)->header;
 206 }  /* is_iv */
 207
 208 /**
 209  * Check if irn is a region constant.
 210  * The block or irn must strictly dominate the header block.
 211  *
 212  * @param irn           the node to check
 213  * @param header_block  the header block of the induction variable
 214  */
 215 static int is_rc(ir_node *irn, ir_node *header_block) {
 216         ir_node *block = get_nodes_block(irn);
 217
 218         return (block != header_block) && block_dominates(block, header_block);
 219 }  /* is_rc */
 220
 221 /**
 222  * Set compare function for the quad set.
 223  */
 224 static int quad_cmp(const void *e1, const void *e2, size_t size) {
 225         const quadruple_t *c1 = e1;
 226         const quadruple_t *c2 = e2;
 227         (void) size;
 228
 229         return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 230 }  /* quad_cmp */
 231
 232 /**
 233  * Check if an reduced operation was already calculated.
 234  *
 235  * @param code  the opcode of the operation
 236  * @param op1   the first operand of the operation
 237  * @param op2   the second operand of the operation
 238  * @param env   the environment
 239  *
 240  * @return the already reduced node or NULL if this operation is not yet reduced
 241  */
 242 static ir_node *search(ir_opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 243         quadruple_t key, *entry;
 244
 245         key.code = code;
 246         key.op1 = op1;
 247         key.op2 = op2;
 248
 249         entry = set_find(env->quad_map, &key, sizeof(key),
 250                          (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 251         if (entry)
 252                 return entry->res;
 253         return NULL;
 254 }  /* search */
 255
 256 /**
 257  * Add an reduced operation.
 258  *
 259  * @param code    the opcode of the operation
 260  * @param op1     the first operand of the operation
 261  * @param op2     the second operand of the operation
 262  * @param result  the result of the reduced operation
 263  * @param env     the environment
 264  */
 265 static void add(ir_opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
 266         quadruple_t key;
 267
 268         key.code = code;
 269         key.op1  = op1;
 270         key.op2  = op2;
 271         key.res  = result;
 272
 273         set_insert(env->quad_map, &key, sizeof(key),
 274                    (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 275 }  /* add */
 276
 277 /**
 278  * Find a location where to place a bin-op whose operands are in
 279  * block1 and block2.
 280  *
 281  * @param block1  the block of the first operand
 282  * @param block2  the block of the second operand
 283  *
 284  * Note that we know here that such a place must exists. Moreover, this means
 285  * that either block1 dominates block2 or vice versa. So, just return
 286  * the "smaller" one.
 287  */
 288 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 289         if (block_dominates(block1, block2))
 290                 return block2;
 291         assert(block_dominates(block2, block1));
 292         return block1;
 293 }  /* find_location */
 294
 295 /**
 296  * Create a node that executes an op1 code op1 operation.
 297  *
 298  * @param code   the opcode to execute
 299  * @param db     debug info to add to the new node
 300  * @param op1    the first operand
 301  * @param op2    the second operand
 302  * @param mode   the mode of the new operation
 303  *
 304  * @return the newly created node
 305  */
 306 static ir_node *do_apply(ir_opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 307         ir_node *result;
 308         ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
 309
 310         switch (code) {
 311         case iro_Mul:
 312                 result = new_rd_Mul(db, block, op1, op2, mode);
 313                 break;
 314         case iro_Add:
 315                 result = new_rd_Add(db, block, op1, op2, mode);
 316                 break;
 317         case iro_Sub:
 318                 result = new_rd_Sub(db, block, op1, op2, mode);
 319                 break;
 320         default:
 321                 panic("Unsupported opcode");
 322                 result = NULL;
 323         }
 324         return result;
 325 }  /* do_apply */
 326
 327 /**
 328  * The Apply operation.
 329  *
 330  * @param orig   the node that represent the original operation and determines
 331  *               the opcode, debug-info and mode of a newly created one
 332  * @param op1    the first operand
 333  * @param op2    the second operand
 334  * @param env    the environment
 335  *
 336  * @return the newly created node
 337  */
 338 static ir_node *apply(ir_node *header, ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 339         ir_opcode code = get_irn_opcode(orig);
 340         ir_node *result = search(code, op1, op2, env);
 341
 342         if (result == NULL) {
 343                 dbg_info *db = get_irn_dbg_info(orig);
 344                 ir_node *op1_header = get_irn_ne(op1, env)->header;
 345                 ir_node *op2_header = get_irn_ne(op2, env)->header;
 346
 347                 if (op1_header == header && is_rc(op2, op1_header)) {
 348                         result = reduce(orig, op1, op2, env);
 349                 }
 350                 else if (op2_header == header && is_rc(op1, op2_header)) {
 351                         result = reduce(orig, op2, op1, env);
 352                 }
 353                 else {
 354                         result = do_apply(code, db, op1, op2, get_irn_mode(orig));
 355                         get_irn_ne(result, env)->header = NULL;
 356                 }
 357         }
 358         return result;
 359 }  /* apply */
 360
 361 /**
 362  * The Reduce operation.
 363  *
 364  * @param orig   the node that represent the original operation and determines
 365  *               the opcode, debug-info and mode of a newly created one
 366  * @param iv     the induction variable
 367  * @param rc     the region constant
 368  * @param env    the environment
 369  *
 370  * @return the reduced node
 371  */
 372 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 373         ir_opcode code = get_irn_opcode(orig);
 374         ir_node *result = search(code, iv, rc, env);
 375
 376         /* check if we have already done this operation on the iv */
 377         if (result == NULL) {
 378                 node_entry *e, *iv_e;
 379                 int i;
 380                 ir_mode *mode = get_irn_mode(orig);
 381
 382                 result = exact_copy(iv);
 383
 384                 if (get_irn_mode(result) != mode) {
 385                         /*
 386                          * Beware: we must always create a new induction variable with the same mode
 387                          * as the node we are replacing. Especially this means the mode might be changed
 388                          * from P to I and back. This is always possible, because we have only Phi, Add
 389                          * and Sub nodes.
 390                          * However, this might lead to AddIs(Iu,Is) which we must fix. The best way to do this
 391                          * seems to be a post-pass, or we might end with useless Conv's.
 392                          */
 393                         set_irn_mode(result, mode);
 394                         env->need_postpass = 1;
 395                 }
 396                 add(code, iv, rc, result, env);
 397                 DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 398                         get_irn_opname(orig), rc));
 399
 400                 iv_e = get_irn_ne(iv, env);
 401                 e    = get_irn_ne(result, env);
 402                 e->header = iv_e->header;
 403
 404                 /* create the LFTR edge */
 405                 LFTR_add(iv, result, code, rc, env);
 406
 407                 for (i = get_irn_arity(result) - 1; i >= 0; --i) {
 408                         ir_node *o = get_irn_n(result, i);
 409
 410                         e = get_irn_ne(o, env);
 411                         if (e->header == iv_e->header)
 412                                 o = reduce(orig, o, rc, env);
 413                         else if (is_Phi(result) || code == iro_Mul)
 414                                 o = apply(iv_e->header, orig, o, rc, env);
 415                         set_irn_n(result, i, o);
 416                 }
 417         } else {
 418                 DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
 419                         get_irn_opname(orig), rc));
 420         }
 421         return result;
 422 }  /* reduce */
 423
 424 /**
 425  * Update the scc for a newly created IV.
 426  */
 427 static void update_scc(ir_node *iv, node_entry *e, iv_env *env) {
 428         scc     *pscc   = e->pscc;
 429         ir_node *header = e->header;
 430         waitq    *wq = new_waitq();
 431
 432         DB((dbg, LEVEL_2, "  Creating SCC for new an induction variable:\n  "));
 433         pscc->head = NULL;
 434         waitq_put(wq, iv);
 435         do {
 436                 ir_node    *irn = waitq_get(wq);
 437                 node_entry *ne  = get_irn_ne(irn, env);
 438                 int        i;
 439
 440                 ne->pscc   = pscc;
 441                 ne->next   = pscc->head;
 442                 pscc->head = irn;
 443                 DB((dbg, LEVEL_2, " %+F,", irn));
 444
 445                 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
 446                         ir_node    *pred = get_irn_n(irn, i);
 447                         node_entry *pe   = get_irn_ne(pred, env);
 448
 449                         if (pe->header == header && pe->pscc == NULL) {
 450                                 /* set the pscc here to ensure that the node is NOT enqueued another time */
 451                                 pe->pscc = pscc;
 452                                 waitq_put(wq, pred);
 453                         }
 454                 }
 455         } while (! waitq_empty(wq));
 456         del_waitq(wq);
 457         DB((dbg, LEVEL_2, "\n"));
 458 }  /* update_scc */
 459
 460 /**
 461  * The Replace operation. We found a node representing iv (+,-,*) rc
 462  * that can be removed by replacing the induction variable iv by a new
 463  * one that 'applies' the operation 'irn'.
 464  *
 465  * @param irn   the node that will be replaced
 466  * @param iv    the induction variable
 467  * @param rc    the region constant
 468  * @param env   the environment
 469  */
 470 static int replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 471         ir_node *result;
 472
 473         DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
 474
 475         result = reduce(irn, iv, rc, env);
 476         if (result != irn) {
 477                 node_entry *e;
 478
 479                 hook_strength_red(current_ir_graph, irn);
 480                 exchange(irn, result);
 481                 e = get_irn_ne(result, env);
 482                 if (e->pscc == NULL) {
 483                         e->pscc = obstack_alloc(&env->obst, sizeof(*e->pscc));
 484                         memset(e->pscc, 0, sizeof(*e->pscc));
 485                         update_scc(result, e, env);
 486                 }
 487                 ++env->replaced;
 488                 return 1;
 489         }
 490         return 0;
 491 }  /* replace */
 492
 493 #if 0
 494 /**
 495  * check if a given node is a mul with 2, 4, 8
 496  */
 497 static int is_x86_shift_const(ir_node *mul) {
 498         ir_node *rc;
 499
 500         if (! is_Mul(mul))
 501                 return 0;
 502
 503         /* normalization put constants on the right side */
 504         rc = get_Mul_right(mul);
 505         if (is_Const(rc)) {
 506                 tarval *tv = get_Const_tarval(rc);
 507
 508                 if (tarval_is_long(tv)) {
 509                         long value = get_tarval_long(tv);
 510
 511                         if (value == 2 || value == 4 || value == 8) {
 512                                 /* do not reduce multiplications by 2, 4, 8 */
 513                                 return 1;
 514                         }
 515                 }
 516         }
 517         return 0;
 518 }  /* is_x86_shift_const */
 519 #endif
 520
 521 /**
 522  * Check if an IV represents a counter with constant limits.
 523  *
 524  * @param iv    any node of the induction variable
 525  * @param env   the environment
 526  */
 527 static int is_counter_iv(ir_node *iv, iv_env *env) {
 528         node_entry *e         = get_irn_ne(iv, env);
 529         scc        *pscc      = e->pscc;
 530         ir_node    *have_init = NULL;
 531         ir_node    *have_incr = NULL;
 532         ir_opcode  code       = iro_Bad;
 533         ir_node    *irn;
 534
 535         if (pscc->code != 0) {
 536                 /* already analysed */
 537                 return pscc->code != iro_Bad;
 538         }
 539
 540         pscc->code = iro_Bad;
 541         for (irn = pscc->head; irn != NULL; irn = e->next) {
 542                 if (is_Add(irn)) {
 543                         if (have_incr != NULL)
 544                                 return 0;
 545
 546                         have_incr = get_Add_right(irn);
 547                         if (! is_Const(have_incr)) {
 548                                 have_incr = get_Add_left(irn);
 549                                 if (! is_Const(have_incr))
 550                                         return 0;
 551                         }
 552                         code = iro_Add;
 553                 } else if (is_Sub(irn)) {
 554                         if (have_incr != NULL)
 555                                 return 0;
 556
 557                         have_incr = get_Sub_right(irn);
 558                         if (! is_Const(have_incr))
 559                                 return 0;
 560                         code = iro_Sub;
 561                 } else if (is_Phi(irn)) {
 562                         int i;
 563
 564                         for (i = get_Phi_n_preds(irn) - 1; i >= 0; --i) {
 565                                 ir_node    *pred = get_Phi_pred(irn, i);
 566                                 node_entry *ne   = get_irn_ne(pred, env);
 567
 568                                 if (ne->header == e->header)
 569                                         continue;
 570                                 if (have_init != NULL)
 571                                         return 0;
 572                                 have_init = pred;
 573                                 if (! is_Const(pred))
 574                                         return 0;
 575                         }
 576                 } else
 577                         return 0;
 578                 e = get_irn_ne(irn, env);
 579         }
 580         pscc->init = get_Const_tarval(have_init);
 581         pscc->incr = get_Const_tarval(have_incr);
 582         pscc->code = code;
 583         return code != iro_Bad;
 584 }  /* is_counter_iv */
 585
 586 /**
 587  * Check the users of an induction variable for register pressure.
 588  *
 589  * @param iv    any node of the induction variable
 590  * @param env   the environment
 591  *
 592  * @return non-zero if the register pressure is estimated
 593  *         to not increase, zero else
 594  */
 595 static int check_users_for_reg_pressure(ir_node *iv, iv_env *env) {
 596         ir_node    *irn, *header;
 597         ir_node    *have_user = NULL;
 598         ir_node    *have_cmp  = NULL;
 599         node_entry *e         = get_irn_ne(iv, env);
 600         scc        *pscc      = e->pscc;
 601
 602         header = e->header;
 603         for (irn = pscc->head; irn != NULL; irn = e->next) {
 604                 const ir_edge_t *edge;
 605
 606                 foreach_out_edge(irn, edge) {
 607                         ir_node    *user = get_edge_src_irn(edge);
 608                         node_entry *ne = get_irn_ne(user, env);
 609
 610                         if (e->header == ne->header) {
 611                                 /* found user from the same IV */
 612                                 continue;
 613                         }
 614                         if (is_Cmp(user)) {
 615                                 if (have_cmp != NULL) {
 616                                         /* more than one cmp, for now end here */
 617                                         return 0;
 618                                 }
 619                                 have_cmp = user;
 620                         } else {
 621                                 /* user is a real user of the IV */
 622                                 if (have_user != NULL) {
 623                                         /* found the second user */
 624                                         return 0;
 625                                 }
 626                                 have_user = user;
 627                         }
 628                 }
 629                 e = get_irn_ne(irn, env);
 630         }
 631
 632         if (have_user == NULL) {
 633                 /* no user, ignore */
 634                 return 1;
 635         }
 636
 637         if (have_cmp == NULL) {
 638                 /* fine, only one user, try to reduce */
 639                 return 1;
 640         }
 641         /*
 642          * We found one user AND at least one cmp.
 643          * We should check here if we can transform the Cmp.
 644          *
 645          * For now our capabilities for doing linear function test
 646          * are limited, so check if the iv has the right form: Only ONE
 647          * Phi, only one Add/Sub with a Const.
 648          */
 649         if (! is_counter_iv(iv, env))
 650                 return 0;
 651
 652         /*
 653          * Ok, we have only one increment AND it is a Const, we might be able
 654          * to do a linear function test replacement, so go on.
 655          */
 656         return 1;
 657 }  /* check_users_for_reg_pressure */
 658
 659 /**
 660  * Check if a node can be replaced (+, -, *).
 661  *
 662  * @param irn   the node to check
 663  * @param env   the environment
 664  *
 665  * @return non-zero if irn should be Replace'd
 666  */
 667 static int check_replace(ir_node *irn, iv_env *env) {
 668         ir_node   *left, *right, *iv, *rc;
 669         ir_op     *op  = get_irn_op(irn);
 670         ir_opcode code = get_op_code(op);
 671         ir_node   *liv, *riv;
 672
 673         switch (code) {
 674         case iro_Mul:
 675         case iro_Add:
 676         case iro_Sub:
 677                 iv = rc = NULL;
 678
 679                 left  = get_binop_left(irn);
 680                 right = get_binop_right(irn);
 681
 682                 liv = is_iv(left, env);
 683                 riv = is_iv(right, env);
 684                 if (liv && is_rc(right, liv)) {
 685                         iv = left; rc = right;
 686                 }
 687                 else if (riv && is_op_commutative(op) &&
 688                                     is_rc(left, riv)) {
 689                         iv = right; rc = left;
 690                 }
 691
 692                 if (iv) {
 693                         if (env->osr_flags & osr_flag_keep_reg_pressure) {
 694                                 if (! check_users_for_reg_pressure(iv, env))
 695                                         return 0;
 696                         }
 697                         return replace(irn, iv, rc, env);
 698                 }
 699                 break;
 700         default:
 701                 break;
 702         }
 703         return 0;
 704 }  /* check_replace */
 705
 706 /**
 707  * Check which SCC's are induction variables.
 708  *
 709  * @param pscc  a SCC
 710  * @param env   the environment
 711  */
 712 static void classify_iv(scc *pscc, iv_env *env) {
 713         ir_node *irn, *next, *header = NULL;
 714         node_entry *b, *h = NULL;
 715         int j, only_phi, num_outside;
 716         ir_node *out_rc;
 717
 718         /* find the header block for this scc */
 719         for (irn = pscc->head; irn; irn = next) {
 720                 node_entry *e = get_irn_link(irn);
 721                 ir_node *block = get_nodes_block(irn);
 722
 723                 next = e->next;
 724                 b = get_irn_ne(block, env);
 725
 726                 if (header) {
 727                         if (h->POnum < b->POnum) {
 728                                 header = block;
 729                                 h      = b;
 730                         }
 731                 }
 732                 else {
 733                         header = block;
 734                         h      = b;
 735                 }
 736         }
 737
 738         /* check if this scc contains only Phi, Add or Sub nodes */
 739         only_phi    = 1;
 740         num_outside = 0;
 741         out_rc      = NULL;
 742         for (irn = pscc->head; irn; irn = next) {
 743                 node_entry *e = get_irn_ne(irn, env);
 744
 745                 next = e->next;
 746                 switch (get_irn_opcode(irn)) {
 747                 case iro_Add:
 748                 case iro_Sub:
 749                         only_phi = 0;
 750                         /* fall through */
 751                 case iro_Phi:
 752                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 753                                 ir_node *pred  = get_irn_n(irn, j);
 754                                 node_entry *pe = get_irn_ne(pred, env);
 755
 756                                 if (pe->pscc != e->pscc) {
 757                                         /* not in the same SCC, must be a region const */
 758                                         if (! is_rc(pred, header)) {
 759                                                 /* not an induction variable */
 760                                                 goto fail;
 761                                         }
 762                                         if (! out_rc) {
 763                                                 out_rc = pred;
 764                                                 ++num_outside;
 765                                         } else if (out_rc != pred) {
 766                                                 ++num_outside;
 767                                         }
 768                                 }
 769                         }
 770                         break;
 771                 default:
 772                         /* not an induction variable */
 773                         goto fail;
 774                 }
 775         }
 776         /* found an induction variable */
 777         DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
 778         if (only_phi && num_outside == 1) {
 779                 /* a phi cycle with only one real predecessor can be collapsed */
 780                 DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 781
 782                 for (irn = pscc->head; irn; irn = next) {
 783                         node_entry *e = get_irn_ne(irn, env);
 784                         next = e->next;
 785                         e->header = NULL;
 786                         exchange(irn, out_rc);
 787                 }
 788                 ++env->replaced;
 789                 return;
 790         }
 791
 792         /* set the header for every node in this scc */
 793         for (irn = pscc->head; irn; irn = next) {
 794                 node_entry *e = get_irn_ne(irn, env);
 795                 e->header = header;
 796                 next = e->next;
 797                 DB((dbg, LEVEL_2, " %+F,", irn));
 798         }
 799         DB((dbg, LEVEL_2, "\n"));
 800         return;
 801
 802 fail:
 803         for (irn = pscc->head; irn; irn = next) {
 804                 node_entry *e = get_irn_ne(irn, env);
 805
 806                 next = e->next;
 807                 e->header = NULL;
 808         }
 809 }  /* classify_iv */
 810
 811 /**
 812  * Process an SCC for the operator strength reduction.
 813  *
 814  * @param pscc  the SCC
 815  * @param env   the environment
 816  */
 817 static void process_scc(scc *pscc, iv_env *env) {
 818         ir_node *head = pscc->head;
 819         node_entry *e = get_irn_link(head);
 820
 821 #ifdef DEBUG_libfirm
 822         {
 823                 ir_node *irn, *next;
 824
 825                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 826                 for (irn = pscc->head; irn != NULL; irn = next) {
 827                         node_entry *e = get_irn_link(irn);
 828
 829                         next = e->next;
 830
 831                         DB((dbg, LEVEL_4, " %+F,", irn));
 832                 }
 833                 DB((dbg, LEVEL_4, "\n"));
 834         }
 835 #endif
 836
 837         if (e->next == NULL) {
 838                 /* this SCC has only a single member */
 839                 check_replace(head, env);
 840         } else {
 841                 classify_iv(pscc, env);
 842         }
 843 }  /* process_scc */
 844
 845 /**
 846  * If an SCC is a Phi only cycle, remove it.
 847  *
 848  * @param pscc  an SCC that consists of Phi nodes only
 849  * @param env   the environment
 850  */
 851 static void remove_phi_cycle(scc *pscc, iv_env *env) {
 852         ir_node *irn, *next;
 853         int j;
 854         ir_node *out_rc;
 855
 856         /* check if this scc contains only Phi, Add or Sub nodes */
 857         out_rc      = NULL;
 858         for (irn = pscc->head; irn; irn = next) {
 859                 node_entry *e = get_irn_ne(irn, env);
 860
 861                 next = e->next;
 862                 if (! is_Phi(irn))
 863                         return;
 864
 865                 for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 866                         ir_node *pred  = get_irn_n(irn, j);
 867                         node_entry *pe = get_irn_ne(pred, env);
 868
 869                         if (pe->pscc != e->pscc) {
 870                                 /* not in the same SCC, must be the only input */
 871                                 if (! out_rc) {
 872                                         out_rc = pred;
 873                                 } else if (out_rc != pred) {
 874                                         return;
 875                                 }
 876                         }
 877                 }
 878         }
 879         /* found a Phi cycle */
 880         DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 881
 882         for (irn = pscc->head; irn; irn = next) {
 883                 node_entry *e = get_irn_ne(irn, env);
 884                 next = e->next;
 885                 e->header = NULL;
 886                 exchange(irn, out_rc);
 887         }
 888         ++env->replaced;
 889 }  /* remove_phi_cycle */
 890
 891 /**
 892  * Process a SCC for the Phi cycle remove.
 893  *
 894  * @param pscc  the SCC
 895  * @param env   the environment
 896  */
 897 static void process_phi_only_scc(scc *pscc, iv_env *env) {
 898         ir_node *head = pscc->head;
 899         node_entry *e = get_irn_link(head);
 900
 901 #ifdef DEBUG_libfirm
 902         {
 903                 ir_node *irn, *next;
 904
 905                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 906                 for (irn = pscc->head; irn; irn = next) {
 907                         node_entry *e = get_irn_link(irn);
 908
 909                         next = e->next;
 910
 911                         DB((dbg, LEVEL_4, " %+F,", irn));
 912                 }
 913                 DB((dbg, LEVEL_4, "\n"));
 914         }
 915 #endif
 916
 917         if (e->next != NULL)
 918                 remove_phi_cycle(pscc, env);
 919 }  /* process_phi_only_scc */
 920
 921
 922 /**
 923  * Push a node onto the stack.
 924  *
 925  * @param env   the environment
 926  * @param n     the node to push
 927  */
 928 static void push(iv_env *env, ir_node *n) {
 929         node_entry *e;
 930
 931         if (env->tos == ARR_LEN(env->stack)) {
 932                 int nlen = ARR_LEN(env->stack) * 2;
 933                 ARR_RESIZE(ir_node *, env->stack, nlen);
 934         }
 935         env->stack[env->tos++] = n;
 936         e = get_irn_ne(n, env);
 937         e->in_stack = 1;
 938 }  /* push */
 939
 940 /**
 941  * Pop a node from the stack.
 942  *
 943  * @param env   the environment
 944  *
 945  * @return  The topmost node
 946  */
 947 static ir_node *pop(iv_env *env) {
 948         ir_node *n = env->stack[--env->tos];
 949         node_entry *e = get_irn_ne(n, env);
 950
 951         e->in_stack = 0;
 952         return n;
 953 }  /* pop */
 954
 955 /**
 956  * Do Tarjan's SCC algorithm and drive OSR.
 957  *
 958  * @param irn  start at this node
 959  * @param env  the environment
 960  */
 961 static void dfs(ir_node *irn, iv_env *env) {
 962         int i, n;
 963         node_entry *node = get_irn_ne(irn, env);
 964
 965         mark_irn_visited(irn);
 966
 967         /* do not put blocks into the scc */
 968         if (is_Block(irn)) {
 969                 n = get_irn_arity(irn);
 970                 for (i = 0; i < n; ++i) {
 971                         ir_node *pred = get_irn_n(irn, i);
 972
 973                         if (!irn_visited(pred))
 974                                 dfs(pred, env);
 975                 }
 976         } else {
 977                 ir_node *block = get_nodes_block(irn);
 978
 979                 node->DFSnum = env->nextDFSnum++;
 980                 node->low    = node->DFSnum;
 981                 push(env, irn);
 982
 983                 /* handle the block */
 984                 if (!irn_visited(block))
 985                         dfs(block, env);
 986
 987                 n = get_irn_arity(irn);
 988                 for (i = 0; i < n; ++i) {
 989                         ir_node *pred = get_irn_n(irn, i);
 990                         node_entry *o = get_irn_ne(pred, env);
 991
 992                         if (!irn_visited(pred)) {
 993                                 dfs(pred, env);
 994                                 node->low = MIN(node->low, o->low);
 995                         }
 996                         if (o->DFSnum < node->DFSnum && o->in_stack)
 997                                 node->low = MIN(o->DFSnum, node->low);
 998                 }
 999                 if (node->low == node->DFSnum) {
1000                         scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
1001                         ir_node *x;
1002
1003                         memset(pscc, 0, sizeof(*pscc));
1004                         do {
1005                                 node_entry *e;
1006
1007                                 x = pop(env);
1008                                 e = get_irn_ne(x, env);
1009                                 e->pscc    = pscc;
1010                                 e->next    = pscc->head;
1011                                 pscc->head = x;
1012                         } while (x != irn);
1013
1014                         env->process_scc(pscc, env);
1015                 }
1016         }
1017 }  /* dfs */
1018
1019 /**
1020  * Do the DFS by starting at the End node of a graph.
1021  *
1022  * @param irg  the graph to process
1023  * @param env  the environment
1024  */
1025 static void do_dfs(ir_graph *irg, iv_env *env) {
1026         ir_graph *rem = current_ir_graph;
1027         ir_node  *end = get_irg_end(irg);
1028         int i;
1029
1030         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
1031
1032         current_ir_graph = irg;
1033         inc_irg_visited(irg);
1034
1035         /* visit all visible nodes */
1036         dfs(end, env);
1037
1038         /* visit the keep-alives */
1039         for (i = get_End_n_keepalives(end) - 1; i >= 0; --i) {
1040                 ir_node *ka = get_End_keepalive(end, i);
1041
1042                 if (!irn_visited(ka))
1043                         dfs(ka, env);
1044         }
1045
1046         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
1047
1048         current_ir_graph = rem;
1049 }  /* do_dfs */
1050
1051 /**
1052  * Post-block-walker: assign the post-order number.
1053  */
1054 static void assign_po(ir_node *block, void *ctx) {
1055         iv_env *env = ctx;
1056         node_entry *e = get_irn_ne(block, env);
1057
1058         e->POnum = env->POnum++;
1059 }  /* assign_po */
1060
1061 /**
1062  * Apply one LFTR edge operation.
1063  * Return NULL if the transformation cannot be done safely without
1064  * an Overflow.
1065  *
1066  * @param iv   the induction variable
1067  * @param rc   the constant that should be translated
1068  * @param e    the LFTR edge
1069  * @param env  the IV environment
1070  *
1071  * @return the translated region constant or NULL
1072  *         if the translation was not possible
1073  *
1074  * @note
1075  * In the current implementation only the last edge is stored, so
1076  * only one chain exists. That's why we might miss some opportunities.
1077  */
1078 static ir_node *applyOneEdge(ir_node *iv, ir_node *rc, LFTR_edge *e, iv_env *env) {
1079         if (env->osr_flags & osr_flag_lftr_with_ov_check) {
1080                 tarval *tv_l, *tv_r, *tv, *tv_init, *tv_incr, *tv_end;
1081                 tarval_int_overflow_mode_t ovmode;
1082                 scc *pscc;
1083
1084                 if (! is_counter_iv(iv, env)) {
1085                         DB((dbg, LEVEL_4, " not counter IV"));
1086                         return NULL;
1087                 }
1088
1089                 /* overflow can only be decided for Consts */
1090                 if (! is_Const(e->rc)) {
1091                         if (e->code == iro_Add && mode_is_reference(get_irn_mode(e->rc))) {
1092                                 /* However we allow ONE Pointer Add, as pointer arithmetic with wrap
1093                                    around is undefined anyway */
1094                                 return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(e->rc));
1095                         }
1096                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
1097                         return NULL;
1098                 }
1099
1100                 tv_l = get_Const_tarval(rc);
1101                 tv_r = get_Const_tarval(e->rc);
1102
1103                 ovmode = tarval_get_integer_overflow_mode();
1104                 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
1105
1106                 pscc    = get_iv_scc(iv, env);
1107                 tv_incr = pscc->incr;
1108                 tv_init = pscc->init;
1109
1110                 /*
1111                  * Check that no overflow occurs:
1112                  * init must be transformed without overflow
1113                  * the new rc must be transformed without overflow
1114                  * rc +/- incr must be possible without overflow
1115                  */
1116                 switch (e->code) {
1117                 case iro_Mul:
1118                         tv      = tarval_mul(tv_l, tv_r);
1119                         tv_init = tarval_mul(tv_init, tv_r);
1120                         tv_incr = tarval_mul(tv_incr, tv_r);
1121                         DB((dbg, LEVEL_4, " * %+F", tv_r));
1122                         break;
1123                 case iro_Add:
1124                         tv      = tarval_add(tv_l, tv_r);
1125                         tv_init = tarval_add(tv_init, tv_r);
1126                         DB((dbg, LEVEL_4, " + %+F", tv_r));
1127                         break;
1128                 case iro_Sub:
1129                         tv      = tarval_sub(tv_l, tv_r, NULL);
1130                         tv_init = tarval_sub(tv_init, tv_r, NULL);
1131                         DB((dbg, LEVEL_4, " - %+F", tv_r));
1132                         break;
1133                 default:
1134                         panic("Unsupported opcode");
1135                         tv = tarval_bad;
1136                 }
1137
1138                 if (pscc->code == iro_Add) {
1139                         tv_end = tarval_add(tv, tv_incr);
1140                 } else {
1141                         assert(pscc->code == iro_Sub);
1142                         tv_end = tarval_sub(tv, tv_incr, NULL);
1143                 }
1144
1145                 tarval_set_integer_overflow_mode(ovmode);
1146
1147                 if (tv == tarval_bad || tv_init == tarval_bad || tv_end == tarval_bad) {
1148                         DB((dbg, LEVEL_4, " = OVERFLOW"));
1149                         return NULL;
1150                 }
1151                 return new_Const(tv);
1152         }
1153         return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(e->dst));
1154 }  /* applyOneEdge */
1155
1156 /**
1157  * Applies the operations represented by the LFTR edges to a
1158  * region constant and returns the value.
1159  * Return NULL if the transformation cannot be done safely without
1160  * an Overflow.
1161  *
1162  * @param pIV  points to the IV node that starts the LFTR edge chain
1163  *             after translation points to the new IV
1164  * @param rc   the region constant that should be translated
1165  * @param env  the IV environment
1166  *
1167  * @return the translated region constant or NULL
1168  *         if the translation was not possible
1169  */
1170 static ir_node *applyEdges(ir_node **pIV, ir_node *rc, iv_env *env) {
1171         ir_node *iv = *pIV;
1172         if (env->osr_flags & osr_flag_lftr_with_ov_check) {
1173                 /* overflow can only be decided for Consts */
1174                 if (! is_counter_iv(iv, env)) {
1175                         DB((dbg, LEVEL_4, "not counter IV\n", rc));
1176                         return NULL;
1177                 }
1178                 if (! is_Const(rc)) {
1179                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
1180                         return NULL;
1181                 }
1182                 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
1183         }
1184
1185         for (; rc;) {
1186                 LFTR_edge *e = LFTR_find(iv, env);
1187                 if (e != NULL) {
1188                         rc = applyOneEdge(iv, rc, e, env);
1189                         iv = e->dst;
1190                 } else
1191                         break;
1192         }
1193         DB((dbg, LEVEL_3, "\n"));
1194         *pIV = iv;
1195         return rc;
1196 }  /* applyEdges */
1197
1198 /**
1199  * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
1200  * and tries to optimize them.
1201  */
1202 static void do_lftr(ir_node *cmp, void *ctx) {
1203         iv_env *env = ctx;
1204         ir_node *left, *right, *liv, *riv;
1205         ir_node *iv, *rc;
1206         ir_node *nleft = NULL, *nright = NULL;
1207
1208         if (!is_Cmp(cmp))
1209                 return;
1210
1211         left  = get_Cmp_left(cmp);
1212         right = get_Cmp_right(cmp);
1213
1214         liv = is_iv(left, env);
1215         riv = is_iv(right, env);
1216         if (liv && is_rc(right, liv)) {
1217                 iv = left; rc = right;
1218
1219                 nright = applyEdges(&iv, rc, env);
1220                 nleft  = iv;
1221         }
1222         else if (riv && is_rc(left, riv)) {
1223                 iv = right; rc = left;
1224
1225                 nleft  = applyEdges(&iv, rc, env);
1226                 nright = iv;
1227         }
1228
1229         if (nleft && nright) {
1230                 DB((dbg, LEVEL_2, "  LFTR for %+F\n", cmp));
1231                 set_Cmp_left(cmp, nleft);
1232                 set_Cmp_right(cmp, nright);
1233                 ++env->lftr_replaced;
1234         }
1235 }  /* do_lftr */
1236
1237 /**
1238  * do linear function test replacement.
1239  *
1240  * @param irg   the graph that should be optimized
1241  * @param env   the IV environment
1242  */
1243 static void lftr(ir_graph *irg, iv_env *env) {
1244         irg_walk_graph(irg, NULL, do_lftr, env);
1245 }  /* lftr */
1246
1247 /**
1248  * Pre-walker: set all node links to NULL and fix the
1249  * block of Proj nodes.
1250  */
1251 static void clear_and_fix(ir_node *irn, void *env) {
1252         int *moved = env;
1253         set_irn_link(irn, NULL);
1254
1255         if (is_Proj(irn)) {
1256                 ir_node *pred       = get_Proj_pred(irn);
1257                 ir_node *pred_block = get_nodes_block(pred);
1258
1259                 if (get_nodes_block(irn) != pred_block) {
1260                         set_nodes_block(irn, pred_block);
1261                         *moved = 1;
1262                 }
1263         }
1264 }  /* clear_and_fix */
1265
1266
1267 /* Remove any Phi cycles with only one real input. */
1268 void remove_phi_cycles(ir_graph *irg) {
1269         iv_env   env;
1270         ir_graph *rem;
1271         int      projs_moved;
1272
1273         rem = current_ir_graph;
1274         current_ir_graph = irg;
1275
1276         FIRM_DBG_REGISTER(dbg, "firm.opt.remove_phi");
1277
1278         DB((dbg, LEVEL_1, "Doing Phi cycle removement for %+F\n", irg));
1279
1280         obstack_init(&env.obst);
1281         env.stack         = NEW_ARR_F(ir_node *, 128);
1282         env.tos           = 0;
1283         env.nextDFSnum    = 0;
1284         env.POnum         = 0;
1285         env.quad_map      = NULL;
1286         env.lftr_edges    = NULL;
1287         env.replaced      = 0;
1288         env.lftr_replaced = 0;
1289         env.osr_flags     = 0;
1290         env.need_postpass = 0;
1291         env.process_scc   = process_phi_only_scc;
1292
1293         /* Clear all links and move Proj nodes into the
1294            the same block as it's predecessors.
1295            This can improve the placement of new nodes.
1296          */
1297         projs_moved = 0;
1298         irg_walk_graph(irg, NULL, clear_and_fix, &projs_moved);
1299         if (projs_moved)
1300                 set_irg_outs_inconsistent(irg);
1301
1302         /* we need outs for calculating the post order */
1303         assure_irg_outs(irg);
1304
1305         /* calculate the post order number for blocks. */
1306         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
1307
1308         /* calculate the SCC's and drive OSR. */
1309         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1310         do_dfs(irg, &env);
1311         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1312
1313         if (env.replaced) {
1314                 set_irg_outs_inconsistent(irg);
1315                 DB((dbg, LEVEL_1, "remove_phi_cycles: %u Cycles removed\n\n", env.replaced));
1316         }
1317
1318         DEL_ARR_F(env.stack);
1319         obstack_free(&env.obst, NULL);
1320
1321         current_ir_graph = rem;
1322 }  /* remove_phi_cycles */
1323
1324 ir_graph_pass_t *remove_phi_cycles_pass(const char *name, int verify, int dump)
1325 {
1326         return def_graph_pass(name ? name : "remove_phi_cycles", verify, dump, remove_phi_cycles);
1327 }  /* remove_phi_cycles_pass */
1328
1329 /**
1330  * Post-walker: fix Add and Sub nodes that where results of I<->P conversions.
1331  */
1332 static void fix_adds_and_subs(ir_node *irn, void *ctx) {
1333         (void) ctx;
1334
1335         if (is_Add(irn)) {
1336                 ir_mode *mode = get_irn_mode(irn);
1337
1338                 if (mode_is_int(mode)) {
1339                         ir_node *pred;
1340
1341                         pred = get_Add_left(irn);
1342                         if (get_irn_mode(pred) != mode) {
1343                                 ir_node *block = get_nodes_block(pred);
1344
1345                                 pred = new_r_Conv(block, pred, mode);
1346                                 set_Add_left(irn, pred);
1347                         }
1348                         pred = get_Add_right(irn);
1349                         if (get_irn_mode(pred) != mode) {
1350                                 ir_node *block = get_nodes_block(pred);
1351
1352                                 pred = new_r_Conv(block, pred, mode);
1353                                 set_Add_right(irn, pred);
1354                         }
1355                 }
1356         } else if (is_Sub(irn)) {
1357                 ir_mode *mode = get_irn_mode(irn);
1358
1359                 if (mode_is_int(mode)) {
1360                         ir_node *left   = get_Sub_left(irn);
1361                         ir_node *right  = get_Sub_right(irn);
1362                         ir_mode *l_mode = get_irn_mode(left);
1363                         ir_mode *r_mode = get_irn_mode(right);
1364
1365                         if (mode_is_int(l_mode) && mode_is_int(r_mode)) {
1366                                 if (l_mode != mode) {
1367                                         ir_node *block = get_nodes_block(left);
1368
1369                                         left = new_r_Conv(block, left, mode);
1370                                         set_Sub_left(irn, left);
1371                                 }
1372                                 if (r_mode != mode) {
1373                                         ir_node *block = get_nodes_block(right);
1374
1375                                         right = new_r_Conv(block, right, mode);
1376                                         set_Sub_right(irn, right);
1377                                 }
1378                         }
1379                 }
1380         }
1381 }  /* fix_adds_and_subs */
1382
1383 /* Performs Operator Strength Reduction for the passed graph. */
1384 void opt_osr(ir_graph *irg, unsigned flags) {
1385         iv_env   env;
1386         ir_graph *rem;
1387         int      edges;
1388         int      projs_moved;
1389
1390         if (! get_opt_strength_red()) {
1391                 /* only kill Phi cycles  */
1392                 remove_phi_cycles(irg);
1393                 return;
1394         }
1395
1396         rem = current_ir_graph;
1397         current_ir_graph = irg;
1398
1399         FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
1400
1401         DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
1402
1403         obstack_init(&env.obst);
1404         env.stack         = NEW_ARR_F(ir_node *, 128);
1405         env.tos           = 0;
1406         env.nextDFSnum    = 0;
1407         env.POnum         = 0;
1408         env.quad_map      = new_set(quad_cmp, 64);
1409         env.lftr_edges    = new_set(LFTR_cmp, 64);
1410         env.replaced      = 0;
1411         env.lftr_replaced = 0;
1412         env.osr_flags     = flags;
1413         env.need_postpass = 0;
1414         env.process_scc   = process_scc;
1415
1416         /* Clear all links and move Proj nodes into the
1417            the same block as it's predecessors.
1418            This can improve the placement of new nodes.
1419          */
1420         projs_moved = 0;
1421         irg_walk_graph(irg, NULL, clear_and_fix, &projs_moved);
1422         if (projs_moved)
1423                 set_irg_outs_inconsistent(irg);
1424
1425         /* we need dominance */
1426         assure_doms(irg);
1427
1428         edges = edges_assure(irg);
1429
1430         /* calculate the post order number for blocks by walking the out edges. */
1431         assure_irg_outs(irg);
1432         irg_block_edges_walk(get_irg_start_block(irg), NULL, assign_po, &env);
1433
1434         /* calculate the SCC's and drive OSR. */
1435         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1436         do_dfs(irg, &env);
1437
1438         if (env.replaced) {
1439                 if (env.need_postpass)
1440                         irg_walk_graph(irg, NULL, fix_adds_and_subs, &env);
1441
1442                 /* try linear function test replacements */
1443                 lftr(irg, &env);
1444                 (void)lftr;
1445
1446                 set_irg_outs_inconsistent(irg);
1447                 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
1448         }
1449         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1450
1451         del_set(env.lftr_edges);
1452         del_set(env.quad_map);
1453         DEL_ARR_F(env.stack);
1454         obstack_free(&env.obst, NULL);
1455
1456         if (! edges)
1457                 edges_deactivate(irg);
1458
1459         current_ir_graph = rem;
1460 }  /* opt_osr */
1461
1462 struct pass_t {
1463         ir_graph_pass_t pass;
1464         unsigned        flags;
1465 };
1466
1467 /**
1468 * Wrapper for running opt_osr() as an ir_graph pass.
1469 */
1470 static int pass_wrapper(ir_graph *irg, void *context) {
1471         struct pass_t *pass = context;
1472         opt_osr(irg, pass->flags);
1473         return 0;
1474 }  /* pass_wrapper */
1475
1476 ir_graph_pass_t *opt_osr_pass(const char *name, int verify, int dump, unsigned flags)
1477 {
1478         struct pass_t *pass = xmalloc(sizeof(*pass));
1479
1480         pass->pass.kind       = k_ir_prog_pass;
1481         pass->pass.run_on_irg = pass_wrapper;
1482         pass->pass.context    = pass;
1483         pass->pass.name       = name ? name : "osr";
1484         pass->pass.verify     = verify != 0;
1485         pass->pass.dump       = dump != 0;
1486
1487         pass->flags = flags;
1488
1489         INIT_LIST_HEAD(&pass->pass.list);
1490
1491         return &pass->pass;
1492 }  /* opt_osr_pass */