nsz Git - libfirm/blob - ir/opt/opt_osr.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Operator Strength Reduction.
  23  * @date    12.5.2006
  24  * @author  Michael Beck
  25  * @version $Id$
  26  * @summary
  27  *  Implementation of the Operator Strength Reduction algorithm
  28  *  by Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick.
  29  */
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33
  34 #include "iroptimize.h"
  35 #include "irgraph.h"
  36 #include "ircons.h"
  37 #include "irop_t.h"
  38 #include "irloop.h"
  39 #include "irdom.h"
  40 #include "irgmod.h"
  41 #include "irflag_t.h"
  42 #include "irgwalk.h"
  43 #include "irouts.h"
  44 #include "debug.h"
  45 #include "obst.h"
  46 #include "set.h"
  47 #include "tv.h"
  48 #include "hashptr.h"
  49 #include "irtools.h"
  50 #include "irloop_t.h"
  51 #include "array.h"
  52 #include "firmstat.h"
  53 #include "xmalloc.h"
  54
  55 /** The debug handle. */
  56 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  57
  58 /** A scc. */
  59 typedef struct scc {
  60         ir_node *head;          /**< the head of the list */
  61 } scc;
  62
  63 /** A node entry */
  64 typedef struct node_entry {
  65         unsigned DFSnum;    /**< the DFS number of this node */
  66         unsigned low;       /**< the low number of this node */
  67         ir_node  *header;   /**< the header of this node */
  68         int      in_stack;  /**< flag, set if the node is on the stack */
  69         ir_node  *next;     /**< link to the next node the the same scc */
  70         scc      *pscc;     /**< the scc of this node */
  71         unsigned POnum;     /**< the post order number for blocks */
  72 } node_entry;
  73
  74 /** The environment. */
  75 typedef struct iv_env {
  76         struct obstack obst;    /**< an obstack for allocations */
  77         ir_node  **stack;       /**< the node stack */
  78         int      tos;           /**< tos index */
  79         unsigned nextDFSnum;    /**< the current DFS number */
  80         unsigned POnum;         /**< current post order number */
  81         set      *quad_map;     /**< a map from (op, iv, rc) to node */
  82         set      *lftr_edges;   /**< the set of lftr edges */
  83         unsigned replaced;      /**< number of replaced ops */
  84         unsigned lftr_replaced; /**< number of applied linear function test replacements */
  85         unsigned flags;         /**< additional flags */
  86         /** Function called to process a SCC. */
  87         void (*process_scc)(scc *pscc, struct iv_env *env);
  88 } iv_env;
  89
  90 /**
  91  * An entry in the (op, node, node) -> node map.
  92  */
  93 typedef struct quadruple_t {
  94         ir_opcode code;  /**< the opcode of the reduced operation */
  95         ir_node   *op1;  /**< the first operand the reduced operation */
  96         ir_node   *op2;  /**< the second operand of the reduced operation */
  97
  98         ir_node   *res; /**< the reduced operation */
  99 } quadruple_t;
 100
 101 /**
 102  * A LFTR edge.
 103  */
 104 typedef struct LFTR_edge {
 105         ir_node   *src;   /**< the source node */
 106         ir_node   *dst;   /**< the destination node */
 107         ir_opcode code;   /**< the opcode that must be applied */
 108         ir_node   *rc;    /**< the region const that must be applied */
 109 } LFTR_edge;
 110
 111 /* forward */
 112 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
 113
 114 /**
 115  * Compare two LFTR edges.
 116  */
 117 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 118         const LFTR_edge *l1 = e1;
 119         const LFTR_edge *l2 = e2;
 120         (void) size;
 121
 122         return l1->src != l2->src;
 123 }
 124
 125 #if 0
 126 /**
 127  * Find a LFTR edge.
 128  */
 129 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 130         LFTR_edge key;
 131
 132         key.src  = src;
 133
 134         return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 135 }
 136 #endif
 137
 138 /**
 139  * Add a LFTR edge.
 140  */
 141 static void LFTR_add(ir_node *src, ir_node *dst, ir_opcode code, ir_node *rc, iv_env *env) {
 142         LFTR_edge key;
 143
 144         key.src  = src;
 145         key.dst  = dst;
 146         key.code = code;
 147         key.rc   = rc;
 148
 149         /*
 150          * There might be more than one edge here. This is rather bad
 151          * because we currently store only one.
 152          */
 153 //      assert(LFTR_find(src, env) == NULL);
 154         set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 155 }
 156
 157 /**
 158  * Gets the node_entry of a node
 159  */
 160 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 161         node_entry *e = get_irn_link(irn);
 162
 163         if (! e) {
 164                 e = obstack_alloc(&env->obst, sizeof(*e));
 165                 memset(e, 0, sizeof(*e));
 166                 set_irn_link(irn, e);
 167         }
 168         return e;
 169 }
 170
 171 /**
 172  * Check if irn is an IV.
 173  *
 174  * @param irn  the node to check
 175  * @param env  the environment
 176  *
 177  * @returns the header if it is one, NULL else
 178  */
 179 static ir_node *is_iv(ir_node *irn, iv_env *env) {
 180         return get_irn_ne(irn, env)->header;
 181 }
 182
 183 /**
 184  * Check if irn is a region constant.
 185  * The block or irn must strictly dominate the header block.
 186  *
 187  * @param irn           the node to check
 188  * @param header_block  the header block of the induction variable
 189  */
 190 static int is_rc(ir_node *irn, ir_node *header_block) {
 191         ir_node *block = get_nodes_block(irn);
 192
 193         return (block != header_block) && block_dominates(block, header_block);
 194 }
 195
 196 /**
 197  * Set compare function for the quad set.
 198  */
 199 static int quad_cmp(const void *e1, const void *e2, size_t size) {
 200         const quadruple_t *c1 = e1;
 201         const quadruple_t *c2 = e2;
 202         (void) size;
 203
 204         return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 205 }
 206
 207 /**
 208  * Check if an reduced operation was already calculated.
 209  *
 210  * @param code  the opcode of the operation
 211  * @param op1   the first operand of the operation
 212  * @param op2   the second operand of the operation
 213  * @param env   the environment
 214  *
 215  * @return the already reduced node or NULL if this operation is not yet reduced
 216  */
 217 static ir_node *search(ir_opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 218         quadruple_t key, *entry;
 219
 220         key.code = code;
 221         key.op1 = op1;
 222         key.op2 = op2;
 223
 224         entry = set_find(env->quad_map, &key, sizeof(key),
 225                          (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 226         if (entry)
 227                 return entry->res;
 228         return NULL;
 229 }
 230
 231 /**
 232  * Add an reduced operation.
 233  *
 234  * @param code    the opcode of the operation
 235  * @param op1     the first operand of the operation
 236  * @param op2     the second operand of the operation
 237  * @param result  the result of the reduced operation
 238  * @param env     the environment
 239  */
 240 static void add(ir_opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
 241         quadruple_t key;
 242
 243         key.code = code;
 244         key.op1  = op1;
 245         key.op2  = op2;
 246         key.res  = result;
 247
 248         set_insert(env->quad_map, &key, sizeof(key),
 249                    (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 250 }
 251
 252 /**
 253  * Find a location where to place a bin-op whose operands are in
 254  * block1 and block2.
 255  *
 256  * @param block1  the block of the first operand
 257  * @param block2  the block of the second operand
 258  *
 259  * Note that we know here that such a place must exists. Moreover, this means
 260  * that either block1 dominates block2 or vice versa. So, just return
 261  * the "smaller" one.
 262  */
 263 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 264         if (block_dominates(block1, block2))
 265                 return block2;
 266         assert(block_dominates(block2, block1));
 267         return block1;
 268 }
 269
 270 /**
 271  * Create a node that executes an op1 code op1 operation.
 272  *
 273  * @param code   the opcode to execute
 274  * @param db     debug info to add to the new node
 275  * @param op1    the first operand
 276  * @param op2    the second operand
 277  * @param mode   the mode of the new operation
 278  *
 279  * @return the newly created node
 280  */
 281 static ir_node *do_apply(ir_opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 282         ir_graph *irg = current_ir_graph;
 283         ir_node *result;
 284         ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
 285
 286         switch (code) {
 287         case iro_Mul:
 288                 result = new_rd_Mul(db, irg, block, op1, op2, mode);
 289                 break;
 290         case iro_Add:
 291                 result = new_rd_Add(db, irg, block, op1, op2, mode);
 292                 break;
 293         case iro_Sub:
 294                 result = new_rd_Sub(db, irg, block, op1, op2, mode);
 295                 break;
 296         default:
 297                 assert(0);
 298                 result = NULL;
 299         }
 300         return result;
 301 }
 302
 303 /**
 304  * The Apply operation.
 305  *
 306  * @param orig   the node that represent the original operation and determines
 307  *               the opcode, debug-info and mode of a newly created one
 308  * @param op1    the first operand
 309  * @param op2    the second operand
 310  * @param env    the environment
 311  *
 312  * @return the newly created node
 313  */
 314 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 315         ir_opcode code = get_irn_opcode(orig);
 316         ir_node *result = search(code, op1, op2, env);
 317
 318         if (! result) {
 319                 dbg_info *db = get_irn_dbg_info(orig);
 320                 ir_node *op1_header = get_irn_ne(op1, env)->header;
 321                 ir_node *op2_header = get_irn_ne(op2, env)->header;
 322
 323                 if (op1_header != NULL && is_rc(op2, op1_header)) {
 324                         result = reduce(orig, op1, op2, env);
 325                 }
 326                 else if (op2_header != NULL && is_rc(op1, op2_header)) {
 327                         result = reduce(orig, op2, op1, env);
 328                 }
 329                 else {
 330                         result = do_apply(code, db, op1, op2, get_irn_mode(orig));
 331                         get_irn_ne(result, env)->header = NULL;         }
 332         }
 333         return result;
 334 }
 335
 336 /**
 337  * The Reduce operation.
 338  *
 339  * @param orig   the node that represent the original operation and determines
 340  *               the opcode, debug-info and mode of a newly created one
 341  * @param iv     the induction variable
 342  * @param rc     the region constant
 343  * @param env    the environment
 344  *
 345  * @return the reduced node
 346  */
 347 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 348         ir_opcode code = get_irn_opcode(orig);
 349         ir_node *result = search(code, iv, rc, env);
 350
 351         if (! result) {
 352                 node_entry *e, *iv_e;
 353                 int i, n;
 354                 ir_mode *mode = get_irn_mode(orig);
 355
 356                 result = exact_copy(iv);
 357
 358                 /* Beware: we must always create a new nduction variable with the same mode
 359                    as the node we are replacing. Espicially this means the mode might be changed
 360                    from P to I and back. This is always possible, because we have only Phi, Add
 361                    and Sub nodes. */
 362                 set_irn_mode(result, mode);
 363                 add(code, iv, rc, result, env);
 364                 DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 365                         get_irn_opname(orig), rc));
 366
 367                 iv_e = get_irn_ne(iv, env);
 368                 e    = get_irn_ne(result, env);
 369                 e->header = iv_e->header;
 370
 371                 /* create the LFTR edge */
 372                 LFTR_add(iv, result, code, rc, env);
 373
 374                 n = get_irn_arity(result);
 375                 for (i = 0; i < n; ++i) {
 376                         ir_node *o = get_irn_n(result, i);
 377
 378                         e = get_irn_ne(o, env);
 379                         if (e->header == iv_e->header)
 380                                 o = reduce(orig, o, rc, env);
 381                         else if (is_Phi(result))
 382                                 o = apply(orig, o, rc, env);
 383                         else {
 384                                 if (code == iro_Mul)
 385                                         o = apply(orig, o, rc, env);
 386                         }
 387                         set_irn_n(result, i, o);
 388                 }
 389         }
 390         else {
 391                 DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
 392                         get_irn_opname(orig), rc));
 393         }
 394         return result;
 395 }
 396
 397 /**
 398  * The Replace operation.
 399  *
 400  * @param irn   the node that will be replaced
 401  * @param iv    the induction variable
 402  * @param rc    the region constant
 403  * @param env   the environment
 404  */
 405 static int replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 406         ir_node *result;
 407         ir_loop *iv_loop  = get_irn_loop(get_nodes_block(iv));
 408         ir_loop *irn_loop = get_irn_loop(get_nodes_block(irn));
 409
 410         /* only replace nodes that are in the same (or deeper loops) */
 411         if (get_loop_depth(irn_loop) >= get_loop_depth(iv_loop)) {
 412                 DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
 413
 414                 result = reduce(irn, iv, rc, env);
 415                 if (result != irn) {
 416                         node_entry *e, *iv_e;
 417
 418                         hook_strength_red(current_ir_graph, irn);
 419                         exchange(irn, result);
 420                         e = get_irn_ne(result, env);
 421                         iv_e = get_irn_ne(iv, env);
 422                         e->header = iv_e->header;
 423                 }
 424                 return 1;
 425         }
 426         return 0;
 427 }
 428
 429 /**
 430  * Check if a node can be replaced (+, -, *).
 431  *
 432  * @param irn   the node to check
 433  * @param env   the environment
 434  *
 435  * @return non-zero if irn should be Replace'd
 436  */
 437 static int check_replace(ir_node *irn, iv_env *env) {
 438         ir_node   *left, *right, *iv, *rc;
 439         ir_op     *op  = get_irn_op(irn);
 440         ir_opcode code = get_op_code(op);
 441         ir_node   *liv, *riv;
 442
 443         switch (code) {
 444         case iro_Mul:
 445         case iro_Add:
 446         case iro_Sub:
 447                 iv = rc = NULL;
 448
 449                 left  = get_binop_left(irn);
 450                 right = get_binop_right(irn);
 451
 452                 liv = is_iv(left, env);
 453                 riv = is_iv(right, env);
 454                 if (liv && is_rc(right, liv)) {
 455                         iv = left; rc = right;
 456                 }
 457                 else if (riv && is_op_commutative(op) &&
 458                                     is_rc(left, riv)) {
 459                         iv = right; rc = left;
 460                 }
 461
 462                 if (iv) {
 463                         if (code == iro_Mul && env->flags & osr_flag_ignore_x86_shift) {
 464                                 if (is_Const(rc)) {
 465                                         tarval *tv = get_Const_tarval(rc);
 466
 467                                         if (tarval_is_long(tv)) {
 468                                                 long value = get_tarval_long(tv);
 469
 470                                                 if (value == 2 || value == 4 || value == 8) {
 471                                                         /* do not reduce multiplications by 2, 4, 8 */
 472                                                         break;
 473                                                 }
 474                                         }
 475                                 }
 476                         }
 477
 478                         return replace(irn, iv, rc, env);
 479                 }
 480                 break;
 481         default:
 482                 break;
 483         }
 484         return 0;
 485 }
 486
 487 /**
 488  * Check which SCC's are induction variables.
 489  *
 490  * @param pscc  a SCC
 491  * @param env   the environment
 492  */
 493 static void classify_iv(scc *pscc, iv_env *env) {
 494         ir_node *irn, *next, *header = NULL;
 495         node_entry *b, *h = NULL;
 496         int j, only_phi, num_outside;
 497         ir_node *out_rc;
 498
 499         /* find the header block for this scc */
 500         for (irn = pscc->head; irn; irn = next) {
 501                 node_entry *e = get_irn_link(irn);
 502                 ir_node *block = get_nodes_block(irn);
 503
 504                 next = e->next;
 505                 b = get_irn_ne(block, env);
 506
 507                 if (header) {
 508                         if (h->POnum < b->POnum) {
 509                                 header = block;
 510                                 h      = b;
 511                         }
 512                 }
 513                 else {
 514                         header = block;
 515                         h      = b;
 516                 }
 517         }
 518
 519         /* check if this scc contains only Phi, Add or Sub nodes */
 520         only_phi    = 1;
 521         num_outside = 0;
 522         out_rc      = NULL;
 523         for (irn = pscc->head; irn; irn = next) {
 524                 node_entry *e = get_irn_ne(irn, env);
 525
 526                 next = e->next;
 527                 switch (get_irn_opcode(irn)) {
 528                 case iro_Add:
 529                 case iro_Sub:
 530                         only_phi = 0;
 531                         /* fall through */
 532                 case iro_Phi:
 533                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 534                                 ir_node *pred  = get_irn_n(irn, j);
 535                                 node_entry *pe = get_irn_ne(pred, env);
 536
 537                                 if (pe->pscc != e->pscc) {
 538                                         /* not in the same SCC, must be a region const */
 539                                         if (! is_rc(pred, header)) {
 540                                                 /* not an induction variable */
 541                                                 goto fail;
 542                                         }
 543                                         if (! out_rc) {
 544                                                 out_rc = pred;
 545                                                 ++num_outside;
 546                                         } else if (out_rc != pred) {
 547                                                 ++num_outside;
 548                                         }
 549                                 }
 550                         }
 551                         break;
 552                 default:
 553                         /* not an induction variable */
 554                         goto fail;
 555                 }
 556         }
 557         /* found an induction variable */
 558         DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
 559         if (only_phi && num_outside == 1) {
 560                 /* a phi cycle with only one real predecessor can be collapsed */
 561                 DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 562
 563                 for (irn = pscc->head; irn; irn = next) {
 564                         node_entry *e = get_irn_ne(irn, env);
 565                         next = e->next;
 566                         e->header = NULL;
 567                         exchange(irn, out_rc);
 568                 }
 569                 ++env->replaced;
 570                 return;
 571         }
 572
 573         /* set the header for every node in this scc */
 574         for (irn = pscc->head; irn; irn = next) {
 575                 node_entry *e = get_irn_ne(irn, env);
 576                 e->header = header;
 577                 next = e->next;
 578                 DB((dbg, LEVEL_2, " %+F,", irn));
 579         }
 580         DB((dbg, LEVEL_2, "\n"));
 581         return;
 582
 583 fail:
 584         for (irn = pscc->head; irn; irn = next) {
 585                 node_entry *e = get_irn_ne(irn, env);
 586
 587                 next = e->next;
 588                 if (! check_replace(irn, env))
 589                         e->header = NULL;
 590         }
 591 }
 592
 593 /**
 594  * Process a SCC for the operator strength reduction.
 595  *
 596  * @param pscc  the SCC
 597  * @param env   the environment
 598  */
 599 static void process_scc(scc *pscc, iv_env *env) {
 600         ir_node *head = pscc->head;
 601         node_entry *e = get_irn_link(head);
 602
 603 #ifdef DEBUG_libfirm
 604         {
 605                 ir_node *irn, *next;
 606
 607                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 608                 for (irn = pscc->head; irn; irn = next) {
 609                         node_entry *e = get_irn_link(irn);
 610
 611                         next = e->next;
 612
 613                         DB((dbg, LEVEL_4, " %+F,", irn));
 614                 }
 615                 DB((dbg, LEVEL_4, "\n"));
 616         }
 617 #endif
 618
 619         if (e->next == NULL) {
 620                 /* this SCC has only a single member */
 621                 check_replace(head, env);
 622         } else {
 623                 classify_iv(pscc, env);
 624         }
 625 }
 626
 627 /**
 628  * If an SCC is a Phi only cycle, remove it.
 629  */
 630 static void remove_phi_cycle(scc *pscc, iv_env *env) {
 631         ir_node *irn, *next;
 632         int j;
 633         ir_node *out_rc;
 634
 635         /* check if this scc contains only Phi, Add or Sub nodes */
 636         out_rc      = NULL;
 637         for (irn = pscc->head; irn; irn = next) {
 638                 node_entry *e = get_irn_ne(irn, env);
 639
 640                 next = e->next;
 641                 if (! is_Phi(irn))
 642                         return;
 643
 644                 for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 645                         ir_node *pred  = get_irn_n(irn, j);
 646                         node_entry *pe = get_irn_ne(pred, env);
 647
 648                         if (pe->pscc != e->pscc) {
 649                                 /* not in the same SCC, must be the only input */
 650                                 if (! out_rc) {
 651                                         out_rc = pred;
 652                                 } else if (out_rc != pred) {
 653                                         return;
 654                                 }
 655                         }
 656                 }
 657         }
 658         /* found a Phi cycle */
 659         DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 660
 661         for (irn = pscc->head; irn; irn = next) {
 662                 node_entry *e = get_irn_ne(irn, env);
 663                 next = e->next;
 664                 e->header = NULL;
 665                 exchange(irn, out_rc);
 666         }
 667         ++env->replaced;
 668 }
 669
 670 /**
 671  * Process a SCC for the Phi cycle removement.
 672  *
 673  * @param pscc  the SCC
 674  * @param env   the environment
 675  */
 676 static void process_phi_only_scc(scc *pscc, iv_env *env) {
 677         ir_node *head = pscc->head;
 678         node_entry *e = get_irn_link(head);
 679
 680 #ifdef DEBUG_libfirm
 681         {
 682                 ir_node *irn, *next;
 683
 684                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 685                 for (irn = pscc->head; irn; irn = next) {
 686                         node_entry *e = get_irn_link(irn);
 687
 688                         next = e->next;
 689
 690                         DB((dbg, LEVEL_4, " %+F,", irn));
 691                 }
 692                 DB((dbg, LEVEL_4, "\n"));
 693         }
 694 #endif
 695
 696         if (e->next != NULL)
 697                 remove_phi_cycle(pscc, env);
 698 }
 699
 700
 701 /**
 702  * Push a node onto the stack.
 703  *
 704  * @param env   the environment
 705  * @param n     the node to push
 706  */
 707 static void push(iv_env *env, ir_node *n) {
 708         node_entry *e;
 709
 710         if (env->tos == ARR_LEN(env->stack)) {
 711                 int nlen = ARR_LEN(env->stack) * 2;
 712                 ARR_RESIZE(ir_node *, env->stack, nlen);
 713         }
 714         env->stack[env->tos++] = n;
 715         e = get_irn_ne(n, env);
 716         e->in_stack = 1;
 717 }
 718
 719 /**
 720  * pop a node from the stack
 721  *
 722  * @param env   the environment
 723  *
 724  * @return  The topmost node
 725  */
 726 static ir_node *pop(iv_env *env)
 727 {
 728         ir_node *n = env->stack[--env->tos];
 729         node_entry *e = get_irn_ne(n, env);
 730
 731         e->in_stack = 0;
 732         return n;
 733 }
 734
 735 /**
 736  * Do Tarjan's SCC algorithm and drive OSR.
 737  *
 738  * @param irn  start at this node
 739  * @param env  the environment
 740  */
 741 static void dfs(ir_node *irn, iv_env *env)
 742 {
 743         int i, n;
 744         node_entry *node = get_irn_ne(irn, env);
 745
 746         mark_irn_visited(irn);
 747
 748         /* do not put blocks into the scc */
 749         if (is_Block(irn)) {
 750                 n = get_irn_arity(irn);
 751                 for (i = 0; i < n; ++i) {
 752                         ir_node *pred = get_irn_n(irn, i);
 753
 754                         if (irn_not_visited(pred))
 755                                 dfs(pred, env);
 756                 }
 757         }
 758         else {
 759                 ir_node *block = get_nodes_block(irn);
 760
 761                 node->DFSnum = env->nextDFSnum++;
 762                 node->low    = node->DFSnum;
 763                 push(env, irn);
 764
 765                 /* handle the block */
 766                 if (irn_not_visited(block))
 767                         dfs(block, env);
 768
 769                 n = get_irn_arity(irn);
 770                 for (i = 0; i < n; ++i) {
 771                         ir_node *pred = get_irn_n(irn, i);
 772                         node_entry *o = get_irn_ne(pred, env);
 773
 774                         if (irn_not_visited(pred)) {
 775                                 dfs(pred, env);
 776                                 node->low = MIN(node->low, o->low);
 777                         }
 778                         if (o->DFSnum < node->DFSnum && o->in_stack)
 779                                 node->low = MIN(o->DFSnum, node->low);
 780                 }
 781                 if (node->low == node->DFSnum) {
 782                         scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
 783                         ir_node *x;
 784
 785                         pscc->head = NULL;
 786                         do {
 787                                 node_entry *e;
 788
 789                                 x = pop(env);
 790                                 e = get_irn_ne(x, env);
 791                                 e->pscc    = pscc;
 792                                 e->next    = pscc->head;
 793                                 pscc->head = x;
 794                         } while (x != irn);
 795
 796                         env->process_scc(pscc, env);
 797                 }
 798         }
 799 }
 800
 801 /**
 802  * Do the DFS by starting at the End node of a graph.
 803  *
 804  * @param irg  the graph to process
 805  * @param env  the environment
 806  */
 807 static void do_dfs(ir_graph *irg, iv_env *env) {
 808         ir_graph *rem = current_ir_graph;
 809         ir_node *end = get_irg_end(irg);
 810         int i, n;
 811
 812         current_ir_graph = irg;
 813         inc_irg_visited(irg);
 814
 815         /* visit all visible nodes */
 816         dfs(end, env);
 817
 818         /* visit the keep-alives */
 819         n = get_End_n_keepalives(end);
 820         for (i = 0; i < n; ++i) {
 821                 ir_node *ka = get_End_keepalive(end, i);
 822
 823                 if (irn_not_visited(ka))
 824                         dfs(ka, env);
 825         }
 826
 827         current_ir_graph = rem;
 828 }
 829
 830 /**
 831  * Post-block-walker: assign the post-order number.
 832  */
 833 static void assign_po(ir_node *block, void *ctx) {
 834         iv_env *env = ctx;
 835         node_entry *e = get_irn_ne(block, env);
 836
 837         e->POnum = env->POnum++;
 838 }
 839
 840 #if 0
 841 /**
 842  * Follows the LFTR edges and return the last node in the chain.
 843  *
 844  * @param irn  the node that should be followed
 845  * @param env  the IV environment
 846  *
 847  * @note
 848  * In the current implementation only the last edge is stored, so
 849  * only one chain exists. That's why we might miss some opportunities.
 850  */
 851 static ir_node *followEdges(ir_node *irn, iv_env *env) {
 852         for (;;) {
 853                 LFTR_edge *e = LFTR_find(irn, env);
 854                 if (e)
 855                         irn = e->dst;
 856                 else
 857                         return irn;
 858         }
 859 }
 860
 861 /**
 862  * Apply one LFTR edge operation.
 863  * Return NULL if the transformation cannot be done safely without
 864  * an Overflow.
 865  *
 866  * @param rc   the IV node that should be translated
 867  * @param e    the LFTR edge
 868  * @param env  the IV environment
 869  *
 870  * @return the translated region constant or NULL
 871  *         if the translation was not possible
 872  *
 873  * @note
 874  * In the current implementation only the last edge is stored, so
 875  * only one chain exists. That's why we might miss some opportunities.
 876  */
 877 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
 878         if (env->flags & osr_flag_lftr_with_ov_check) {
 879                 tarval *tv_l, *tv_r, *tv;
 880                 tarval_int_overflow_mode_t ovmode;
 881
 882                 /* overflow can only be decided for Consts */
 883                 if (! is_Const(e->rc)) {
 884                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
 885                         return NULL;
 886                 }
 887
 888                 tv_l = get_Const_tarval(rc);
 889                 tv_r = get_Const_tarval(e->rc);
 890
 891                 ovmode = tarval_get_integer_overflow_mode();
 892                 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
 893
 894                 switch (e->code) {
 895                 case iro_Mul:
 896                         tv = tarval_mul(tv_l, tv_r);
 897                         DB((dbg, LEVEL_4, " * %+F", tv_r));
 898                         break;
 899                 case iro_Add:
 900                         tv = tarval_add(tv_l, tv_r);
 901                         DB((dbg, LEVEL_4, " + %+F", tv_r));
 902                         break;
 903                 case iro_Sub:
 904                         tv = tarval_sub(tv_l, tv_r);
 905                         DB((dbg, LEVEL_4, " - %+F", tv_r));
 906                         break;
 907                 default:
 908                         assert(0);
 909                         tv = tarval_bad;
 910                 }
 911                 tarval_set_integer_overflow_mode(ovmode);
 912
 913                 if (tv == tarval_bad) {
 914                         DB((dbg, LEVEL_4, " = OVERFLOW"));
 915                         return NULL;
 916                 }
 917                 return new_r_Const(current_ir_graph, get_nodes_block(rc), get_tarval_mode(tv), tv);
 918         }
 919         return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
 920 }
 921
 922 /**
 923  * Applies the operations represented by the LFTR edges to a
 924  * region constant and returns the value.
 925  * Return NULL if the transformation cannot be done safely without
 926  * an Overflow.
 927  *
 928  * @param iv   the IV node that starts the LFTR edge chain
 929  * @param rc   the region constant that should be translated
 930  * @param env  the IV environment
 931  *
 932  * @return the translated region constant or NULL
 933  *         if the translation was not possible
 934  */
 935 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
 936         ir_node *irn = iv;
 937
 938         if (env->flags & osr_flag_lftr_with_ov_check) {
 939                 /* overflow can only be decided for Consts */
 940                 if (! is_Const(rc)) {
 941                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
 942                         return NULL;
 943                 }
 944                 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
 945         }
 946
 947         for (irn = iv; rc;) {
 948                 LFTR_edge *e = LFTR_find(irn, env);
 949                 if (e) {
 950                         rc = applyOneEdge(rc, e, env);
 951                         irn = e->dst;
 952                 }
 953                 else
 954                         break;
 955         }
 956         DB((dbg, LEVEL_3, "\n"));
 957         return rc;
 958 }
 959
 960 /**
 961  * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
 962  * and tries to optimize them.
 963  */
 964 static void do_lftr(ir_node *cmp, void *ctx) {
 965         iv_env *env = ctx;
 966         ir_node *left, *right, *liv, *riv;
 967         ir_node *iv, *rc;
 968         ir_node *nleft = NULL, *nright = NULL;
 969
 970         if (get_irn_op(cmp) != op_Cmp)
 971                 return;
 972
 973         left  = get_Cmp_left(cmp);
 974         right = get_Cmp_right(cmp);
 975
 976         liv = is_iv(left, env);
 977         riv = is_iv(right, env);
 978         if (liv && is_rc(right, liv)) {
 979                 iv = left; rc = right;
 980
 981                 nright = applyEdges(iv, rc, env);
 982                 if (nright && nright != rc) {
 983                         nleft = followEdges(iv, env);
 984                 }
 985         }
 986         else if (riv && is_rc(left, riv)) {
 987                 iv = right; rc = left;
 988
 989                 nleft = applyEdges(iv, rc, env);
 990                 if (nleft && nleft != rc) {
 991                         nright = followEdges(iv, env);
 992                 }
 993         }
 994
 995         if (nleft && nright) {
 996                 DB((dbg, LEVEL_2, "  LFTR for %+F\n", cmp));
 997                 set_Cmp_left(cmp, nleft);
 998                 set_Cmp_right(cmp, nright);
 999                 ++env->lftr_replaced;
1000         }
1001 }
1002
1003 /**
1004  * do linear function test replacement.
1005  *
1006  * @param irg   the graph that should be optimized
1007  * @param env   the IV environment
1008  */
1009 static void lftr(ir_graph *irg, iv_env *env) {
1010         irg_walk_graph(irg, NULL, do_lftr, env);
1011 }
1012 #endif
1013
1014 /**
1015  * Pre-walker: set all node links to NULL and fix the
1016  * block of Proj nodes.
1017  */
1018 static void clear_and_fix(ir_node *irn, void *env)
1019 {
1020         (void) env;
1021         set_irn_link(irn, NULL);
1022
1023         /* FIXME: must be removed but edges must be fixed first*/
1024         if (is_Proj(irn)) {
1025                 ir_node *pred = get_Proj_pred(irn);
1026                 set_irn_n(irn, -1, get_nodes_block(pred));
1027         }
1028 }
1029
1030 /* Performs Operator Strength Reduction for the passed graph. */
1031 void opt_osr(ir_graph *irg, unsigned flags) {
1032         iv_env   env;
1033         ir_graph *rem;
1034
1035         if (! get_opt_strength_red()) {
1036                 /* only kill Phi cycles  */
1037                 remove_phi_cycles(irg);
1038                 return;
1039         }
1040
1041         rem = current_ir_graph;
1042         current_ir_graph = irg;
1043
1044         FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
1045
1046         DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
1047
1048         obstack_init(&env.obst);
1049         env.stack         = NEW_ARR_F(ir_node *, 128);
1050         env.tos           = 0;
1051         env.nextDFSnum    = 0;
1052         env.POnum         = 0;
1053         env.quad_map      = new_set(quad_cmp, 64);
1054         env.lftr_edges    = new_set(LFTR_cmp, 64);
1055         env.replaced      = 0;
1056         env.lftr_replaced = 0;
1057         env.flags         = flags;
1058         env.process_scc   = process_scc;
1059
1060         /* Clear all links and move Proj nodes into the
1061            the same block as it's predecessors.
1062            This can improve the placement of new nodes.
1063          */
1064         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
1065
1066         /* we need dominance */
1067         assure_doms(irg);
1068         assure_irg_outs(irg);
1069
1070         /* calculate the post order number for blocks. */
1071         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
1072
1073         /* calculate the SCC's and drive OSR. */
1074         do_dfs(irg, &env);
1075
1076         if (env.replaced) {
1077                 /* try linear function test replacements */
1078                 //lftr(irg, &env);
1079
1080                 set_irg_outs_inconsistent(irg);
1081                 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
1082         }
1083
1084         del_set(env.lftr_edges);
1085         del_set(env.quad_map);
1086         DEL_ARR_F(env.stack);
1087         obstack_free(&env.obst, NULL);
1088
1089         current_ir_graph = rem;
1090 }
1091
1092 /* Remove any Phi cycles with only one real input. */
1093 void remove_phi_cycles(ir_graph *irg) {
1094         iv_env   env;
1095         ir_graph *rem;
1096
1097         rem = current_ir_graph;
1098         current_ir_graph = irg;
1099
1100         FIRM_DBG_REGISTER(dbg, "firm.opt.remove_phi");
1101
1102         DB((dbg, LEVEL_1, "Doing Phi cycle removement for %+F\n", irg));
1103
1104         obstack_init(&env.obst);
1105         env.stack         = NEW_ARR_F(ir_node *, 128);
1106         env.tos           = 0;
1107         env.nextDFSnum    = 0;
1108         env.POnum         = 0;
1109         env.quad_map      = NULL;
1110         env.lftr_edges    = NULL;
1111         env.replaced      = 0;
1112         env.lftr_replaced = 0;
1113         env.flags         = 0;
1114         env.process_scc   = process_phi_only_scc;
1115
1116         /* Clear all links and move Proj nodes into the
1117            the same block as it's predecessors.
1118            This can improve the placement of new nodes.
1119          */
1120         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
1121
1122         /* we need dominance */
1123         assure_irg_outs(irg);
1124
1125         /* calculate the post order number for blocks. */
1126         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
1127
1128         /* calculate the SCC's and drive OSR. */
1129         do_dfs(irg, &env);
1130
1131         if (env.replaced) {
1132                 set_irg_outs_inconsistent(irg);
1133                 DB((dbg, LEVEL_1, "remove_phi_cycles: %u Cycles removed\n\n", env.replaced));
1134         }
1135
1136         DEL_ARR_F(env.stack);
1137         obstack_free(&env.obst, NULL);
1138
1139         current_ir_graph = rem;
1140 }