nsz Git - libfirm/blob - ir/opt/opt_osr.c

   1 /*
   2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Operator Strength Reduction.
  23  * @date    12.5.2006
  24  * @author  Michael Beck
  25  * @version $Id$
  26  * @summary
  27  *  Implementation of the Operator Strength Reduction algorithm
  28  *  by Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick.
  29  *  Extended version.
  30  */
  31 #ifdef HAVE_CONFIG_H
  32 #include "config.h"
  33 #endif
  34
  35 #include "adt/pdeq.h"
  36 #include "iroptimize.h"
  37 #include "irgraph.h"
  38 #include "ircons.h"
  39 #include "irop_t.h"
  40 #include "irdom.h"
  41 #include "irgmod.h"
  42 #include "irflag_t.h"
  43 #include "irgwalk.h"
  44 #include "irouts.h"
  45 #include "iredges.h"
  46 #include "debug.h"
  47 #include "obst.h"
  48 #include "set.h"
  49 #include "tv.h"
  50 #include "hashptr.h"
  51 #include "irtools.h"
  52 #include "irloop_t.h"
  53 #include "array.h"
  54 #include "firmstat.h"
  55 #include "xmalloc.h"
  56 #include "error.h"
  57
  58 /** The debug handle. */
  59 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  60
  61 /** A scc. */
  62 typedef struct scc {
  63         ir_node  *head;         /**< the head of the list */
  64         tarval   *init;     /**< the init value iff only one exists. */
  65         tarval   *incr;     /**< the induction variable increment if only a single const exists. */
  66         unsigned code;      /**< == iro_Add if +incr, iro_Sub if -incr, 0 if not analysed, iro_Bad else */
  67 } scc;
  68
  69 /** A node entry */
  70 typedef struct node_entry {
  71         unsigned DFSnum;    /**< the DFS number of this node */
  72         unsigned low;       /**< the low number of this node */
  73         ir_node  *header;   /**< the header of this node */
  74         int      in_stack;  /**< flag, set if the node is on the stack */
  75         ir_node  *next;     /**< link to the next node the the same scc */
  76         scc      *pscc;     /**< the scc of this node */
  77         unsigned POnum;     /**< the post order number for blocks */
  78 } node_entry;
  79
  80 /** The environment. */
  81 typedef struct iv_env {
  82         struct obstack obst;    /**< an obstack for allocations */
  83         ir_node  **stack;       /**< the node stack */
  84         int      tos;           /**< tos index */
  85         unsigned nextDFSnum;    /**< the current DFS number */
  86         unsigned POnum;         /**< current post order number */
  87         set      *quad_map;     /**< a map from (op, iv, rc) to node */
  88         set      *lftr_edges;   /**< the set of lftr edges */
  89         unsigned replaced;      /**< number of replaced ops */
  90         unsigned lftr_replaced; /**< number of applied linear function test replacements */
  91         unsigned flags;         /**< additional flags */
  92         /** Function called to process a SCC. */
  93         void (*process_scc)(scc *pscc, struct iv_env *env);
  94 } iv_env;
  95
  96 /**
  97  * An entry in the (op, node, node) -> node map.
  98  */
  99 typedef struct quadruple_t {
 100         ir_opcode code;  /**< the opcode of the reduced operation */
 101         ir_node   *op1;  /**< the first operand the reduced operation */
 102         ir_node   *op2;  /**< the second operand of the reduced operation */
 103
 104         ir_node   *res; /**< the reduced operation */
 105 } quadruple_t;
 106
 107 /**
 108  * A LFTR edge.
 109  */
 110 typedef struct LFTR_edge {
 111         ir_node   *src;   /**< the source node */
 112         ir_node   *dst;   /**< the destination node */
 113         ir_opcode code;   /**< the opcode that must be applied */
 114         ir_node   *rc;    /**< the region const that must be applied */
 115 } LFTR_edge;
 116
 117 /* forward */
 118 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
 119
 120 /**
 121  * Compare two LFTR edges.
 122  */
 123 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 124         const LFTR_edge *l1 = e1;
 125         const LFTR_edge *l2 = e2;
 126         (void) size;
 127
 128         return l1->src != l2->src;
 129 }
 130
 131 /**
 132  * Find a LFTR edge.
 133  */
 134 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 135         LFTR_edge key;
 136
 137         key.src  = src;
 138
 139         return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 140 }
 141
 142 /**
 143  * Add a LFTR edge.
 144  */
 145 static void LFTR_add(ir_node *src, ir_node *dst, ir_opcode code, ir_node *rc, iv_env *env) {
 146         LFTR_edge key;
 147
 148         key.src  = src;
 149         key.dst  = dst;
 150         key.code = code;
 151         key.rc   = rc;
 152
 153         /*
 154          * There might be more than one edge here. This is rather bad
 155          * because we currently store only one.
 156          */
 157 //      assert(LFTR_find(src, env) == NULL);
 158         set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 159 }
 160
 161 /**
 162  * Gets the node_entry of a node
 163  */
 164 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 165         node_entry *e = get_irn_link(irn);
 166
 167         if (! e) {
 168                 e = obstack_alloc(&env->obst, sizeof(*e));
 169                 memset(e, 0, sizeof(*e));
 170                 set_irn_link(irn, e);
 171         }
 172         return e;
 173 }
 174
 175 /**
 176  * Gets the scc from an IV.
 177  */
 178 static scc *get_iv_scc(ir_node *iv, iv_env *env) {
 179         node_entry *e = get_irn_ne(iv, env);
 180         return e->pscc;
 181 }
 182
 183 /**
 184  * Check if irn is an IV.
 185  *
 186  * @param irn  the node to check
 187  * @param env  the environment
 188  *
 189  * @returns the header if it is one, NULL else
 190  */
 191 static ir_node *is_iv(ir_node *irn, iv_env *env) {
 192         return get_irn_ne(irn, env)->header;
 193 }
 194
 195 /**
 196  * Check if irn is a region constant.
 197  * The block or irn must strictly dominate the header block.
 198  *
 199  * @param irn           the node to check
 200  * @param header_block  the header block of the induction variable
 201  */
 202 static int is_rc(ir_node *irn, ir_node *header_block) {
 203         ir_node *block = get_nodes_block(irn);
 204
 205         return (block != header_block) && block_dominates(block, header_block);
 206 }
 207
 208 /**
 209  * Set compare function for the quad set.
 210  */
 211 static int quad_cmp(const void *e1, const void *e2, size_t size) {
 212         const quadruple_t *c1 = e1;
 213         const quadruple_t *c2 = e2;
 214         (void) size;
 215
 216         return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 217 }
 218
 219 /**
 220  * Check if an reduced operation was already calculated.
 221  *
 222  * @param code  the opcode of the operation
 223  * @param op1   the first operand of the operation
 224  * @param op2   the second operand of the operation
 225  * @param env   the environment
 226  *
 227  * @return the already reduced node or NULL if this operation is not yet reduced
 228  */
 229 static ir_node *search(ir_opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 230         quadruple_t key, *entry;
 231
 232         key.code = code;
 233         key.op1 = op1;
 234         key.op2 = op2;
 235
 236         entry = set_find(env->quad_map, &key, sizeof(key),
 237                          (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 238         if (entry)
 239                 return entry->res;
 240         return NULL;
 241 }
 242
 243 /**
 244  * Add an reduced operation.
 245  *
 246  * @param code    the opcode of the operation
 247  * @param op1     the first operand of the operation
 248  * @param op2     the second operand of the operation
 249  * @param result  the result of the reduced operation
 250  * @param env     the environment
 251  */
 252 static void add(ir_opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
 253         quadruple_t key;
 254
 255         key.code = code;
 256         key.op1  = op1;
 257         key.op2  = op2;
 258         key.res  = result;
 259
 260         set_insert(env->quad_map, &key, sizeof(key),
 261                    (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 262 }
 263
 264 /**
 265  * Find a location where to place a bin-op whose operands are in
 266  * block1 and block2.
 267  *
 268  * @param block1  the block of the first operand
 269  * @param block2  the block of the second operand
 270  *
 271  * Note that we know here that such a place must exists. Moreover, this means
 272  * that either block1 dominates block2 or vice versa. So, just return
 273  * the "smaller" one.
 274  */
 275 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 276         if (block_dominates(block1, block2))
 277                 return block2;
 278         assert(block_dominates(block2, block1));
 279         return block1;
 280 }
 281
 282 /**
 283  * Create a node that executes an op1 code op1 operation.
 284  *
 285  * @param code   the opcode to execute
 286  * @param db     debug info to add to the new node
 287  * @param op1    the first operand
 288  * @param op2    the second operand
 289  * @param mode   the mode of the new operation
 290  *
 291  * @return the newly created node
 292  */
 293 static ir_node *do_apply(ir_opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 294         ir_graph *irg = current_ir_graph;
 295         ir_node *result;
 296         ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
 297
 298         switch (code) {
 299         case iro_Mul:
 300                 result = new_rd_Mul(db, irg, block, op1, op2, mode);
 301                 break;
 302         case iro_Add:
 303                 result = new_rd_Add(db, irg, block, op1, op2, mode);
 304                 break;
 305         case iro_Sub:
 306                 result = new_rd_Sub(db, irg, block, op1, op2, mode);
 307                 break;
 308         default:
 309                 panic("Unsupported opcode");
 310                 result = NULL;
 311         }
 312         return result;
 313 }
 314
 315 /**
 316  * The Apply operation.
 317  *
 318  * @param orig   the node that represent the original operation and determines
 319  *               the opcode, debug-info and mode of a newly created one
 320  * @param op1    the first operand
 321  * @param op2    the second operand
 322  * @param env    the environment
 323  *
 324  * @return the newly created node
 325  */
 326 static ir_node *apply(ir_node *header, ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 327         ir_opcode code = get_irn_opcode(orig);
 328         ir_node *result = search(code, op1, op2, env);
 329
 330         if (result == NULL) {
 331                 dbg_info *db = get_irn_dbg_info(orig);
 332                 ir_node *op1_header = get_irn_ne(op1, env)->header;
 333                 ir_node *op2_header = get_irn_ne(op2, env)->header;
 334
 335                 if (op1_header == header && is_rc(op2, op1_header)) {
 336                         result = reduce(orig, op1, op2, env);
 337                 }
 338                 else if (op2_header == header && is_rc(op1, op2_header)) {
 339                         result = reduce(orig, op2, op1, env);
 340                 }
 341                 else {
 342                         result = do_apply(code, db, op1, op2, get_irn_mode(orig));
 343                         get_irn_ne(result, env)->header = NULL;
 344                 }
 345         }
 346         return result;
 347 }
 348
 349 /**
 350  * The Reduce operation.
 351  *
 352  * @param orig   the node that represent the original operation and determines
 353  *               the opcode, debug-info and mode of a newly created one
 354  * @param iv     the induction variable
 355  * @param rc     the region constant
 356  * @param env    the environment
 357  *
 358  * @return the reduced node
 359  */
 360 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 361         ir_opcode code = get_irn_opcode(orig);
 362         ir_node *result = search(code, iv, rc, env);
 363
 364         if (result == NULL) {
 365                 node_entry *e, *iv_e;
 366                 int i, n;
 367                 ir_mode *mode = get_irn_mode(orig);
 368
 369                 result = exact_copy(iv);
 370
 371                 /* Beware: we must always create a new induction variable with the same mode
 372                    as the node we are replacing. Especially this means the mode might be changed
 373                    from P to I and back. This is always possible, because we have only Phi, Add
 374                    and Sub nodes. */
 375                 set_irn_mode(result, mode);
 376                 add(code, iv, rc, result, env);
 377                 DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 378                         get_irn_opname(orig), rc));
 379
 380                 iv_e = get_irn_ne(iv, env);
 381                 e    = get_irn_ne(result, env);
 382                 e->header = iv_e->header;
 383
 384                 /* create the LFTR edge */
 385                 LFTR_add(iv, result, code, rc, env);
 386
 387                 n = get_irn_arity(result);
 388                 for (i = 0; i < n; ++i) {
 389                         ir_node *o = get_irn_n(result, i);
 390
 391                         e = get_irn_ne(o, env);
 392                         if (e->header == iv_e->header)
 393                                 o = reduce(orig, o, rc, env);
 394                         else if (is_Phi(result) || code == iro_Mul)
 395                                 o = apply(iv_e->header, orig, o, rc, env);
 396                         set_irn_n(result, i, o);
 397                 }
 398         }
 399         else {
 400                 DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
 401                         get_irn_opname(orig), rc));
 402         }
 403         return result;
 404 }
 405
 406 /**
 407  * Update the scc for a newly created IV.
 408  */
 409 static void update_scc(ir_node *iv, node_entry *e, iv_env *env) {
 410         scc     *pscc   = e->pscc;
 411         ir_node *header = e->header;
 412         waitq    *wq = new_waitq();
 413
 414         DB((dbg, LEVEL_2, "  Creating SCC for new an induction variable:\n  "));
 415         pscc->head = NULL;
 416         waitq_put(wq, iv);
 417         do {
 418                 ir_node    *irn = waitq_get(wq);
 419                 node_entry *ne  = get_irn_ne(irn, env);
 420                 int        i;
 421
 422                 ne->pscc   = pscc;
 423                 ne->next   = pscc->head;
 424                 pscc->head = irn;
 425                 DB((dbg, LEVEL_2, " %+F,", irn));
 426
 427                 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
 428                         ir_node    *pred = get_irn_n(irn, i);
 429                         node_entry *pe   = get_irn_ne(pred, env);
 430
 431                         if (pe->header == header && pe->pscc == NULL) {
 432                                 /* set the pscc here to ensure that the node is NOT enqueued another time */
 433                                 pe->pscc = pscc;
 434                                 waitq_put(wq, pred);
 435                         }
 436                 }
 437         } while (! waitq_empty(wq));
 438         del_waitq(wq);
 439         DB((dbg, LEVEL_2, "\n"));
 440 }
 441
 442 /**
 443  * The Replace operation.
 444  *
 445  * @param irn   the node that will be replaced
 446  * @param iv    the induction variable
 447  * @param rc    the region constant
 448  * @param env   the environment
 449  */
 450 static int replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 451         ir_node *result;
 452
 453         DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
 454
 455         result = reduce(irn, iv, rc, env);
 456         if (result != irn) {
 457                 node_entry *e;
 458
 459                 hook_strength_red(current_ir_graph, irn);
 460                 exchange(irn, result);
 461                 e = get_irn_ne(result, env);
 462                 if (e->pscc == NULL) {
 463                         e->pscc = obstack_alloc(&env->obst, sizeof(*e->pscc));
 464                         memset(e->pscc, 0, sizeof(*e->pscc));
 465                         update_scc(result, e, env);
 466                 }
 467                 ++env->replaced;
 468                 return 1;
 469         }
 470         return 0;
 471 }
 472
 473 #if 0
 474 /**
 475  * check if a given node is a mul with 2, 4, 8
 476  */
 477 static int is_x86_shift_const(ir_node *mul) {
 478         ir_node *rc;
 479
 480         if (! is_Mul(mul))
 481                 return 0;
 482
 483         /* normalization put constants on the right side */
 484         rc = get_Mul_right(mul);
 485         if (is_Const(rc)) {
 486                 tarval *tv = get_Const_tarval(rc);
 487
 488                 if (tarval_is_long(tv)) {
 489                         long value = get_tarval_long(tv);
 490
 491                         if (value == 2 || value == 4 || value == 8) {
 492                                 /* do not reduce multiplications by 2, 4, 8 */
 493                                 return 1;
 494                         }
 495                 }
 496         }
 497         return 0;
 498 }
 499 #endif
 500
 501 /**
 502  * Check if an IV represents a counter with constant limits.
 503  */
 504 static int is_counter_iv(ir_node *iv, iv_env *env) {
 505         node_entry *e         = get_irn_ne(iv, env);
 506         scc        *pscc      = e->pscc;
 507         ir_node    *have_init = NULL;
 508         ir_node    *have_incr = NULL;
 509         ir_opcode  code       = iro_Bad;
 510         ir_node    *irn;
 511
 512         if (pscc->code != 0) {
 513                 /* already analysed */
 514                 return pscc->code != iro_Bad;
 515         }
 516
 517         pscc->code = iro_Bad;
 518         for (irn = pscc->head; irn != NULL; irn = e->next) {
 519                 if (is_Add(irn)) {
 520                         if (have_incr != NULL)
 521                                 return 0;
 522
 523                         have_incr = get_Add_right(irn);
 524                         if (! is_Const(have_incr)) {
 525                                 have_incr = get_Add_left(irn);
 526                                 if (! is_Const(have_incr))
 527                                         return 0;
 528                         }
 529                         code = iro_Add;
 530                 } else if (is_Sub(irn)) {
 531                         if (have_incr != NULL)
 532                                 return 0;
 533
 534                         have_incr = get_Sub_right(irn);
 535                         if (! is_Const(have_incr))
 536                                 return 0;
 537                         code = iro_Sub;
 538                 } else if (is_Phi(irn)) {
 539                         int i;
 540
 541                         for (i = get_Phi_n_preds(irn) - 1; i >= 0; --i) {
 542                                 ir_node    *pred = get_Phi_pred(irn, i);
 543                                 node_entry *ne   = get_irn_ne(pred, env);
 544
 545                                 if (ne->header == e->header)
 546                                         continue;
 547                                 if (have_init != NULL)
 548                                         return 0;
 549                                 have_init = pred;
 550                                 if (! is_Const(pred))
 551                                         return 0;
 552                         }
 553                 } else
 554                         return 0;
 555                 e = get_irn_ne(irn, env);
 556         }
 557         pscc->init = get_Const_tarval(have_init);
 558         pscc->incr = get_Const_tarval(have_incr);
 559         pscc->code = code;
 560         return code != iro_Bad;
 561 }
 562
 563 /**
 564  * Check the users of an induction variable for register pressure.
 565  */
 566 static int check_users_for_reg_pressure(ir_node *iv, iv_env *env) {
 567         ir_node    *irn, *header;
 568         ir_node    *have_user = NULL;
 569         ir_node    *have_cmp  = NULL;
 570         node_entry *e         = get_irn_ne(iv, env);
 571         scc        *pscc      = e->pscc;
 572
 573         header = e->header;
 574         for (irn = pscc->head; irn != NULL; irn = e->next) {
 575                 const ir_edge_t *edge;
 576
 577                 foreach_out_edge(irn, edge) {
 578                         ir_node    *user = get_edge_src_irn(edge);
 579                         node_entry *ne = get_irn_ne(user, env);
 580
 581                         if (e->header == ne->header) {
 582                                 /* found user from the same IV */
 583                                 continue;
 584                         }
 585                         if (is_Cmp(user)) {
 586                                 if (have_cmp != NULL) {
 587                                         /* more than one cmp, for now end here */
 588                                         return 0;
 589                                 }
 590                                 have_cmp = user;
 591                         } else {
 592                                 /* user is a real user of the IV */
 593                                 if (have_user != NULL) {
 594                                         /* found the second user */
 595                                         return 0;
 596                                 }
 597                                 have_user = user;
 598                         }
 599                 }
 600                 e = get_irn_ne(irn, env);
 601         }
 602
 603         if (have_user == NULL) {
 604                 /* no user, ignore */
 605                 return 1;
 606         }
 607
 608         if (have_cmp == NULL) {
 609                 /* fine, only one user, try to reduce */
 610                 return 1;
 611         }
 612         /*
 613          * We found one user AND at least one cmp.
 614          * We should check here if we can transform the Cmp.
 615          *
 616          * For now our capabilities for doing linear function test
 617          * are limited, so check if the iv has the right form: Only ONE
 618          * phi, only one Add/Sub with a Const
 619          */
 620         if (! is_counter_iv(iv, env))
 621                 return 0;
 622
 623         /*
 624          * Ok, we have only one increment AND it is a Const, we might be able
 625          * to do a linear function test replacement, so go on.
 626          */
 627         return 1;
 628 }
 629
 630 /**
 631  * Check if a node can be replaced (+, -, *).
 632  *
 633  * @param irn   the node to check
 634  * @param env   the environment
 635  *
 636  * @return non-zero if irn should be Replace'd
 637  */
 638 static int check_replace(ir_node *irn, iv_env *env) {
 639         ir_node   *left, *right, *iv, *rc;
 640         ir_op     *op  = get_irn_op(irn);
 641         ir_opcode code = get_op_code(op);
 642         ir_node   *liv, *riv;
 643
 644         switch (code) {
 645         case iro_Mul:
 646         case iro_Add:
 647         case iro_Sub:
 648                 iv = rc = NULL;
 649
 650                 left  = get_binop_left(irn);
 651                 right = get_binop_right(irn);
 652
 653                 liv = is_iv(left, env);
 654                 riv = is_iv(right, env);
 655                 if (liv && is_rc(right, liv)) {
 656                         iv = left; rc = right;
 657                 }
 658                 else if (riv && is_op_commutative(op) &&
 659                                     is_rc(left, riv)) {
 660                         iv = right; rc = left;
 661                 }
 662
 663                 if (iv) {
 664                         if (env->flags & osr_flag_keep_reg_pressure) {
 665                                 if (! check_users_for_reg_pressure(iv, env))
 666                                         return 0;
 667                         }
 668                         return replace(irn, iv, rc, env);
 669                 }
 670                 break;
 671         default:
 672                 break;
 673         }
 674         return 0;
 675 }
 676
 677 /**
 678  * Check which SCC's are induction variables.
 679  *
 680  * @param pscc  a SCC
 681  * @param env   the environment
 682  */
 683 static void classify_iv(scc *pscc, iv_env *env) {
 684         ir_node *irn, *next, *header = NULL;
 685         node_entry *b, *h = NULL;
 686         int j, only_phi, num_outside;
 687         ir_node *out_rc;
 688
 689         /* find the header block for this scc */
 690         for (irn = pscc->head; irn; irn = next) {
 691                 node_entry *e = get_irn_link(irn);
 692                 ir_node *block = get_nodes_block(irn);
 693
 694                 next = e->next;
 695                 b = get_irn_ne(block, env);
 696
 697                 if (header) {
 698                         if (h->POnum < b->POnum) {
 699                                 header = block;
 700                                 h      = b;
 701                         }
 702                 }
 703                 else {
 704                         header = block;
 705                         h      = b;
 706                 }
 707         }
 708
 709         /* check if this scc contains only Phi, Add or Sub nodes */
 710         only_phi    = 1;
 711         num_outside = 0;
 712         out_rc      = NULL;
 713         for (irn = pscc->head; irn; irn = next) {
 714                 node_entry *e = get_irn_ne(irn, env);
 715
 716                 next = e->next;
 717                 switch (get_irn_opcode(irn)) {
 718                 case iro_Add:
 719                 case iro_Sub:
 720                         only_phi = 0;
 721                         /* fall through */
 722                 case iro_Phi:
 723                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 724                                 ir_node *pred  = get_irn_n(irn, j);
 725                                 node_entry *pe = get_irn_ne(pred, env);
 726
 727                                 if (pe->pscc != e->pscc) {
 728                                         /* not in the same SCC, must be a region const */
 729                                         if (! is_rc(pred, header)) {
 730                                                 /* not an induction variable */
 731                                                 goto fail;
 732                                         }
 733                                         if (! out_rc) {
 734                                                 out_rc = pred;
 735                                                 ++num_outside;
 736                                         } else if (out_rc != pred) {
 737                                                 ++num_outside;
 738                                         }
 739                                 }
 740                         }
 741                         break;
 742                 default:
 743                         /* not an induction variable */
 744                         goto fail;
 745                 }
 746         }
 747         /* found an induction variable */
 748         DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
 749         if (only_phi && num_outside == 1) {
 750                 /* a phi cycle with only one real predecessor can be collapsed */
 751                 DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 752
 753                 for (irn = pscc->head; irn; irn = next) {
 754                         node_entry *e = get_irn_ne(irn, env);
 755                         next = e->next;
 756                         e->header = NULL;
 757                         exchange(irn, out_rc);
 758                 }
 759                 ++env->replaced;
 760                 return;
 761         }
 762
 763         /* set the header for every node in this scc */
 764         for (irn = pscc->head; irn; irn = next) {
 765                 node_entry *e = get_irn_ne(irn, env);
 766                 e->header = header;
 767                 next = e->next;
 768                 DB((dbg, LEVEL_2, " %+F,", irn));
 769         }
 770         DB((dbg, LEVEL_2, "\n"));
 771         return;
 772
 773 fail:
 774         for (irn = pscc->head; irn; irn = next) {
 775                 node_entry *e = get_irn_ne(irn, env);
 776
 777                 next = e->next;
 778                 e->header = NULL;
 779         }
 780 }
 781
 782 /**
 783  * Process a SCC for the operator strength reduction.
 784  *
 785  * @param pscc  the SCC
 786  * @param env   the environment
 787  */
 788 static void process_scc(scc *pscc, iv_env *env) {
 789         ir_node *head = pscc->head;
 790         node_entry *e = get_irn_link(head);
 791
 792 #ifdef DEBUG_libfirm
 793         {
 794                 ir_node *irn, *next;
 795
 796                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 797                 for (irn = pscc->head; irn; irn = next) {
 798                         node_entry *e = get_irn_link(irn);
 799
 800                         next = e->next;
 801
 802                         DB((dbg, LEVEL_4, " %+F,", irn));
 803                 }
 804                 DB((dbg, LEVEL_4, "\n"));
 805         }
 806 #endif
 807
 808         if (e->next == NULL) {
 809                 /* this SCC has only a single member */
 810                 check_replace(head, env);
 811         } else {
 812                 classify_iv(pscc, env);
 813         }
 814 }
 815
 816 /**
 817  * If an SCC is a Phi only cycle, remove it.
 818  */
 819 static void remove_phi_cycle(scc *pscc, iv_env *env) {
 820         ir_node *irn, *next;
 821         int j;
 822         ir_node *out_rc;
 823
 824         /* check if this scc contains only Phi, Add or Sub nodes */
 825         out_rc      = NULL;
 826         for (irn = pscc->head; irn; irn = next) {
 827                 node_entry *e = get_irn_ne(irn, env);
 828
 829                 next = e->next;
 830                 if (! is_Phi(irn))
 831                         return;
 832
 833                 for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 834                         ir_node *pred  = get_irn_n(irn, j);
 835                         node_entry *pe = get_irn_ne(pred, env);
 836
 837                         if (pe->pscc != e->pscc) {
 838                                 /* not in the same SCC, must be the only input */
 839                                 if (! out_rc) {
 840                                         out_rc = pred;
 841                                 } else if (out_rc != pred) {
 842                                         return;
 843                                 }
 844                         }
 845                 }
 846         }
 847         /* found a Phi cycle */
 848         DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 849
 850         for (irn = pscc->head; irn; irn = next) {
 851                 node_entry *e = get_irn_ne(irn, env);
 852                 next = e->next;
 853                 e->header = NULL;
 854                 exchange(irn, out_rc);
 855         }
 856         ++env->replaced;
 857 }
 858
 859 /**
 860  * Process a SCC for the Phi cycle removement.
 861  *
 862  * @param pscc  the SCC
 863  * @param env   the environment
 864  */
 865 static void process_phi_only_scc(scc *pscc, iv_env *env) {
 866         ir_node *head = pscc->head;
 867         node_entry *e = get_irn_link(head);
 868
 869 #ifdef DEBUG_libfirm
 870         {
 871                 ir_node *irn, *next;
 872
 873                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 874                 for (irn = pscc->head; irn; irn = next) {
 875                         node_entry *e = get_irn_link(irn);
 876
 877                         next = e->next;
 878
 879                         DB((dbg, LEVEL_4, " %+F,", irn));
 880                 }
 881                 DB((dbg, LEVEL_4, "\n"));
 882         }
 883 #endif
 884
 885         if (e->next != NULL)
 886                 remove_phi_cycle(pscc, env);
 887 }
 888
 889
 890 /**
 891  * Push a node onto the stack.
 892  *
 893  * @param env   the environment
 894  * @param n     the node to push
 895  */
 896 static void push(iv_env *env, ir_node *n) {
 897         node_entry *e;
 898
 899         if (env->tos == ARR_LEN(env->stack)) {
 900                 int nlen = ARR_LEN(env->stack) * 2;
 901                 ARR_RESIZE(ir_node *, env->stack, nlen);
 902         }
 903         env->stack[env->tos++] = n;
 904         e = get_irn_ne(n, env);
 905         e->in_stack = 1;
 906 }
 907
 908 /**
 909  * pop a node from the stack
 910  *
 911  * @param env   the environment
 912  *
 913  * @return  The topmost node
 914  */
 915 static ir_node *pop(iv_env *env)
 916 {
 917         ir_node *n = env->stack[--env->tos];
 918         node_entry *e = get_irn_ne(n, env);
 919
 920         e->in_stack = 0;
 921         return n;
 922 }
 923
 924 /**
 925  * Do Tarjan's SCC algorithm and drive OSR.
 926  *
 927  * @param irn  start at this node
 928  * @param env  the environment
 929  */
 930 static void dfs(ir_node *irn, iv_env *env)
 931 {
 932         int i, n;
 933         node_entry *node = get_irn_ne(irn, env);
 934
 935         mark_irn_visited(irn);
 936
 937         /* do not put blocks into the scc */
 938         if (is_Block(irn)) {
 939                 n = get_irn_arity(irn);
 940                 for (i = 0; i < n; ++i) {
 941                         ir_node *pred = get_irn_n(irn, i);
 942
 943                         if (irn_not_visited(pred))
 944                                 dfs(pred, env);
 945                 }
 946         }
 947         else {
 948                 ir_node *block = get_nodes_block(irn);
 949
 950                 node->DFSnum = env->nextDFSnum++;
 951                 node->low    = node->DFSnum;
 952                 push(env, irn);
 953
 954                 /* handle the block */
 955                 if (irn_not_visited(block))
 956                         dfs(block, env);
 957
 958                 n = get_irn_arity(irn);
 959                 for (i = 0; i < n; ++i) {
 960                         ir_node *pred = get_irn_n(irn, i);
 961                         node_entry *o = get_irn_ne(pred, env);
 962
 963                         if (irn_not_visited(pred)) {
 964                                 dfs(pred, env);
 965                                 node->low = MIN(node->low, o->low);
 966                         }
 967                         if (o->DFSnum < node->DFSnum && o->in_stack)
 968                                 node->low = MIN(o->DFSnum, node->low);
 969                 }
 970                 if (node->low == node->DFSnum) {
 971                         scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
 972                         ir_node *x;
 973
 974                         memset(pscc, 0, sizeof(*pscc));
 975                         do {
 976                                 node_entry *e;
 977
 978                                 x = pop(env);
 979                                 e = get_irn_ne(x, env);
 980                                 e->pscc    = pscc;
 981                                 e->next    = pscc->head;
 982                                 pscc->head = x;
 983                         } while (x != irn);
 984
 985                         env->process_scc(pscc, env);
 986                 }
 987         }
 988 }
 989
 990 /**
 991  * Do the DFS by starting at the End node of a graph.
 992  *
 993  * @param irg  the graph to process
 994  * @param env  the environment
 995  */
 996 static void do_dfs(ir_graph *irg, iv_env *env) {
 997         ir_graph *rem = current_ir_graph;
 998         ir_node *end = get_irg_end(irg);
 999         int i, n;
1000
1001         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
1002
1003         current_ir_graph = irg;
1004         inc_irg_visited(irg);
1005
1006         /* visit all visible nodes */
1007         dfs(end, env);
1008
1009         /* visit the keep-alives */
1010         n = get_End_n_keepalives(end);
1011         for (i = 0; i < n; ++i) {
1012                 ir_node *ka = get_End_keepalive(end, i);
1013
1014                 if (irn_not_visited(ka))
1015                         dfs(ka, env);
1016         }
1017
1018         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
1019
1020         current_ir_graph = rem;
1021 }
1022
1023 /**
1024  * Post-block-walker: assign the post-order number.
1025  */
1026 static void assign_po(ir_node *block, void *ctx) {
1027         iv_env *env = ctx;
1028         node_entry *e = get_irn_ne(block, env);
1029
1030         e->POnum = env->POnum++;
1031 }
1032
1033 /**
1034  * Follows the LFTR edges and return the last node in the chain.
1035  *
1036  * @param irn  the node that should be followed
1037  * @param env  the IV environment
1038  *
1039  * @note
1040  * In the current implementation only the last edge is stored, so
1041  * only one chain exists. That's why we might miss some opportunities.
1042  */
1043 static ir_node *followEdges(ir_node *irn, iv_env *env) {
1044         for (;;) {
1045                 LFTR_edge *e = LFTR_find(irn, env);
1046                 if (e)
1047                         irn = e->dst;
1048                 else
1049                         return irn;
1050         }
1051 }
1052
1053 /**
1054  * Apply one LFTR edge operation.
1055  * Return NULL if the transformation cannot be done safely without
1056  * an Overflow.
1057  *
1058  * @param iv   the induction variable
1059  * @param rc   the constant that should be translated
1060  * @param e    the LFTR edge
1061  * @param env  the IV environment
1062  *
1063  * @return the translated region constant or NULL
1064  *         if the translation was not possible
1065  *
1066  * @note
1067  * In the current implementation only the last edge is stored, so
1068  * only one chain exists. That's why we might miss some opportunities.
1069  */
1070 static ir_node *applyOneEdge(ir_node *iv, ir_node *rc, LFTR_edge *e, iv_env *env) {
1071         if (env->flags & osr_flag_lftr_with_ov_check) {
1072                 tarval *tv_l, *tv_r, *tv, *tv_init, *tv_incr;
1073                 tarval_int_overflow_mode_t ovmode;
1074                 scc *pscc;
1075
1076                 if (! is_counter_iv(iv, env)) {
1077                         DB((dbg, LEVEL_4, " not counter IV"));
1078                         return NULL;
1079                 }
1080
1081                 /* overflow can only be decided for Consts */
1082                 if (! is_Const(e->rc)) {
1083                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
1084                         return NULL;
1085                 }
1086
1087                 tv_l = get_Const_tarval(rc);
1088                 tv_r = get_Const_tarval(e->rc);
1089
1090                 ovmode = tarval_get_integer_overflow_mode();
1091                 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
1092
1093                 pscc    = get_iv_scc(iv, env);
1094                 tv_incr = pscc->incr;
1095                 tv_init = pscc->init;
1096
1097                 /*
1098                  * Check that no overflow occurs:
1099                  * init must be transformed without overflow
1100                  * the new rc must be transformed without overflow
1101                  * rc +/- incr must be possible without overflow
1102                  */
1103                 switch (e->code) {
1104                 case iro_Mul:
1105                         tv      = tarval_mul(tv_l, tv_r);
1106                         tv_init = tarval_mul(tv_init, tv_r);
1107                         tv_incr = tarval_mul(tv_incr, tv_r);
1108                         DB((dbg, LEVEL_4, " * %+F", tv_r));
1109                         break;
1110                 case iro_Add:
1111                         tv      = tarval_add(tv_l, tv_r);
1112                         tv_init = tarval_add(tv_init, tv_r);
1113                         DB((dbg, LEVEL_4, " + %+F", tv_r));
1114                         break;
1115                 case iro_Sub:
1116                         tv      = tarval_sub(tv_l, tv_r, NULL);
1117                         tv_init = tarval_sub(tv_init, tv_r, NULL);
1118                         DB((dbg, LEVEL_4, " - %+F", tv_r));
1119                         break;
1120                 default:
1121                         panic("Unsupported opcode");
1122                         tv = tarval_bad;
1123                 }
1124
1125                 if (pscc->code == iro_Add) {
1126                         tv = tarval_add(tv, tv_incr);
1127                 } else {
1128                         assert(pscc->code == iro_Sub);
1129                         tv = tarval_sub(tv, tv_incr, NULL);
1130                 }
1131
1132                 tarval_set_integer_overflow_mode(ovmode);
1133
1134                 if (tv == tarval_bad || tv_init == tarval_bad) {
1135                         DB((dbg, LEVEL_4, " = OVERFLOW"));
1136                         return NULL;
1137                 }
1138                 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
1139         }
1140         return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
1141 }
1142
1143 /**
1144  * Applies the operations represented by the LFTR edges to a
1145  * region constant and returns the value.
1146  * Return NULL if the transformation cannot be done safely without
1147  * an Overflow.
1148  *
1149  * @param iv   the IV node that starts the LFTR edge chain
1150  * @param rc   the region constant that should be translated
1151  * @param env  the IV environment
1152  *
1153  * @return the translated region constant or NULL
1154  *         if the translation was not possible
1155  */
1156 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
1157         if (env->flags & osr_flag_lftr_with_ov_check) {
1158                 /* overflow can only be decided for Consts */
1159                 if (! is_counter_iv(iv, env)) {
1160                         DB((dbg, LEVEL_4, "not counter IV\n", rc));
1161                         return NULL;
1162                 }
1163                 if (! is_Const(rc)) {
1164                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
1165                         return NULL;
1166                 }
1167                 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
1168         }
1169
1170         for (; rc;) {
1171                 LFTR_edge *e = LFTR_find(iv, env);
1172                 if (e) {
1173                         rc = applyOneEdge(iv, rc, e, env);
1174                         iv = e->dst;
1175                 }
1176                 else
1177                         break;
1178         }
1179         DB((dbg, LEVEL_3, "\n"));
1180         return rc;
1181 }
1182
1183 /**
1184  * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
1185  * and tries to optimize them.
1186  */
1187 static void do_lftr(ir_node *cmp, void *ctx) {
1188         iv_env *env = ctx;
1189         ir_node *left, *right, *liv, *riv;
1190         ir_node *iv, *rc;
1191         ir_node *nleft = NULL, *nright = NULL;
1192
1193         if (!is_Cmp(cmp))
1194                 return;
1195
1196         left  = get_Cmp_left(cmp);
1197         right = get_Cmp_right(cmp);
1198
1199         liv = is_iv(left, env);
1200         riv = is_iv(right, env);
1201         if (liv && is_rc(right, liv)) {
1202                 iv = left; rc = right;
1203
1204                 nright = applyEdges(iv, rc, env);
1205                 if (nright && nright != rc) {
1206                         nleft = followEdges(iv, env);
1207                 }
1208         }
1209         else if (riv && is_rc(left, riv)) {
1210                 iv = right; rc = left;
1211
1212                 nleft = applyEdges(iv, rc, env);
1213                 if (nleft && nleft != rc) {
1214                         nright = followEdges(iv, env);
1215                 }
1216         }
1217
1218         if (nleft && nright) {
1219                 DB((dbg, LEVEL_2, "  LFTR for %+F\n", cmp));
1220                 set_Cmp_left(cmp, nleft);
1221                 set_Cmp_right(cmp, nright);
1222                 ++env->lftr_replaced;
1223         }
1224 }
1225
1226 /**
1227  * do linear function test replacement.
1228  *
1229  * @param irg   the graph that should be optimized
1230  * @param env   the IV environment
1231  */
1232 static void lftr(ir_graph *irg, iv_env *env) {
1233         irg_walk_graph(irg, NULL, do_lftr, env);
1234 }
1235
1236 /**
1237  * Pre-walker: set all node links to NULL and fix the
1238  * block of Proj nodes.
1239  */
1240 static void clear_and_fix(ir_node *irn, void *env)
1241 {
1242         (void) env;
1243         set_irn_link(irn, NULL);
1244
1245         if (is_Proj(irn)) {
1246                 ir_node *pred = get_Proj_pred(irn);
1247                 set_nodes_block(irn, get_nodes_block(pred));
1248         }
1249 }
1250
1251 /* Performs Operator Strength Reduction for the passed graph. */
1252 void opt_osr(ir_graph *irg, unsigned flags) {
1253         iv_env   env;
1254         ir_graph *rem;
1255         int      edges;
1256
1257         if (! get_opt_strength_red()) {
1258                 /* only kill Phi cycles  */
1259                 remove_phi_cycles(irg);
1260                 return;
1261         }
1262
1263         rem = current_ir_graph;
1264         current_ir_graph = irg;
1265
1266         FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
1267
1268         DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
1269
1270         obstack_init(&env.obst);
1271         env.stack         = NEW_ARR_F(ir_node *, 128);
1272         env.tos           = 0;
1273         env.nextDFSnum    = 0;
1274         env.POnum         = 0;
1275         env.quad_map      = new_set(quad_cmp, 64);
1276         env.lftr_edges    = new_set(LFTR_cmp, 64);
1277         env.replaced      = 0;
1278         env.lftr_replaced = 0;
1279         env.flags         = flags;
1280         env.process_scc   = process_scc;
1281
1282         /* Clear all links and move Proj nodes into the
1283            the same block as it's predecessors.
1284            This can improve the placement of new nodes.
1285          */
1286         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
1287
1288         /* we need dominance */
1289         assure_doms(irg);
1290
1291         edges = edges_assure(irg);
1292
1293         /* calculate the post order number for blocks. */
1294         irg_block_edges_walk(get_irg_start_block(irg), NULL, assign_po, &env);
1295
1296         /* calculate the SCC's and drive OSR. */
1297         do_dfs(irg, &env);
1298
1299         if (env.replaced) {
1300                 /* try linear function test replacements */
1301                 //lftr(irg, &env); // currently buggy :-(
1302                 (void) lftr;
1303
1304                 set_irg_outs_inconsistent(irg);
1305                 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
1306         }
1307
1308         del_set(env.lftr_edges);
1309         del_set(env.quad_map);
1310         DEL_ARR_F(env.stack);
1311         obstack_free(&env.obst, NULL);
1312
1313         if (! edges)
1314                 edges_deactivate(irg);
1315
1316         current_ir_graph = rem;
1317 }
1318
1319 /* Remove any Phi cycles with only one real input. */
1320 void remove_phi_cycles(ir_graph *irg) {
1321         iv_env   env;
1322         ir_graph *rem;
1323
1324         rem = current_ir_graph;
1325         current_ir_graph = irg;
1326
1327         FIRM_DBG_REGISTER(dbg, "firm.opt.remove_phi");
1328
1329         DB((dbg, LEVEL_1, "Doing Phi cycle removement for %+F\n", irg));
1330
1331         obstack_init(&env.obst);
1332         env.stack         = NEW_ARR_F(ir_node *, 128);
1333         env.tos           = 0;
1334         env.nextDFSnum    = 0;
1335         env.POnum         = 0;
1336         env.quad_map      = NULL;
1337         env.lftr_edges    = NULL;
1338         env.replaced      = 0;
1339         env.lftr_replaced = 0;
1340         env.flags         = 0;
1341         env.process_scc   = process_phi_only_scc;
1342
1343         /* Clear all links and move Proj nodes into the
1344            the same block as it's predecessors.
1345            This can improve the placement of new nodes.
1346          */
1347         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
1348
1349         /* we need outs for calculating the post order */
1350         assure_irg_outs(irg);
1351
1352         /* calculate the post order number for blocks. */
1353         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
1354
1355         /* calculate the SCC's and drive OSR. */
1356         do_dfs(irg, &env);
1357
1358         if (env.replaced) {
1359                 set_irg_outs_inconsistent(irg);
1360                 DB((dbg, LEVEL_1, "remove_phi_cycles: %u Cycles removed\n\n", env.replaced));
1361         }
1362
1363         DEL_ARR_F(env.stack);
1364         obstack_free(&env.obst, NULL);
1365
1366         current_ir_graph = rem;
1367 }