nsz Git - libfirm/blob - ir/opt/opt_osr.c

   1 /**
   2  * Project:     libFIRM
   3  * File name:   ir/opt/opt_osr.c
   4  * Purpose:     Operator Strength Reduction, based on the algorithm by
   5  *              Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
   6  * Author:      Michael Beck
   7  * Modified by:
   8  * Created:     12.5.2006
   9  * CVS-ID:      $Id$
  10  * Copyright:   (c) 2006 Universität Karlsruhe
  11  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  12  */
  13 #ifdef HAVE_CONFIG_H
  14 #include "config.h"
  15 #endif
  16
  17 #ifdef HAVE_MALLOC_H
  18 #include <malloc.h>
  19 #endif
  20 #ifdef HAVE_ALLOCA_H
  21 #include <alloca.h>
  22 #endif
  23
  24 #include "opt_osr.h"
  25 #include "irgraph.h"
  26 #include "ircons.h"
  27 #include "irop_t.h"
  28 #include "irloop.h"
  29 #include "irdom.h"
  30 #include "irgmod.h"
  31 #include "irflag_t.h"
  32 #include "irgwalk.h"
  33 #include "irouts.h"
  34 #include "debug.h"
  35 #include "obst.h"
  36 #include "set.h"
  37 #include "tv.h"
  38 #include "hashptr.h"
  39 #include "irtools.h"
  40 #include "irloop_t.h"
  41 #include "array.h"
  42 #include "firmstat.h"
  43
  44 /** The debug handle. */
  45 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  46
  47 /** A scc. */
  48 typedef struct scc {
  49         ir_node *head;          /**< the head of the list */
  50 } scc;
  51
  52 /** A node entry */
  53 typedef struct node_entry {
  54         unsigned DFSnum;    /**< the DFS number of this node */
  55         unsigned low;       /**< the low number of this node */
  56         ir_node  *header;   /**< the header of this node */
  57         int      in_stack;  /**< flag, set if the node is on the stack */
  58         ir_node  *next;     /**< link to the next node the the same scc */
  59         scc      *pscc;     /**< the scc of this node */
  60         unsigned POnum;     /**< the post order number for blocks */
  61 } node_entry;
  62
  63 /** The environment. */
  64 typedef struct iv_env {
  65         struct obstack obst;    /**< an obstack for allocations */
  66         ir_node  **stack;       /**< the node stack */
  67         int      tos;           /**< tos index */
  68         unsigned nextDFSnum;    /**< the current DFS number */
  69         unsigned POnum;         /**< current post order number */
  70         set      *quad_map;     /**< a map from (op, iv, rc) to node */
  71         set      *lftr_edges;   /**< the set of lftr edges */
  72         unsigned replaced;      /**< number of replaced ops */
  73         unsigned lftr_replaced; /**< number of applied linear function test replacements */
  74         unsigned flags;         /**< additional flags */
  75 } iv_env;
  76
  77 /**
  78  * An entry in the (op, node, node) -> node map.
  79  */
  80 typedef struct quad_t {
  81         opcode  code;  /**< the opcode of the reduced operation */
  82         ir_node *op1;  /**< the first operand the reduced operation */
  83         ir_node *op2;  /**< the second operand of the reduced operation */
  84
  85         ir_node *res; /**< the reduced operation */
  86 } quad_t;
  87
  88 /**
  89  * A LFTR edge.
  90  */
  91 typedef struct LFTR_edge {
  92         ir_node *src;   /**< the source node */
  93         ir_node *dst;   /**< the destination node */
  94         opcode  code;   /**< the opcode that must be applied */
  95         ir_node *rc;    /**< the region const that must be applied */
  96 } LFTR_edge;
  97
  98 /* forward */
  99 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
 100
 101 /**
 102  * Compare two LFTR edges.
 103  */
 104 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
 105         const LFTR_edge *l1 = e1;
 106         const LFTR_edge *l2 = e2;
 107
 108         return l1->src != l2->src;
 109 }
 110
 111 /**
 112  * Find a LFTR edge.
 113  */
 114 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
 115         LFTR_edge key;
 116
 117         key.src  = src;
 118
 119         return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 120 }
 121
 122 /**
 123  * Add a LFTR edge.
 124  */
 125 static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
 126         LFTR_edge key;
 127
 128         key.src  = src;
 129         key.dst  = dst;
 130         key.code = code;
 131         key.rc   = rc;
 132
 133         /*
 134          * There might be more than one edge here. This is rather bad
 135          * because we currently store only one.
 136          */
 137 //      assert(LFTR_find(src, env) == NULL);
 138         set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
 139 }
 140
 141 /**
 142  * Gets the node_entry of a node
 143  */
 144 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
 145         node_entry *e = get_irn_link(irn);
 146
 147         if (! e) {
 148                 e = obstack_alloc(&env->obst, sizeof(*e));
 149                 memset(e, 0, sizeof(*e));
 150                 set_irn_link(irn, e);
 151         }
 152         return e;
 153 }
 154
 155 /**
 156  * Check if irn is an IV.
 157  *
 158  * @param irn  the node to check
 159  * @param env  the environment
 160  *
 161  * @returns the header if it is one, NULL else
 162  */
 163 static ir_node *is_iv(ir_node *irn, iv_env *env) {
 164         return get_irn_ne(irn, env)->header;
 165 }
 166
 167 /**
 168  * Check if irn is a region constant.
 169  * The block or irn must strictly dominate the header block.
 170  *
 171  * @param irn           the node to check
 172  * @param header_block  the header block of the induction variable
 173  */
 174 static int is_rc(ir_node *irn, ir_node *header_block) {
 175         ir_node *block = get_nodes_block(irn);
 176
 177         return (block != header_block) && block_dominates(block, header_block);
 178 }
 179
 180 /**
 181  * Set compare function for the quad set.
 182  */
 183 static int quad_cmp(const void *e1, const void *e2, size_t size) {
 184         const quad_t *c1 = e1;
 185         const quad_t *c2 = e2;
 186
 187         return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
 188 }
 189
 190 /**
 191  * Check if an reduced operation was already calculated.
 192  *
 193  * @param code  the opcode of the operation
 194  * @param op1   the first operand of the operation
 195  * @param op2   the second operand of the operation
 196  * @param env   the environment
 197  *
 198  * @return the already reduced node or NULL if this operation is not yet reduced
 199  */
 200 static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
 201         quad_t key, *entry;
 202
 203         key.code = code;
 204         key.op1 = op1;
 205         key.op2 = op2;
 206
 207         entry = set_find(env->quad_map, &key, sizeof(key),
 208                          (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 209         if (entry)
 210                 return entry->res;
 211         return NULL;
 212 }
 213
 214 /**
 215  * Add an reduced operation.
 216  *
 217  * @param code    the opcode of the operation
 218  * @param op1     the first operand of the operation
 219  * @param op2     the second operand of the operation
 220  * @param result  the result of the reduced operation
 221  * @param env     the environment
 222  */
 223 static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
 224         quad_t key;
 225
 226         key.code = code;
 227         key.op1  = op1;
 228         key.op2  = op2;
 229         key.res  = result;
 230
 231         set_insert(env->quad_map, &key, sizeof(key),
 232                    (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
 233 }
 234
 235 /**
 236  * Find a location where to place a bin-op whose operands are in
 237  * block1 and block2.
 238  *
 239  * @param block1  the block of the first operand
 240  * @param block2  the block of the second operand
 241  *
 242  * Note that we know here that such a place must exists. Moreover, this means
 243  * that either block1 dominates block2 or vice versa. So, just return
 244  * the "smaller" one.
 245  */
 246 static ir_node *find_location(ir_node *block1, ir_node *block2) {
 247         if (block_dominates(block1, block2))
 248                 return block2;
 249         assert(block_dominates(block2, block1));
 250         return block1;
 251 }
 252
 253 /**
 254  * Create a node that executes an op1 code op1 operation.
 255  *
 256  * @param code   the opcode to execute
 257  * @param db     debug info to add to the new node
 258  * @param op1    the first operand
 259  * @param op2    the second operand
 260  * @param mode   the mode of the new operation
 261  *
 262  * @return the newly created node
 263  */
 264 static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
 265         ir_graph *irg = current_ir_graph;
 266         ir_node *result;
 267         ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
 268
 269         switch (code) {
 270         case iro_Mul:
 271                 result = new_rd_Mul(db, irg, block, op1, op2, mode);
 272                 break;
 273         case iro_Add:
 274                 result = new_rd_Add(db, irg, block, op1, op2, mode);
 275                 break;
 276         case iro_Sub:
 277                 result = new_rd_Sub(db, irg, block, op1, op2, mode);
 278                 break;
 279         default:
 280                 assert(0);
 281                 result = NULL;
 282         }
 283         return result;
 284 }
 285
 286 /**
 287  * The Apply operation.
 288  *
 289  * @param orig   the node that represent the original operation and determines
 290  *               the opcode, debug-info and mode of a newly created one
 291  * @param op1    the first operand
 292  * @param op2    the second operand
 293  * @param env    the environment
 294  *
 295  * @return the newly created node
 296  */
 297 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
 298         opcode code = get_irn_opcode(orig);
 299         ir_node *result = search(code, op1, op2, env);
 300
 301         if (! result) {
 302                 dbg_info *db = get_irn_dbg_info(orig);
 303                 ir_node *op1_header = get_irn_ne(op1, env)->header;
 304                 ir_node *op2_header = get_irn_ne(op2, env)->header;
 305
 306                 if (op1_header != NULL && is_rc(op2, op1_header)) {
 307                         result = reduce(orig, op1, op2, env);
 308                 }
 309                 else if (op2_header != NULL && is_rc(op1, op2_header)) {
 310                         result = reduce(orig, op2, op1, env);
 311                 }
 312                 else {
 313                         result = do_apply(code, db, op1, op2, get_irn_mode(orig));
 314                         get_irn_ne(result, env)->header = NULL;         }
 315         }
 316         return result;
 317 }
 318
 319 /**
 320  * The Reduce operation.
 321  *
 322  * @param orig   the node that represent the original operation and determines
 323  *               the opcode, debug-info and mode of a newly created one
 324  * @param iv     the induction variable
 325  * @param rc     the region constant
 326  * @param env    the environment
 327  *
 328  * @return the reduced node
 329  */
 330 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
 331         opcode code = get_irn_opcode(orig);
 332         ir_node *result = search(code, iv, rc, env);
 333
 334         if (! result) {
 335                 node_entry *e, *iv_e;
 336                 int i, n;
 337                 ir_mode *mode = get_irn_mode(orig);
 338
 339                 result = exact_copy(iv);
 340
 341                 /* Beware: we must always create a new nduction variable with the same mode
 342                    as the node we are replacing. Espicially this means the mode might be changed
 343                    from P to I and back. This is always possible, because we have only Phi, Add
 344                    and Sub nodes. */
 345                 set_irn_mode(result, mode);
 346                 add(code, iv, rc, result, env);
 347                 DB((dbg, LEVEL_3, "   Created new %+F for %+F (%s %+F)\n", result, iv,
 348                         get_irn_opname(orig), rc));
 349
 350                 iv_e = get_irn_ne(iv, env);
 351                 e    = get_irn_ne(result, env);
 352                 e->header = iv_e->header;
 353
 354                 /* create the LFTR edge */
 355                 LFTR_add(iv, result, code, rc, env);
 356
 357                 n = get_irn_arity(result);
 358                 for (i = 0; i < n; ++i) {
 359                         ir_node *o = get_irn_n(result, i);
 360
 361                         e = get_irn_ne(o, env);
 362                         if (e->header == iv_e->header)
 363                                 o = reduce(orig, o, rc, env);
 364                         else if (is_Phi(result))
 365                                 o = apply(orig, o, rc, env);
 366                         else {
 367                                 if (code == iro_Mul)
 368                                         o = apply(orig, o, rc, env);
 369                         }
 370                         set_irn_n(result, i, o);
 371                 }
 372         }
 373         else {
 374                 DB((dbg, LEVEL_3, "   Already Created %+F for %+F (%s %+F)\n", result, iv,
 375                         get_irn_opname(orig), rc));
 376         }
 377         return result;
 378 }
 379
 380 /**
 381  * The Replace operation.
 382  *
 383  * @param irn   the node that will be replaced
 384  * @param iv    the induction variable
 385  * @param rc    the region constant
 386  * @param env   the environment
 387  */
 388 static int replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
 389         ir_node *result;
 390         ir_loop *iv_loop  = get_irn_loop(get_nodes_block(iv));
 391         ir_loop *irn_loop = get_irn_loop(get_nodes_block(irn));
 392
 393         /* only replace nodes that are in the same (or deeper loops) */
 394         if (get_loop_depth(irn_loop) >= get_loop_depth(iv_loop)) {
 395                 DB((dbg, LEVEL_2, "  Replacing %+F\n", irn));
 396
 397                 result = reduce(irn, iv, rc, env);
 398                 if (result != irn) {
 399                         node_entry *e, *iv_e;
 400
 401                         hook_strength_red(current_ir_graph, irn);
 402                         exchange(irn, result);
 403                         e = get_irn_ne(result, env);
 404                         iv_e = get_irn_ne(iv, env);
 405                         e->header = iv_e->header;
 406                 }
 407                 ++env->replaced;
 408                 return 1;
 409         }
 410         return 0;
 411 }
 412
 413 /**
 414  * Check if a node can be replaced (+, -, *).
 415  *
 416  * @param irn   the node to check
 417  * @param env   the environment
 418  *
 419  * @return non-zero if irn should be Replace'd
 420  */
 421 static int check_replace(ir_node *irn, iv_env *env) {
 422         ir_node *left, *right, *iv, *rc;
 423         ir_op   *op  = get_irn_op(irn);
 424         opcode  code = get_op_code(op);
 425         ir_node *liv, *riv;
 426
 427         switch (code) {
 428         case iro_Mul:
 429         case iro_Add:
 430         case iro_Sub:
 431                 iv = rc = NULL;
 432
 433                 left  = get_binop_left(irn);
 434                 right = get_binop_right(irn);
 435
 436                 liv = is_iv(left, env);
 437                 riv = is_iv(right, env);
 438                 if (liv && is_rc(right, liv)) {
 439                         iv = left; rc = right;
 440                 }
 441                 else if (riv && is_op_commutative(op) &&
 442                                     is_rc(left, riv)) {
 443                         iv = right; rc = left;
 444                 }
 445
 446                 if (iv) {
 447                         if (env->flags & osr_flag_ignore_x86_shift) {
 448                                 if (is_Const(rc)) {
 449                                         tarval *tv = get_Const_tarval(rc);
 450
 451                                         if (tarval_is_long(tv)) {
 452                                                 long value = get_tarval_long(tv);
 453
 454                                                 if (value == 2 || value == 4 || value == 8) {
 455                                                         /* do not reduce multiplications by 2, 4, 8 */
 456                                                         break;
 457                                                 }
 458                                         }
 459                                 }
 460                         }
 461
 462                         return replace(irn, iv, rc, env);
 463                 }
 464                 break;
 465         default:
 466                 break;
 467         }
 468         return 0;
 469 }
 470
 471 /**
 472  * Check which SCC's are induction variables.
 473  *
 474  * @param pscc  a SCC
 475  * @param env   the environment
 476  */
 477 static void classify_iv(scc *pscc, iv_env *env) {
 478         ir_node *irn, *next, *header = NULL;
 479         node_entry *b, *h = NULL;
 480         int j, only_phi, num_outside;
 481         ir_node *out_rc;
 482
 483         /* find the header block for this scc */
 484         for (irn = pscc->head; irn; irn = next) {
 485                 node_entry *e = get_irn_link(irn);
 486                 ir_node *block = get_nodes_block(irn);
 487
 488                 next = e->next;
 489                 b = get_irn_ne(block, env);
 490
 491                 if (header) {
 492                         if (h->POnum < b->POnum) {
 493                                 header = block;
 494                                 h      = b;
 495                         }
 496                 }
 497                 else {
 498                         header = block;
 499                         h      = b;
 500                 }
 501         }
 502
 503         /* check if this scc contains only Phi, Add or Sub nodes */
 504         only_phi    = 1;
 505         num_outside = 0;
 506         out_rc      = NULL;
 507         for (irn = pscc->head; irn; irn = next) {
 508                 node_entry *e = get_irn_ne(irn, env);
 509
 510                 next = e->next;
 511                 switch (get_irn_opcode(irn)) {
 512                 case iro_Add:
 513                 case iro_Sub:
 514                         only_phi = 0;
 515                         /* fall through */
 516                 case iro_Phi:
 517                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
 518                                 ir_node *pred  = get_irn_n(irn, j);
 519                                 node_entry *pe = get_irn_ne(pred, env);
 520
 521                                 if (pe->pscc != e->pscc) {
 522                                         /* not in the same SCC, must be a region const */
 523                                         if (! is_rc(pred, header)) {
 524                                                 /* not an induction variable */
 525                                                 goto fail;
 526                                         }
 527                                         if (! out_rc) {
 528                                                 out_rc = pred;
 529                                                 ++num_outside;
 530                                         }
 531                                         else if (out_rc != pred)
 532                                                 ++num_outside;
 533                                 }
 534                         }
 535                         break;
 536                 default:
 537                         /* not an induction variable */
 538                         goto fail;
 539                 }
 540         }
 541         /* found an induction variable */
 542         DB((dbg, LEVEL_2, "  Found an induction variable:\n  "));
 543         if (only_phi && num_outside == 1) {
 544                 DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
 545         }
 546
 547         /* set the header for every node in this scc */
 548         for (irn = pscc->head; irn; irn = next) {
 549                 node_entry *e = get_irn_ne(irn, env);
 550                 e->header = header;
 551                 next = e->next;
 552                 DB((dbg, LEVEL_2, " %+F,", irn));
 553         }
 554         DB((dbg, LEVEL_2, "\n"));
 555         return;
 556
 557 fail:
 558         for (irn = pscc->head; irn; irn = next) {
 559                 node_entry *e = get_irn_ne(irn, env);
 560
 561                 next = e->next;
 562                 if (! check_replace(irn, env))
 563                         e->header = NULL;
 564         }
 565 }
 566
 567 /**
 568  * Process a SCC.
 569  *
 570  * @param pscc  the SCC
 571  * @param env   the environment
 572  */
 573 static void process_scc(scc *pscc, iv_env *env) {
 574         ir_node *head = pscc->head;
 575         node_entry *e = get_irn_link(head);
 576
 577 #ifdef DEBUG_libfirm
 578         {
 579                 ir_node *irn, *next;
 580
 581                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
 582                 for (irn = pscc->head; irn; irn = next) {
 583                         node_entry *e = get_irn_link(irn);
 584
 585                         next = e->next;
 586
 587                         DB((dbg, LEVEL_4, " %+F,", irn));
 588                 }
 589                 DB((dbg, LEVEL_4, "\n"));
 590         }
 591 #endif
 592
 593         if (e->next == NULL) {
 594                 /* this SCC has only a single member */
 595                 check_replace(head, env);
 596         }
 597         else {
 598                 classify_iv(pscc, env);
 599         }
 600 }
 601
 602 /**
 603  * Push a node onto the stack.
 604  *
 605  * @param env   the environment
 606  * @param n     the node to push
 607  */
 608 static void push(iv_env *env, ir_node *n) {
 609         node_entry *e;
 610
 611         if (env->tos == ARR_LEN(env->stack)) {
 612                 int nlen = ARR_LEN(env->stack) * 2;
 613                 ARR_RESIZE(ir_node *, env->stack, nlen);
 614         }
 615         env->stack[env->tos++] = n;
 616         e = get_irn_ne(n, env);
 617         e->in_stack = 1;
 618 }
 619
 620 /**
 621  * pop a node from the stack
 622  *
 623  * @param env   the environment
 624  *
 625  * @return  The topmost node
 626  */
 627 static ir_node *pop(iv_env *env)
 628 {
 629         ir_node *n = env->stack[--env->tos];
 630         node_entry *e = get_irn_ne(n, env);
 631
 632         e->in_stack = 0;
 633         return n;
 634 }
 635
 636 /**
 637  * Do Tarjan's SCC algorithm and drive OSR.
 638  *
 639  * @param irn  start at this node
 640  * @param env  the environment
 641  */
 642 static void dfs(ir_node *irn, iv_env *env)
 643 {
 644         int i, n;
 645         node_entry *node = get_irn_ne(irn, env);
 646
 647         mark_irn_visited(irn);
 648
 649         /* do not put blocks into the scc */
 650         if (is_Block(irn)) {
 651                 n = get_irn_arity(irn);
 652                 for (i = 0; i < n; ++i) {
 653                         ir_node *pred = get_irn_n(irn, i);
 654
 655                         if (irn_not_visited(pred))
 656                                 dfs(pred, env);
 657                 }
 658         }
 659         else {
 660                 ir_node *block = get_nodes_block(irn);
 661
 662                 node->DFSnum = env->nextDFSnum++;
 663                 node->low    = node->DFSnum;
 664                 push(env, irn);
 665
 666                 /* handle the block */
 667                 if (irn_not_visited(block))
 668                         dfs(block, env);
 669
 670                 n = get_irn_arity(irn);
 671                 for (i = 0; i < n; ++i) {
 672                         ir_node *pred = get_irn_n(irn, i);
 673                         node_entry *o = get_irn_ne(pred, env);
 674
 675                         if (irn_not_visited(pred)) {
 676                                 dfs(pred, env);
 677                                 node->low = MIN(node->low, o->low);
 678                         }
 679                         if (o->DFSnum < node->DFSnum && o->in_stack)
 680                                 node->low = MIN(o->DFSnum, node->low);
 681                 }
 682                 if (node->low == node->DFSnum) {
 683                         scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
 684                         ir_node *x;
 685
 686                         pscc->head = NULL;
 687                         do {
 688                                 node_entry *e;
 689
 690                                 x = pop(env);
 691                                 e = get_irn_ne(x, env);
 692                                 e->pscc    = pscc;
 693                                 e->next    = pscc->head;
 694                                 pscc->head = x;
 695                         } while (x != irn);
 696
 697                         process_scc(pscc, env);
 698                 }
 699         }
 700 }
 701
 702 /**
 703  * Do the DFS by starting at the End node of a graph.
 704  *
 705  * @param irg  the graph to process
 706  * @param env  the environment
 707  */
 708 static void do_dfs(ir_graph *irg, iv_env *env) {
 709         ir_graph *rem = current_ir_graph;
 710         ir_node *end = get_irg_end(irg);
 711         int i, n;
 712
 713         current_ir_graph = irg;
 714         inc_irg_visited(irg);
 715
 716         /* visit all visible nodes */
 717         dfs(end, env);
 718
 719         /* visit the keep-alives */
 720         n = get_End_n_keepalives(end);
 721         for (i = 0; i < n; ++i) {
 722                 ir_node *ka = get_End_keepalive(end, i);
 723
 724                 if (irn_not_visited(ka))
 725                         dfs(ka, env);
 726         }
 727
 728         current_ir_graph = rem;
 729 }
 730
 731 /**
 732  * Post-block-walker: assign the post-order number.
 733  */
 734 static void assign_po(ir_node *block, void *ctx) {
 735         iv_env *env = ctx;
 736         node_entry *e = get_irn_ne(block, env);
 737
 738         e->POnum = env->POnum++;
 739 }
 740
 741 /**
 742  * Follows the LFTR edges and return the last node in the chain.
 743  *
 744  * @param irn  the node that should be followed
 745  * @param env  the IV environment
 746  *
 747  * @note
 748  * In the current implementation only the last edge is stored, so
 749  * only one chain exists. That's why we might miss some opportunities.
 750  */
 751 static ir_node *followEdges(ir_node *irn, iv_env *env) {
 752         for (;;) {
 753                 LFTR_edge *e = LFTR_find(irn, env);
 754                 if (e)
 755                         irn = e->dst;
 756                 else
 757                         return irn;
 758         }
 759 }
 760
 761 /**
 762  * Apply one LFTR edge operation.
 763  * Return NULL if the transformation cannot be done safely without
 764  * an Overflow.
 765  *
 766  * @param rc   the IV node that should be translated
 767  * @param e    the LFTR edge
 768  * @param env  the IV environment
 769  *
 770  * @return the translated region constant or NULL
 771  *         if the translation was not possible
 772  *
 773  * @note
 774  * In the current implementation only the last edge is stored, so
 775  * only one chain exists. That's why we might miss some opportunities.
 776  */
 777 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
 778         if (env->flags & osr_flag_lftr_with_ov_check) {
 779                 tarval *tv_l, *tv_r, *tv;
 780                 tarval_int_overflow_mode_t ovmode;
 781
 782                 /* overflow can only be decided for Consts */
 783                 if (! is_Const(e->rc)) {
 784                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
 785                         return NULL;
 786                 }
 787
 788                 tv_l = get_Const_tarval(rc);
 789                 tv_r = get_Const_tarval(e->rc);
 790
 791                 ovmode = tarval_get_integer_overflow_mode();
 792                 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
 793
 794                 switch (e->code) {
 795                 case iro_Mul:
 796                         tv = tarval_mul(tv_l, tv_r);
 797                         DB((dbg, LEVEL_4, " * %+F", tv_r));
 798                         break;
 799                 case iro_Add:
 800                         tv = tarval_add(tv_l, tv_r);
 801                         DB((dbg, LEVEL_4, " + %+F", tv_r));
 802                         break;
 803                 case iro_Sub:
 804                         tv = tarval_sub(tv_l, tv_r);
 805                         DB((dbg, LEVEL_4, " - %+F", tv_r));
 806                         break;
 807                 default:
 808                         assert(0);
 809                         tv = tarval_bad;
 810                 }
 811                 tarval_set_integer_overflow_mode(ovmode);
 812
 813                 if (tv == tarval_bad) {
 814                         DB((dbg, LEVEL_4, " = OVERFLOW"));
 815                         return NULL;
 816                 }
 817                 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
 818         }
 819         return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
 820 }
 821
 822 /**
 823  * Applies the operations represented by the LFTR edges to a
 824  * region constant and returns the value.
 825  * Return NULL if the transformation cannot be done safely without
 826  * an Overflow.
 827  *
 828  * @param iv   the IV node that starts the LFTR edge chain
 829  * @param rc   the region constant that should be translated
 830  * @param env  the IV environment
 831  *
 832  * @return the translated region constant or NULL
 833  *         if the translation was not possible
 834  */
 835 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
 836         ir_node *irn = iv;
 837
 838         if (env->flags & osr_flag_lftr_with_ov_check) {
 839                 /* overflow can only be decided for Consts */
 840                 if (! is_Const(rc)) {
 841                         DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
 842                         return NULL;
 843                 }
 844                 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
 845         }
 846
 847         for (irn = iv; rc;) {
 848                 LFTR_edge *e = LFTR_find(irn, env);
 849                 if (e) {
 850                         rc = applyOneEdge(rc, e, env);
 851                         irn = e->dst;
 852                 }
 853                 else
 854                         break;
 855         }
 856         DB((dbg, LEVEL_3, "\n"));
 857         return rc;
 858 }
 859
 860 /**
 861  * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
 862  * and tries to optimize them.
 863  */
 864 static void do_lftr(ir_node *cmp, void *ctx) {
 865         iv_env *env = ctx;
 866         ir_node *left, *right, *liv, *riv;
 867         ir_node *iv, *rc;
 868         ir_node *nleft = NULL, *nright = NULL;
 869
 870         if (get_irn_op(cmp) != op_Cmp)
 871                 return;
 872
 873         left  = get_Cmp_left(cmp);
 874         right = get_Cmp_right(cmp);
 875
 876         liv = is_iv(left, env);
 877         riv = is_iv(right, env);
 878         if (liv && is_rc(right, liv)) {
 879                 iv = left; rc = right;
 880
 881                 nright = applyEdges(iv, rc, env);
 882                 if (nright && nright != rc) {
 883                         nleft = followEdges(iv, env);
 884                 }
 885         }
 886         else if (riv && is_rc(left, riv)) {
 887                 iv = right; rc = left;
 888
 889                 nleft = applyEdges(iv, rc, env);
 890                 if (nleft && nleft != rc) {
 891                         nright = followEdges(iv, env);
 892                 }
 893         }
 894
 895         if (nleft && nright) {
 896                 DB((dbg, LEVEL_2, "  LFTR for %+F\n", cmp));
 897                 set_Cmp_left(cmp, nleft);
 898                 set_Cmp_right(cmp, nright);
 899                 ++env->lftr_replaced;
 900         }
 901 }
 902
 903 /**
 904  * do linear function test replacement.
 905  *
 906  * @param irg   the graph that should be optimized
 907  * @param env   the IV environment
 908  */
 909 static void lftr(ir_graph *irg, iv_env *env) {
 910         irg_walk_graph(irg, NULL, do_lftr, env);
 911 }
 912
 913 /**
 914  * Pre-walker: set all node links to NULL and fix the
 915  * block of Proj nodes.
 916  */
 917 static void clear_and_fix(ir_node *irn, void *env)
 918 {
 919         set_irn_link(irn, NULL);
 920
 921         if (is_Proj(irn)) {
 922                 ir_node *pred = get_Proj_pred(irn);
 923                 set_irn_n(irn, -1, get_irn_n(pred, -1));
 924         }
 925 }
 926
 927 /* Performs Operator Strength Reduction for the passed graph. */
 928 void opt_osr(ir_graph *irg, unsigned flags) {
 929         iv_env env;
 930
 931         if (! get_opt_strength_red())
 932                 return;
 933
 934         FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
 935 //      firm_dbg_set_mask(dbg, SET_LEVEL_3);
 936
 937         DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
 938
 939         obstack_init(&env.obst);
 940         env.stack         = NEW_ARR_F(ir_node *, 128);
 941         env.tos           = 0;
 942         env.nextDFSnum    = 0;
 943         env.POnum         = 0;
 944         env.quad_map      = new_set(quad_cmp, 64);
 945         env.lftr_edges    = new_set(LFTR_cmp, 64);
 946         env.replaced      = 0;
 947         env.lftr_replaced = 0;
 948         env.flags         = flags;
 949
 950         /* we need control flow loop information to decide whether
 951          * we should do a replacement or not. */
 952         construct_cf_backedges(irg);
 953
 954         /* Clear all links and move Proj nodes into the
 955            the same block as it's predecessors.
 956            This can improve the placement of new nodes.
 957          */
 958         irg_walk_graph(irg, NULL, clear_and_fix, NULL);
 959
 960         /* we need dominance */
 961         assure_doms(irg);
 962         assure_irg_outs(irg);
 963
 964         /* calculate the post order number for blocks. */
 965         irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
 966
 967         /* calculate the SCC's and drive OSR. */
 968         do_dfs(irg, &env);
 969
 970         if (env.replaced) {
 971                 /* try linear function test replacements */
 972                 //lftr(irg, &env);
 973                 (void) lftr;
 974
 975                 set_irg_outs_inconsistent(irg);
 976                 /* cfg loop still valid */
 977
 978                 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
 979         }
 980
 981         del_set(env.lftr_edges);
 982         del_set(env.quad_map);
 983         DEL_ARR_F(env.stack);
 984         obstack_free(&env.obst, NULL);
 985 }