nsz Git - libfirm/blob - ir/opt/loop_unrolling.c

   1 /**
   2  *
   3  * @file loop_unrolling.c
   4  *
   5  * Project:     libFIRM
   6  * File name:   ir/opt/loop_unrolling.c
   7  * Purpose:     Make loop unrolling.
   8  * Author:      Beyhan Veliev
   9  * Modified by:
  10  * Created:     16.11.2004
  11  * CVS-ID:      $Id$
  12  * Copyright:   (c) 2004 Universität Karlsruhe
  13  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  14  */
  15 #ifdef HAVE_CONFIG_H
  16 #include "config.h"
  17 #endif
  18
  19 #ifdef HAVE_STRING_H
  20 #include <string.h>
  21 #endif
  22 #ifdef HAVE_MALLOC_H
  23 #include <malloc.h>
  24 #endif
  25 #ifdef HAVE_ALLOCA_H
  26 #include <alloca.h>
  27 #endif
  28
  29 # include "loop_unrolling.h"
  30
  31 # include "irgwalk.h"
  32 # include "ircons.h"
  33 # include "irgmod.h"
  34 # include "irloop_t.h"
  35 # include "irgopt_t.h"
  36 # include "irnode_t.h"
  37 # include "irouts.h"
  38 # include "trouts.h"
  39 # include "hashptr.h"
  40 # include "pset.h"
  41 # include "strength_red.h"
  42
  43 /* We will unroll maximal 4-times.  */
  44 #define MAX_UNROLL 4
  45
  46 /* We will know if the head of the loop to be copy.
  47  * Default "0"  don't copy.*/
  48 static int copy_loop_head;
  49
  50 typedef struct {
  51   ir_node *irn ;                /* Node of the loop to be unrolling*/
  52   ir_node *copy[MAX_UNROLL] ;   /* The copy of the node */
  53 } copies_t;
  54
  55 /**
  56  * Compare two elements of the copies_t set
  57  */
  58 static int set_cmp(const void *elt, const void *key, size_t size)
  59 {
  60   const copies_t *c1 = elt;
  61   const copies_t *c2 = key;
  62
  63   return c1->irn != c2->irn;
  64 }
  65
  66 static INLINE int * new_backedge_arr(struct obstack *obst, int size)
  67 {
  68   int *res = NEW_ARR_D (int, obst, size);
  69   memset(res, 0, sizeof(int) * size);
  70   return res;
  71 }
  72
  73 static INLINE void new_backedge_info(ir_node *n) {
  74   switch(get_irn_opcode(n)) {
  75   case iro_Block:
  76     n->attr.block.cg_backedge = NULL;
  77     n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  78     break;
  79   case iro_Phi:
  80     n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  81     break;
  82   case iro_Filter:
  83     n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  84     break;
  85   default: ;
  86   }
  87 }
  88
  89 /**
  90  * Remember the new node in the old node by using a field all nodes have.
  91  */
  92 static INLINE void
  93 set_new_node (ir_node *old, ir_node *new)
  94 {
  95   old->link = new;
  96 }
  97
  98 /**
  99  * Copies the node to the new obstack. The Ins of the new node point to
 100  * the predecessors on the old obstack.  For block/phi nodes not all
 101  * predecessors might be copied.  n->link points to the new node.
 102  * For Phi and Block nodes the function allocates in-arrays with an arity
 103  * only for useful predecessors.  The arity is determined by counting
 104  * the non-bad predecessors of the block.
 105  *
 106  * @param n    The node to be copied
 107  * @param env  if non-NULL, the node number attribute will be copied to the new node
 108  */
 109 static void
 110 copy_node (ir_node *n, void *env)
 111 {
 112   ir_node *nn;
 113   int new_arity;
 114   ir_op *op = get_irn_op(n);
 115   int copy_node_nr = false;
 116
 117   /* The end node looses it's flexible in array.  This doesn't matter,
 118      as dead node elimination builds End by hand, inlineing doesn't use
 119      the End node. */
 120   /* assert(op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */
 121
 122   if (op == op_Bad)
 123     /* node copied already */
 124     return;
 125
 126   new_arity = get_irn_arity(n);
 127
 128   nn = new_ir_node(get_irn_dbg_info(n),
 129            current_ir_graph,
 130            NULL,            /* no block yet, will be set later */
 131            op,
 132            get_irn_mode(n),
 133            new_arity,
 134            get_irn_in(n));
 135
 136
 137   /* Copy the attributes.  These might point to additional data.  If this
 138      was allocated on the old obstack the pointers now are dangling.  This
 139      frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 140   copy_node_attr(n, nn);
 141   new_backedge_info(nn);
 142   set_new_node(n, nn);
 143
 144 #if DEBUG_libfirm
 145   if (copy_node_nr) {
 146     /* for easier debugging, we want to copy the node numbers too */
 147     nn->node_nr = n->node_nr;
 148   }
 149 #endif
 150
 151 }
 152 /*Check if phi in the loop head is.
 153  *
 154  * @param phi               Must be a phi node from the loop.
 155  * @param loop_head         The loop head .
 156  */
 157 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
 158   assert(is_Block(loop_head));
 159   return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
 160 }
 161
 162 /**
 163  * Copies predecessors of node to copies of node.
 164  * If the predecessor is a loop invariant, then the copy get it
 165  * as predecessor, else the copy of the predecessor.
 166  *
 167  * @param l_n                A set, where the node of the loop are saved.
 168  * @param value              A element of the set.
 169  * @param info               Contains information about the induction variable.
 170  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 171  * @param env                Free environment pointer.
 172  */
 173 static void
 174 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
 175 {
 176   int i, p, irn_arity;
 177   copies_t key, *value_pred;
 178   // The head of the unrolling loop.
 179   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 180
 181   irn_arity = get_irn_arity(value->irn);
 182
 183   for (i = 0; i < irn_arity; i++) {
 184     ir_node *pred = get_irn_n(value->irn, i);
 185
 186     key.irn = pred;
 187     value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
 188
 189     if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
 190       if (value_pred == NULL) {
 191               /* Is loop invariant. */
 192               for (p = 0; p < unroll_factor - 1; p++)
 193                 set_irn_n(value->copy[p], i, pred);
 194               /* pred is a loop invariant. The copies of the successors get it as predecessor. */
 195       }
 196       else {
 197               for (p = 0; p < unroll_factor - 1; p++)
 198                 set_irn_n(value->copy[p], i, value_pred->copy[p]);
 199           /* value_pred->irn is a node from the unrolling loop.
 200            * The copies of the successors get the
 201                  * copies of value_pred as predecessor.
 202            */
 203       }
 204     }
 205   }
 206 }
 207
 208 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
 209  * must now point to the value defined
 210  * in the last copy of the loop body.
 211  *
 212  * @param l_n                A set, where the node of the loop are saved.
 213  * @param value              A phi in the loop head.
 214  * @param info               Contains information about the induction variable.
 215  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 216  */
 217 static void
 218 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
 219 {
 220   copies_t key, *value_pred;
 221
 222   /* info->op_pred_pos is the backedge position. */
 223   key.irn = get_irn_n(value->irn, info->op_pred_pos);
 224   value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 225
 226   /* value->copy[unroll_factor - 2] is the last copy. */
 227   set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
 228 }
 229
 230 /**
 231  * Test for a loop head.
 232  *
 233  * Returns true if the node has predecessors in the loop _and_ out of
 234  * the loop.  Then it is a loop head: The loop can be entered through
 235  * this node.
 236  *
 237  * @param n      The node to be tested. Must be a block.
 238  * @param info   Contains the loop information.
 239  */
 240 static int
 241 is_loop_head(ir_node *n, induct_var_info *info)
 242 {
 243   int i, arity;
 244   int some_outof_loop = 0, some_in_loop = 0;
 245
 246   assert(get_irn_op(n) == op_Block);
 247   arity = get_Block_n_cfgpreds(n);
 248
 249   for (i = 0; i < arity; i++) {
 250     ir_node *pred = get_Block_cfgpred(n, i);
 251     assert(pred);
 252     if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
 253       some_outof_loop = 1;
 254     } else
 255       some_in_loop = 1;
 256   }
 257
 258   return some_outof_loop && some_in_loop;
 259 }
 260
 261
 262 /**
 263  * Test whether the passed loop is a natural loop.
 264  *
 265  * Returns true if the loop has only one loop header and only a single
 266  * back edge.
 267  *
 268  * @param info  Contains the loop information.
 269  */
 270 static int
 271 is_natural_loop(induct_var_info *info)
 272 {
 273   ir_node *l_node;
 274   int l_n_node = 0, i;
 275
 276   l_n_node = get_loop_n_nodes (info->l_itervar_phi);
 277
 278   for (i = 1; i < (l_n_node); i ++) {
 279     l_node = get_loop_node (info->l_itervar_phi, i);
 280     if (is_loop_head(l_node, info)) return 0;
 281
 282     if (has_backedges(l_node) && i != l_n_node-1) return 0;
 283   }
 284
 285   return 1;
 286 }
 287
 288 /**
 289  * Search for all nodes of a loop.
 290  *
 291  * @param node       The induction variable of the loop.
 292  * @param loop_head  The head of the loop.
 293  * @param l_n        A set, where the node of the loop are saved.
 294  */
 295 static void
 296 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
 297 {
 298   int i;
 299   copies_t key, *value;
 300
 301   /* Add this node to the list. */
 302   key.irn  = node;
 303
 304   /* Initialize all copies of the added node with NULL.*/
 305   for (i = 0; i < 4; ++i)
 306     key.copy[i] = NULL;
 307   value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 308
 309   /* Add all outs of this node to the list, if they are within the loop. */
 310   for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
 311     ir_node *pred = get_irn_out(node, i);
 312
 313     key.irn = pred;
 314     if (!is_loop_invariant(pred, loop_head)         &&
 315             set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 316       find_loop_nodes(pred, loop_head, l_n);
 317     }
 318   }
 319
 320   /* Add all ins if they are within the loop. */
 321   for (i = get_irn_arity(node) - 1; i >=0; --i) {
 322     ir_node *pred = get_irn_n(node, i);
 323
 324     key.irn = pred;
 325     if (!is_loop_invariant(pred, loop_head)         &&
 326               set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 327       find_loop_nodes(pred, loop_head, l_n);
 328     }
 329   }
 330 }
 331
 332 /**
 333  * Make a new loop head if copy_head = 1.
 334  *
 335  * @param l_n              A set, where the node of the loop are saved.
 336  * @param info             Contains the loop information.
 337  * @param value            A element of the set.
 338  * @param unroll_factor    A integer 2 <= unroll_factor <= 4.
 339  * @param copy_head        if non-zero, the loop head will be copied
 340  *
 341  */
 342 static void
 343 new_loop_head(set *l_n, induct_var_info *info, copies_t *value, int unroll_factor, int copy_head)
 344 {
 345   int i;
 346   copies_t block, *value_backedge_jmp, *backedge_jmp_block;
 347   /* The  predecessor of the loop head in the  backedge position */
 348   ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
 349
 350   block.irn          = backedge_jmp;
 351   value_backedge_jmp = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 352
 353   if (copy_head) {
 354     /* The first copy of the loop head must point to the loop head.*/
 355     value->copy[0] = new_Block(1, &backedge_jmp);
 356
 357     for (i = 1; i < unroll_factor - 1; ++i) {
 358       /* all other copies must point to the copy before it in the array. */
 359       value->copy[i] = new_Block(1, &value_backedge_jmp->copy[i-1]);
 360     }
 361   }
 362   else {
 363     /* If the loop head must not be copy. block.irn is the successor of the loop head.*/
 364     block.irn          = get_nodes_block(backedge_jmp);
 365     backedge_jmp_block = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 366
 367     /* The first copy of block.irn point to it.
 368       The another copies muss point to the copy before it in the array.*/
 369     set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
 370
 371     for (i = 1; i < unroll_factor - 1; ++i)
 372       set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
 373   }
 374 }
 375
 376 /* Set all copies of the induction variable.
 377  *
 378  * @param phi             A phi node in the loop head block.
 379  * @param phi_pred        The predecessor of the phi along the backedge.
 380  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 381  */
 382 static void
 383 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
 384 {
 385   int p;
 386
 387   /* The first copy of Phi node get the node along the backedge as predecessor. The next copies
 388      the copies of this node.*/
 389   phi->copy[0] = phi_pred->irn;
 390   for (p = 1; p < unroll_factor - 1; ++p) {
 391     /* If two phi nodes are in cycle.  */
 392     if (phi_pred->copy[p - 1] == NULL && get_irn_op(phi_pred->irn) == op_Phi) {
 393       if (p & 1)
 394               phi->copy[p] =  phi->irn;
 395       else
 396               phi->copy[p] =  phi_pred->irn;
 397     } else
 398       phi->copy[p] =  phi_pred->copy[p - 1];
 399   }
 400 }
 401
 402 /**
 403  * Decide if the loop head must be copied. A head with important nodes
 404  * must be copied.
 405  *
 406  * @param l_n         A set, where the node of the loop are saved.
 407  * @param info        Contains information about the induction variable.
 408  *
 409  * @return non-zero if the loop head must be copied
 410  */
 411 static int
 412 loop_head_nodes(set *l_n, induct_var_info *info)
 413 {
 414   int must_copy = 0;
 415   copies_t *value;
 416   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 417
 418   for (value = set_first(l_n); value != NULL; value = set_next(l_n))
 419     if (is_no_Block(value->irn) && get_nodes_block(value->irn) == loop_head)
 420       /* If the loop head contains just this nodes, than must not be copy. */
 421       switch (get_irn_opcode(value->irn)) {
 422       case iro_Cond:
 423               break;
 424       case iro_Phi:
 425               break;
 426       case iro_Proj:
 427               break;
 428       case iro_Const:
 429               break;
 430       case iro_Cmp:
 431               break;
 432       default:
 433               must_copy = 1;
 434       }
 435     return must_copy;
 436 }
 437
 438 /** Copy all loop nodes.
 439  *
 440  * @param l_n             Contains all nodes of the loop.
 441  * @param info            Contains the loop information.
 442  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 443  */
 444 static void
 445 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
 446 {
 447   int i;
 448   copies_t *value, *info_op, *phi, *loop_h = NULL, key, *value_block;
 449
 450   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 451
 452   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 453     if(value->irn == loop_head)
 454       loop_h = value;
 455     else if (is_Phi_in_loop_head(value->irn, loop_head))
 456       phi = value;
 457     else if (copy_loop_head){
 458       /* The loop head must be copied. */
 459       for (i = 0; i < unroll_factor - 1; i++){
 460               copy_node(value->irn, NULL);
 461               value->copy[i] = get_irn_link(value->irn);
 462       }
 463     } else {
 464       /* The loop head and its nodes must not be copied. */
 465       if((value->irn->op == op_Block             &&
 466                 value->irn != loop_head)               ||
 467                (value->irn->op != op_Block             &&
 468                 get_nodes_block(value->irn) != loop_head)) {
 469               for (i = 0; i<unroll_factor -1; i++){
 470                 copy_node(value->irn, NULL);
 471                 value->copy[i] = get_irn_link(value->irn);
 472               }
 473       }
 474     }
 475   }
 476   /* Treat the loop head block */
 477   new_loop_head(l_n, info, loop_h, unroll_factor, copy_loop_head);
 478
 479   /* Similarly treat the Phis in the loop head block. info->op is the node
 480      along the backedges. */
 481   key.irn = info->op;
 482   info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 483   assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
 484   for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
 485     ir_node *phi = get_irn_out(loop_head, i);
 486
 487     if (is_Phi(phi)) {
 488       copies_t *phi_pred, *phi_op;
 489
 490       key.irn  = get_Phi_pred(phi, info->op_pred_pos); // info->op;
 491       phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 492       key.irn  = phi;
 493       phi_op   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 494       set_Phi_copies(phi_op, phi_pred, unroll_factor);
 495     }
 496   }
 497
 498   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 499     /* Set the predecessors of the copies. */
 500     if (copy_loop_head)
 501       set_preds(l_n, value, info, unroll_factor, NULL);
 502     else if ((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
 503       set_preds(l_n, value, info, unroll_factor, NULL);
 504
 505     if (is_Phi_in_loop_head(value->irn, loop_head))
 506       /* Set the backedge of phis in the loop head. */
 507       set_phi_backedge(l_n, value, info, unroll_factor);
 508
 509     if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head)) {
 510       ir_node *nodes_block = get_nodes_block(value->irn);
 511
 512       if (!copy_loop_head && nodes_block == loop_head)
 513         continue;
 514
 515       key.irn = nodes_block;
 516       value_block = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 517       /* Set the copy of the node in the accordant copy of its block. */
 518       for(i = 0; i < unroll_factor - 1; i++){
 519         set_nodes_block(value->copy[i], value_block->copy[i]);
 520         //add_End_keepalive(get_irg_end(current_ir_graph), value->copy[p]);
 521       }
 522     }
 523   }
 524 }
 525 /**
 526  * Returns non-zero if a Proj node from the loop has a predecessor that
 527  * can throw an exception.
 528  *
 529  * @param node  A Proj node from the loop.
 530  *
 531  * @return non-zero if the predecessor of the Proj node can throw an exception
 532  */
 533 static int is_exception_possible(ir_node *node)
 534 {
 535   ir_node *pred = get_Proj_pred(node);
 536
 537   /* only fragile ops can throw an exception */
 538   if (! is_fragile_op(pred))
 539     return 0;
 540
 541   /*
 542    * fragile ops that are known NOT to throw
 543    * an exception if their pin_state is op_pin_state_floats
 544    */
 545   return get_irn_pinned(pred) != op_pin_state_floats;
 546 }
 547
 548 /**
 549  * If a node from the loop is predecessor of the end block, then the end block must get all copies
 550  * of this node as predecessors. This is possible with function calls in the unrolling loop.
 551  *
 552  * @param end_block          The end block.
 553  * @param loop_head          The head of the unrolling loop.
 554  * @param l_n                Contains all nodes of the loop.
 555  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 556  *                           of the end block from the unrolling loop.
 557  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 558  */
 559 static void
 560 new_end_block(ir_node *end_block, ir_node *loop_head, set *l_n, set *loop_endblock_outs, int unroll_factor)
 561 {
 562   copies_t key, *value;
 563   int set_el, new_preds, all_new_preds, i, q;
 564   int old_preds = get_Block_n_cfgpreds(end_block);   /* All old predecessors of the end block. */
 565   ir_node **all_in;
 566
 567   for (i = 0; i < old_preds; i++) {
 568     ir_node *pred = get_Block_cfgpred(end_block, i);
 569      key.irn = pred;
 570      value = NULL;
 571      value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 572
 573      /* If a predecessor of the end block is a Proj from the unrolling loop (for function calls) .*/
 574      if (get_irn_op(pred) == op_Proj && is_exception_possible(pred) && value != NULL &&
 575                !(!copy_loop_head && get_nodes_block(pred) == loop_head))
 576        value =  set_insert(loop_endblock_outs, &key, sizeof(key), HASH_PTR(key.irn));
 577   }
 578
 579   /* The set loop_endblock_outs contains all predecessors of the end block from the unrolling loop,
 580    * set_el the amount of such nodes.
 581    */
 582   set_el = set_count (loop_endblock_outs);
 583
 584   /* If the end block haven't such predecessors, we are finished. */
 585   if (!set_el) return;
 586
 587   new_preds = (unroll_factor - 1) * set_el;          /* All new predecessors of the end block */
 588   all_new_preds = old_preds + new_preds;             /* All predecessors of this block. */
 589   all_in = alloca(sizeof(*all_in) * all_new_preds);  /* A array with size for all predecessors of this block. */
 590
 591   for (i = 0; i < old_preds; i++)
 592     all_in[i] = get_Block_cfgpred(end_block, i);    /* The old predecessors. */
 593
 594   value = set_first(loop_endblock_outs);
 595
 596   for (; value != NULL; value = set_next(loop_endblock_outs)) {
 597     key.irn = value->irn;
 598     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 599     for (q = 0; q < unroll_factor - 1 ; q++) {
 600       all_in[i++] = value->copy[q];                 /* The new predecessors. */
 601     }
 602
 603   }
 604
 605   /* Replace the old predecessors of the end block wit´h the new ones. */
 606   set_irn_in(end_block, all_new_preds, all_in);
 607 }
 608
 609 /** new_after_loop_block
 610  *
 611  * The after loop block must examine the possible exceptions in the loop.
 612  * If a (Proj) node from the loop is predecessor of this block, then the
 613  * after loop block must have as well all copies of this node as predecessors.
 614  *
 615  * @param l_n             Contains all nodes of the loop.
 616  * @block block           A block after the loop.
 617  * @param loop_in         A node from the loop, that is predecessor of the end block.
 618  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 619  */
 620 static void
 621 new_after_loop_block (set *l_n, ir_node* block, copies_t *loop_in, int unroll_factor)
 622 {
 623   copies_t key, *value;
 624   int i, p, q, s, old_preds, new_preds, all_new_preds ;
 625   ir_node **all_in;
 626
 627   /* The node from the unrolling loop must be a Proj. */
 628   if (loop_in->irn->op != op_Proj) return;
 629
 630   old_preds = get_Block_n_cfgpreds(block);     /* All old predecessors of this block. */
 631   new_preds = old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 632   all_new_preds = old_preds + new_preds;       /* All predecessors of this block. */
 633
 634   all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 635
 636   for (i = 0 ; i < old_preds; i++)
 637     all_in[i] = get_Block_cfgpred(block, i);      /* The old predecessors. */
 638
 639   q = old_preds;
 640   for (i = 0; i < old_preds; i++){
 641     key.irn = all_in[i];
 642     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 643     p = 0;
 644     for (s = 0; s < (unroll_factor - 1); s++) {
 645       all_in[q++] = value->copy[p++];          /* The new predecessors. */
 646     }
 647   }
 648   /* Replace the old predecessors of the end block whit the new one. */
 649   set_irn_in(block, all_new_preds, all_in);
 650 }
 651
 652 /** new_after_loop_node
 653  *
 654  * An after loop node (phi or call) must examine the possible exceptions in the loop.
 655  * If a (Proj) node from the loop is predecessor of this node, then the after loop
 656  * node must have as well all copies of this node as predecessors.
 657  *
 658  * @param l_n             Contains all nodes of the loop.
 659  * @param loop_outs       Contains nodes after the loop,that have as predecessor a node from the loop.
 660  * @block node            A node after the loop.
 661  * @param loop_in         A node (Proj) from the loop, that is predecessor of *node.
 662  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 663  */
 664 static void
 665 new_after_loop_node(set *l_n, set *loop_outs, ir_node *node, copies_t *loop_in, int unroll_factor)
 666 {
 667   ir_node *pred, *block_pred = NULL, *node_block, *new_phi;
 668   int phi = 0, old_preds, new_preds, all_new_preds, p, q, i, s;
 669   copies_t key, *value = NULL;
 670   ir_node **all_in;
 671
 672   old_preds = get_irn_arity(node);            /* All old predecessors of this node. */
 673   new_preds =old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 674   all_new_preds = old_preds + new_preds;      /* All predecessors of this block. */
 675
 676   all_in  = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 677
 678   /* Verification Predecessors, successors and operation of node and loop_in.
 679    * loop_in must be a Proj node.
 680    */
 681   if (loop_in->irn->op != op_Proj) return;
 682   /* Node must be operation Phi with mode memory or a Call node. */
 683   if (get_irn_op(node) == op_Phi  &&
 684       get_irn_mode(node) == mode_M){
 685     /* If node is a Phi node,then must have a Call node as successor. */
 686     for (i = 0; i < get_irn_n_outs(node); i++)
 687       if (get_irn_op(get_irn_out(node, i)) == op_Call) {
 688               phi = 1;
 689         break;
 690       }
 691
 692     if (!phi) return;
 693   }
 694
 695   /* The predecessor of loop_in must not be a loop invariant. */
 696   pred = get_Proj_pred(loop_in->irn);
 697   key.irn = pred;
 698   value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 699   if (! value) return;
 700
 701   node_block = get_nodes_block(node);
 702
 703   /* The block of node must also have a (Proj) predecessor from the unrolling loop. */
 704   for (i = 0; i < get_Block_n_cfgpreds(node_block); i++) {
 705     block_pred = get_Block_cfgpred(node_block, i);
 706
 707     if (get_irn_op(block_pred) == op_Proj) {
 708       if (get_Proj_pred(block_pred) == pred)
 709         break;
 710     }
 711     else {
 712       block_pred = NULL;
 713     }
 714   }
 715
 716   if (! block_pred) return;
 717
 718   key.irn = block_pred;
 719   value   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 720
 721   if (! value)
 722     return;
 723   else
 724     new_after_loop_block(l_n, node_block, value, unroll_factor);
 725
 726   for (i = 0; i < old_preds; ++i)
 727     all_in[i] = get_irn_n(node, i);  /* The old predecessors. */
 728
 729   q = old_preds;
 730   for (i = 0; i < old_preds; ++i) {
 731     key.irn = all_in[i];
 732     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 733     p = 0;
 734     for (s = 0; s < (unroll_factor - 1); ++s) {
 735       all_in[q] = value->copy[p];  /* The new predecessors. */
 736       p++;
 737       q++;
 738     }
 739   }
 740   /* A new phi node with the new predecessors. */
 741   new_phi = new_r_Phi(current_ir_graph, get_nodes_block(node), all_new_preds,all_in,
 742                        get_irn_mode(node));
 743
 744   if (phi)
 745     exchange(node, new_phi);
 746   else{
 747     for (i = 0; i < get_irn_arity(node); ++i)
 748       if (get_irn_n(node, i) == pred)
 749         set_irn_n(node, i, new_phi);
 750   }
 751
 752   /* The set loop_outs contains the visited nodes and their blocks. */
 753   key.irn = node;
 754   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 755   key.irn = get_nodes_block(node);
 756   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 757 }
 758
 759
 760 /* Set the outs of the unrolling loop. All loop outs of a node muss now
 761  * point to the last copy of it. Just phi nodes in the loop head and proj
 762  * nodes save it outs. The all copies of some Projs  have too outs.
 763  *
 764  * @param l_n                Contains all nodes of the loop.
 765  * @param loop_outs          The set contains the visited and changed loop outs.
 766  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 767  *                           of the end block from the unrolling loop.
 768  * @param info               Contains the loop information.
 769  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 770  */
 771 static void
 772 set_loop_outs(set *l_n, set *loop_outs, set *loop_endblock_outs, induct_var_info *info, int unroll_factor)
 773 {
 774   copies_t *value, key;
 775   int i, p;
 776   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 777   ir_node *end_block = get_irg_end_block(current_ir_graph);
 778
 779   for (value = set_first(l_n); value != NULL; value = set_next(l_n))
 780     if (! is_Phi_in_loop_head(value->irn, loop_head) &&
 781         (get_irn_opcode(value->irn) == iro_Proj && value->copy[0] != NULL))
 782       for (i = 0; i < get_irn_n_outs(value->irn); ++i) {
 783         key.irn = get_irn_out(value->irn, i);
 784
 785         /* Search for loop outs. */
 786         if (set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
 787           if ((key.irn->op == op_Block && get_Block_dom_depth(key.irn)  >
 788                get_Block_dom_depth(loop_head))                          ||
 789               (key.irn->op != op_Block && get_Block_dom_depth(get_nodes_block(key.irn)) >
 790                get_Block_dom_depth(loop_head))) {
 791
 792             for (p = 0; p < get_irn_arity(key.irn); p++)
 793               if (value->irn == get_irn_n(key.irn, p)) {
 794                               if (get_irn_op(value->irn) == op_Proj && is_exception_possible(value->irn)) {
 795                     if (set_find(loop_outs, &key, sizeof(key), HASH_PTR(key.irn)) == NULL) {
 796                       /* If the loop out is for exceptions in the loop. */
 797                       if ((get_irn_op(key.irn) == op_Phi && get_irn_mode(key.irn) == mode_M) ||
 798                         get_irn_op(key.irn) == op_Call)
 799                         new_after_loop_node(l_n,loop_outs, key.irn, value, unroll_factor);
 800                       else
 801                         continue;
 802                     } else
 803                       continue;
 804                   } else
 805                     set_irn_n(key.irn, p, value->copy[unroll_factor-2]);
 806               }
 807           }
 808       }
 809   /* This function searches for loop outs associated with function call in the unrolling loop. */
 810   new_end_block (end_block, loop_head, l_n, loop_endblock_outs, unroll_factor);
 811 }
 812
 813 /**
 814  * Unroll the loop body with a factor that must be power of two.
 815  * Called from a post walker.
 816  *
 817  * @param n        An IR node.
 818  * @param env      points to a result
 819  */
 820 static void do_loop_unroll(ir_node *n, void *env)
 821 {
 822   int *unroll_done = env;
 823   induct_var_info info;
 824   copies_t *value;
 825   set *loop_nodes, *loop_outs, *loop_endblock_outs;
 826   ir_node *cmp_out, *phi_init, *loop_head;
 827   ir_node *backedge_jmp;
 828   int l_sons = 0, unroll_factor = 0;
 829   int cmp_typ, init, iter_end, iter_increment, diff, iter_number;
 830   int backedge_pos;
 831
 832   copy_loop_head = 0;
 833   info.itervar_phi = n;
 834
 835   /* The IR node must be a induction variable. */
 836   if (get_irn_op(n) == op_Phi) {
 837     if (is_induction_variable (&info) == NULL)
 838       return;
 839   }
 840   else
 841     return;
 842
 843   /* Brute force limiting of loop body size. */
 844   if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
 845
 846   /* Only unroll loops that compare against a constant for exiting. */
 847   if (info.cmp == NULL) return;
 848
 849   /* We only want to unroll innermost loops. */
 850   l_sons = get_loop_n_sons(info.l_itervar_phi);
 851   if ( l_sons != 0)
 852     return;
 853
 854   cmp_out  = get_irn_out(info.cmp, 0);
 855   phi_init = get_Phi_pred(info.itervar_phi, info.init_pred_pos);
 856
 857   if (! is_Proj(cmp_out)) return;
 858   if (get_irn_op(info.increment) != op_Const   ||
 859       get_irn_op(phi_init) != op_Const         ||
 860       get_irn_op(info.cmp_const) != op_Const) return;
 861
 862   cmp_typ        = get_Proj_proj(cmp_out);
 863   init           = get_tarval_long(get_Const_tarval(phi_init));
 864   iter_end       = get_tarval_long(get_Const_tarval(info.cmp_const));
 865   iter_increment = get_tarval_long(get_Const_tarval(info.increment));
 866
 867   if (iter_end < init){
 868     int p = iter_end;
 869     iter_end = init;
 870     init = p;
 871   }
 872
 873   iter_increment = iter_increment < 0 ? -iter_increment : iter_increment;
 874   diff = iter_end - init;
 875
 876   if (diff == 0 || iter_increment == 0) return;
 877   /* Test for the value of unroll factor. */
 878   iter_number = diff/iter_increment;
 879   if ((cmp_typ == pn_Cmp_Le || cmp_typ == pn_Cmp_Ge) && (iter_end % iter_increment == 0))
 880     iter_number ++;
 881
 882   if ((iter_number & 3) == 0)
 883     unroll_factor = 4;
 884   else if ((iter_number % 3) == 0)
 885     unroll_factor = 3;
 886   else if ((iter_number & 1) == 0)
 887     unroll_factor = 2;
 888   else return;
 889
 890   if (get_firm_verbosity())
 891     printf("\nloop unrolling with factor %d \n", unroll_factor);
 892
 893   /* ok, we will do unrolling */
 894   *unroll_done += 1;
 895
 896   /* The unroll factor must be less than 4. */
 897   assert(unroll_factor <= MAX_UNROLL);
 898
 899   loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
 900
 901   assert(loop_head != NULL && is_Block(loop_head));
 902
 903   /* We assume, the loop head has exactly one backedge.  The position of
 904      the backedge is in the following variable: */
 905   backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
 906
 907   /* A set with the nodes to copy. */
 908   loop_nodes = new_set(set_cmp, 8);
 909   /* A set with the loop outs for exceptions. */
 910   loop_outs =  new_set(set_cmp, 8);
 911
 912   /* A set containing all predecessors
 913      of the end block from the unrolling loop. */
 914   loop_endblock_outs = new_set(set_cmp, 8);
 915
 916   /* Find all nodes of the unrolling loop. */
 917   find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
 918
 919   /* Decide if the loop head to be copy.*/
 920   copy_loop_head = loop_head_nodes(loop_nodes, &info);
 921
 922   /* Copy all nodes of the unrolling loop, that muss be copy. */
 923   copy_loop_body(loop_nodes, &info, unroll_factor);
 924
 925   backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
 926
 927   /* Set the backedge of the loop head. */
 928   for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
 929     if (value->irn == backedge_jmp)
 930       set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
 931   }
 932   set_loop_outs(loop_nodes, loop_outs, loop_endblock_outs, &info, unroll_factor);
 933 }
 934
 935 /* Performs loop unrolling for the passed graph. */
 936 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */)
 937 {
 938   ir_graph *rem;
 939   int unroll_done = 0;
 940
 941   if ( !get_opt_loop_unrolling()) return;
 942
 943   rem = current_ir_graph;
 944   current_ir_graph = irg;
 945
 946   /* -- Precompute some information -- */
 947
 948   /* Call algorithm that computes the backedges */
 949   construct_cf_backedges(irg);
 950
 951   /* Call algorithm that computes the dominator trees. */
 952   compute_doms(irg);
 953
 954   /* Call algorithm that computes the out edges */
 955   compute_irg_outs(irg);
 956
 957   collect_phiprojs(irg);
 958
 959   /* -- Search expressions that can be optimized -- */
 960   irg_walk_graph(irg, NULL, do_loop_unroll, &unroll_done);
 961
 962   if (unroll_done) {
 963     /* unrolling was done, all info is invalid */
 964     set_irg_dom_inconsistent(irg);
 965     set_irg_outs_inconsistent(irg);
 966     set_irg_loopinfo_state(current_ir_graph, loopinfo_cf_inconsistent);
 967     set_trouts_inconsistent();
 968     set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 969   }
 970
 971   current_ir_graph = rem;
 972 }