nsz Git - libfirm/blob - ir/opt/loop_unrolling.c

   1 /**
   2  *
   3  * @file loop_unrolling.c
   4  *
   5  * Project:     libFIRM
   6  * File name:   ir/opt/loop_unrolling.c
   7  * Purpose:     Make loop unrolling.
   8  * Author:      Beyhan Veliev
   9  * Modified by:
  10  * Created:     16.11.2004
  11  * CVS-ID:      $Id$
  12  * Copyright:   (c) 2004 Universität Karlsruhe
  13  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  14  */
  15 #ifdef HAVE_CONFIG_H
  16 #include "config.h"
  17 #endif
  18
  19 #ifdef HAVE_STRING_H
  20 #include <string.h>
  21 #endif
  22 #ifdef HAVE_MALLOC_H
  23 #include <malloc.h>
  24 #endif
  25 #ifdef HAVE_ALLOCA_H
  26 #include <alloca.h>
  27 #endif
  28
  29 # include "loop_unrolling.h"
  30
  31 # include "irgwalk.h"
  32 # include "ircons.h"
  33 # include "irgmod.h"
  34 # include "irloop_t.h"
  35 # include "irgopt_t.h"
  36 # include "irnode_t.h"
  37 # include "irouts.h"
  38 # include "hashptr.h"
  39 # include "pset.h"
  40 # include "strength_red.h"
  41
  42 /* We will unroll maximal 4-times.  */
  43 #define MAX_UNROLL 4
  44
  45 /* We will know if the head of the loop to be copy.
  46  * Default "0"  don't copy.*/
  47 static int copy_loop_head;
  48
  49 typedef struct {
  50   ir_node *irn ;                /* Node of the loop to be unrolling*/
  51   ir_node *copy[MAX_UNROLL] ;   /* The copy of the node */
  52 } copies_t;
  53
  54 /**
  55  * Compare two elements of the copies_t set
  56  */
  57 static int set_cmp(const void *elt, const void *key, size_t size)
  58 {
  59   const copies_t *c1 = elt;
  60   const copies_t *c2 = key;
  61
  62   return c1->irn != c2->irn;
  63 }
  64
  65 static INLINE int * new_backedge_arr(struct obstack *obst, int size)
  66 {
  67   int *res = NEW_ARR_D (int, obst, size);
  68   memset(res, 0, sizeof(int) * size);
  69   return res;
  70 }
  71
  72 static INLINE void new_backedge_info(ir_node *n) {
  73   switch(get_irn_opcode(n)) {
  74   case iro_Block:
  75     n->attr.block.cg_backedge = NULL;
  76     n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  77     break;
  78   case iro_Phi:
  79     n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  80     break;
  81   case iro_Filter:
  82     n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  83     break;
  84   default: ;
  85   }
  86 }
  87
  88 /**
  89  * Remember the new node in the old node by using a field all nodes have.
  90  */
  91 static INLINE void
  92 set_new_node (ir_node *old, ir_node *new)
  93 {
  94   old->link = new;
  95 }
  96
  97 /**
  98  * Copies the node to the new obstack. The Ins of the new node point to
  99  * the predecessors on the old obstack.  For block/phi nodes not all
 100  * predecessors might be copied.  n->link points to the new node.
 101  * For Phi and Block nodes the function allocates in-arrays with an arity
 102  * only for useful predecessors.  The arity is determined by counting
 103  * the non-bad predecessors of the block.
 104  *
 105  * @param n    The node to be copied
 106  * @param env  if non-NULL, the node number attribute will be copied to the new node
 107  */
 108 static void
 109 copy_node (ir_node *n, void *env)
 110 {
 111   ir_node *nn;
 112   int new_arity;
 113   ir_op *op = get_irn_op(n);
 114   int copy_node_nr = false;
 115
 116   /* The end node looses it's flexible in array.  This doesn't matter,
 117      as dead node elimination builds End by hand, inlineing doesn't use
 118      the End node. */
 119   /* assert(op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */
 120
 121   if (op == op_Bad)
 122     /* node copied already */
 123     return;
 124
 125   new_arity = get_irn_arity(n);
 126
 127   nn = new_ir_node(get_irn_dbg_info(n),
 128            current_ir_graph,
 129            NULL,            /* no block yet, will be set later */
 130            op,
 131            get_irn_mode(n),
 132            new_arity,
 133            get_irn_in(n));
 134
 135
 136   /* Copy the attributes.  These might point to additional data.  If this
 137      was allocated on the old obstack the pointers now are dangling.  This
 138      frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 139   copy_node_attr(n, nn);
 140   new_backedge_info(nn);
 141   set_new_node(n, nn);
 142
 143 #if DEBUG_libfirm
 144   if (copy_node_nr) {
 145     /* for easier debugging, we want to copy the node numbers too */
 146     nn->node_nr = n->node_nr;
 147   }
 148 #endif
 149
 150 }
 151 /*Check if phi in the loop head is.
 152  *
 153  * @param phi               Must be a phi node from the loop.
 154  * @param loop_head         The loop head .
 155  */
 156 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
 157   assert(is_Block(loop_head));
 158   return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
 159 }
 160
 161 /**
 162  * Copies predecessors of node to copies of node.
 163  * If the predecessor is a loop invariant, then the copy get it
 164  * as predecessor, else the copy of the predecessor.
 165  *
 166  * @param l_n                A set, where the node of the loop are saved.
 167  * @param value              A element of the set.
 168  * @param info               Contains information about the induction variable.
 169  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 170  * @param env                Free environment pointer.
 171  */
 172 static void
 173 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
 174 {
 175   int i, p, irn_arity;
 176   copies_t key, *value_pred;
 177   // The head of the unrolling loop.
 178   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 179
 180   irn_arity = get_irn_arity(value->irn);
 181
 182   for (i = 0; i < irn_arity; i++) {
 183     ir_node *pred = get_irn_n(value->irn, i);
 184
 185     key.irn = pred;
 186     value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
 187
 188     if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
 189       if (value_pred == NULL) {
 190               /* Is loop invariant. */
 191               for (p = 0; p < unroll_factor - 1; p++)
 192                 set_irn_n(value->copy[p], i, pred);
 193               /* pred is a loop invariant. The copies of the successors get it as predecessor. */
 194       }
 195       else {
 196               for (p = 0; p < unroll_factor - 1; p++)
 197                 set_irn_n(value->copy[p], i, value_pred->copy[p]);
 198           /* value_pred->irn is a node from the unrolling loop.
 199            * The copies of the successors get the
 200                  * copies of value_pred as predecessor.
 201            */
 202       }
 203     }
 204   }
 205 }
 206
 207 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
 208  * must now point to the value defined
 209  * in the last copy of the loop body.
 210  *
 211  * @param l_n                A set, where the node of the loop are saved.
 212  * @param value              A phi in the loop head.
 213  * @param info               Contains information about the induction variable.
 214  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 215  */
 216 static void
 217 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
 218 {
 219   copies_t key, *value_pred;
 220
 221   /* info->op_pred_pos is the backedge position. */
 222   key.irn = get_irn_n(value->irn, info->op_pred_pos);
 223   value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 224
 225   /* value->copy[unroll_factor - 2] is the last copy. */
 226   set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
 227 }
 228
 229 /**
 230  * Test for a loop head.
 231  *
 232  * Returns true if the node has predecessors in the loop _and_ out of
 233  * the loop.  Then it is a loop head: The loop can be entered through
 234  * this node.
 235  *
 236  * @param n      The node to be tested. Must be a block.
 237  * @param info   Contains the loop information.
 238  */
 239 static int
 240 is_loop_head(ir_node *n, induct_var_info *info)
 241 {
 242   int i, arity;
 243   int some_outof_loop = 0, some_in_loop = 0;
 244
 245   assert(get_irn_op(n) == op_Block);
 246   arity = get_Block_n_cfgpreds(n);
 247
 248   for (i = 0; i < arity; i++) {
 249     ir_node *pred = get_Block_cfgpred(n, i);
 250     assert(pred);
 251     if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
 252       some_outof_loop = 1;
 253     } else
 254       some_in_loop = 1;
 255   }
 256
 257   return some_outof_loop && some_in_loop;
 258 }
 259
 260
 261 /**
 262  * Test whether the passed loop is a natural loop.
 263  *
 264  * Returns true if the loop has only one loop header and only a single
 265  * back edge.
 266  *
 267  * @param info  Contains the loop information.
 268  */
 269 static int
 270 is_natural_loop(induct_var_info *info)
 271 {
 272   ir_node *l_node;
 273   int l_n_node = 0, i;
 274
 275   l_n_node = get_loop_n_nodes (info->l_itervar_phi);
 276
 277   for (i = 1; i < (l_n_node); i ++) {
 278     l_node = get_loop_node (info->l_itervar_phi, i);
 279     if (is_loop_head(l_node, info)) return 0;
 280
 281     if (has_backedges(l_node) && i != l_n_node-1) return 0;
 282   }
 283
 284   return 1;
 285 }
 286
 287 /**
 288  * Search for all nodes of a loop.
 289  *
 290  * @param node       The induction variable of the loop.
 291  * @param loop_head  The head of the loop.
 292  * @param l_n        A set, where the node of the loop are saved.
 293  */
 294 static void
 295 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
 296 {
 297   int i;
 298   copies_t key, *value;
 299
 300   /* Add this node to the list. */
 301   key.irn  = node;
 302
 303   /* Initialize all copies of the added node with NULL.*/
 304   for (i = 0; i < 4; ++i)
 305     key.copy[i] = NULL;
 306   value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 307
 308   /* Add all outs of this node to the list, if they are within the loop. */
 309   for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
 310     ir_node *pred = get_irn_out(node, i);
 311
 312     key.irn = pred;
 313     if (!is_loop_invariant(pred, loop_head)         &&
 314             set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 315       find_loop_nodes(pred, loop_head, l_n);
 316     }
 317   }
 318
 319   /* Add all ins if they are within the loop. */
 320   for (i = get_irn_arity(node) - 1; i >=0; --i) {
 321     ir_node *pred = get_irn_n(node, i);
 322
 323     key.irn = pred;
 324     if (!is_loop_invariant(pred, loop_head)         &&
 325               set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 326       find_loop_nodes(pred, loop_head, l_n);
 327     }
 328   }
 329 }
 330
 331 /**
 332  * Make a new loop head if copy_head = 1.
 333  *
 334  * @param l_n              A set, where the node of the loop are saved.
 335  * @param info             Contains the loop information.
 336  * @param value            A element of the set.
 337  * @param unroll_factor    A integer 2 <= unroll_factor <= 4.
 338  * @param copy_head        if non-zero, the loop head will be copied
 339  *
 340  */
 341 static void
 342 new_loop_head(set *l_n, induct_var_info *info, copies_t *value, int unroll_factor, int copy_head)
 343 {
 344   int i;
 345   copies_t block, *value_backedge_jmp, *backedge_jmp_block;
 346   /* The  predecessor of the loop head in the  backedge position */
 347   ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
 348
 349   block.irn          = backedge_jmp;
 350   value_backedge_jmp = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 351
 352   if (copy_head) {
 353     /* The first copy of the loop head must point to the loop head.*/
 354     value->copy[0] = new_Block(1, &backedge_jmp);
 355
 356     for (i = 1; i < unroll_factor - 1; ++i) {
 357       /* all other copies must point to the copy before it in the array. */
 358       value->copy[i] = new_Block(1, &value_backedge_jmp->copy[i-1]);
 359     }
 360   }
 361   else {
 362     /* If the loop head must not be copy. block.irn is the successor of the loop head.*/
 363     block.irn          = get_nodes_block(backedge_jmp);
 364     backedge_jmp_block = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 365
 366     /* The first copy of block.irn point to it.
 367       The another copies muss point to the copy before it in the array.*/
 368     set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
 369
 370     for (i = 1; i < unroll_factor - 1; ++i)
 371       set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
 372   }
 373 }
 374
 375 /* Set all copies of the induction variable.
 376  *
 377  * @param phi             A phi node in the loop head block.
 378  * @param phi_pred        The predecessor of the phi along the backedge.
 379  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 380  */
 381 static void
 382 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
 383 {
 384   int p;
 385
 386   /* The first copy of Phi node get the node along the backedge as predecessor. The next copies
 387      the copies of this node.*/
 388   phi->copy[0] = phi_pred->irn;
 389   for (p = 1; p < unroll_factor - 1; ++p) {
 390     /* If two phi nodes are in cycle.  */
 391     if (phi_pred->copy[p - 1] == NULL && get_irn_op(phi_pred->irn) == op_Phi) {
 392       if (p & 1)
 393               phi->copy[p] =  phi->irn;
 394       else
 395               phi->copy[p] =  phi_pred->irn;
 396     } else
 397       phi->copy[p] =  phi_pred->copy[p - 1];
 398   }
 399 }
 400
 401 /**
 402  * Decide if the loop head must be copied. A head with important nodes
 403  * must be copied.
 404  *
 405  * @param l_n         A set, where the node of the loop are saved.
 406  * @param info        Contains information about the induction variable.
 407  *
 408  * @return non-zero if the loop head must be copied
 409  */
 410 static int
 411 loop_head_nodes(set *l_n, induct_var_info *info)
 412 {
 413   int must_copy = 0;
 414   copies_t *value;
 415   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 416
 417   for (value = set_first(l_n); value != NULL; value = set_next(l_n))
 418     if (is_no_Block(value->irn) && get_nodes_block(value->irn) == loop_head)
 419       /* If the loop head contains just this nodes, than must not be copy. */
 420       switch (get_irn_opcode(value->irn)) {
 421       case iro_Cond:
 422               break;
 423       case iro_Phi:
 424               break;
 425       case iro_Proj:
 426               break;
 427       case iro_Const:
 428               break;
 429       case iro_Cmp:
 430               break;
 431       default:
 432               must_copy = 1;
 433       }
 434     return must_copy;
 435 }
 436
 437 /** Copy all loop nodes.
 438  *
 439  * @param l_n             Contains all nodes of the loop.
 440  * @param info            Contains the loop information.
 441  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 442  */
 443 static void
 444 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
 445 {
 446   int i;
 447   copies_t *value, *info_op, *phi, *loop_h = NULL, key, *value_block;
 448
 449   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 450
 451   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 452     if(value->irn == loop_head)
 453       loop_h = value;
 454     else if (is_Phi_in_loop_head(value->irn, loop_head))
 455       phi = value;
 456     else if (copy_loop_head){
 457       /* The loop head must be copied. */
 458       for (i = 0; i < unroll_factor - 1; i++){
 459               copy_node(value->irn, NULL);
 460               value->copy[i] = get_irn_link(value->irn);
 461       }
 462     } else {
 463       /* The loop head and its nodes must not be copied. */
 464       if((value->irn->op == op_Block             &&
 465                 value->irn != loop_head)               ||
 466                (value->irn->op != op_Block             &&
 467                 get_nodes_block(value->irn) != loop_head)) {
 468               for (i = 0; i<unroll_factor -1; i++){
 469                 copy_node(value->irn, NULL);
 470                 value->copy[i] = get_irn_link(value->irn);
 471               }
 472       }
 473     }
 474   }
 475   /* Treat the loop head block */
 476   new_loop_head(l_n, info, loop_h, unroll_factor, copy_loop_head);
 477
 478   /* Similarly treat the Phis in the loop head block. info->op is the node
 479      along the backedges. */
 480   key.irn = info->op;
 481   info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 482   assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
 483   for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
 484     ir_node *phi = get_irn_out(loop_head, i);
 485
 486     if (is_Phi(phi)) {
 487       copies_t *phi_pred, *phi_op;
 488
 489       key.irn  = get_Phi_pred(phi, info->op_pred_pos); // info->op;
 490       phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 491       key.irn  = phi;
 492       phi_op   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 493       set_Phi_copies(phi_op, phi_pred, unroll_factor);
 494     }
 495   }
 496
 497   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 498     /* Set the predecessors of the copies. */
 499     if (copy_loop_head)
 500       set_preds(l_n, value, info, unroll_factor, NULL);
 501     else if ((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
 502       set_preds(l_n, value, info, unroll_factor, NULL);
 503
 504     if (is_Phi_in_loop_head(value->irn, loop_head))
 505       /* Set the backedge of phis in the loop head. */
 506       set_phi_backedge(l_n, value, info, unroll_factor);
 507
 508     if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head)) {
 509       ir_node *nodes_block = get_nodes_block(value->irn);
 510
 511       if (!copy_loop_head && nodes_block == loop_head)
 512         continue;
 513
 514       key.irn = nodes_block;
 515       value_block = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 516       /* Set the copy of the node in the accordant copy of its block. */
 517       for(i = 0; i < unroll_factor - 1; i++){
 518         set_nodes_block(value->copy[i], value_block->copy[i]);
 519         //add_End_keepalive(get_irg_end(current_ir_graph), value->copy[p]);
 520       }
 521     }
 522   }
 523 }
 524 /**
 525  * Returns non-zero if a Proj node from the loop has a predecessor that
 526  * can throw an exception.
 527  *
 528  * @param node  A Proj node from the loop.
 529  *
 530  * @return non-zero if the predecessor of the Proj node can throw an exception
 531  */
 532 static int is_exception_possible(ir_node *node)
 533 {
 534   ir_node *pred = get_Proj_pred(node);
 535
 536   /* only fragile ops can throw an exception */
 537   if (! is_fragile_op(pred))
 538     return 0;
 539
 540   /*
 541    * fragile ops that are known NOT to throw
 542    * an exception if their pin_state is op_pin_state_floats
 543    */
 544   return get_irn_pinned(pred) != op_pin_state_floats;
 545 }
 546
 547 /**
 548  * If a node from the loop is predecessor of the end block, then the end block must get all copies
 549  * of this node as predecessors. This is possible with function calls in the unrolling loop.
 550  *
 551  * @param end_block          The end block.
 552  * @param loop_head          The head of the unrolling loop.
 553  * @param l_n                Contains all nodes of the loop.
 554  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 555  *                           of the end block from the unrolling loop.
 556  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 557  */
 558 static void
 559 new_end_block(ir_node *end_block, ir_node *loop_head, set *l_n, set *loop_endblock_outs, int unroll_factor)
 560 {
 561   copies_t key, *value;
 562   int set_el, new_preds, all_new_preds, i, q;
 563   int old_preds = get_Block_n_cfgpreds(end_block);   /* All old predecessors of the end block. */
 564   ir_node **all_in;
 565
 566   for (i = 0; i < old_preds; i++) {
 567     ir_node *pred = get_Block_cfgpred(end_block, i);
 568      key.irn = pred;
 569      value = NULL;
 570      value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 571
 572      /* If a predecessor of the end block is a Proj from the unrolling loop (for function calls) .*/
 573      if (get_irn_op(pred) == op_Proj && is_exception_possible(pred) && value != NULL &&
 574                !(!copy_loop_head && get_nodes_block(pred) == loop_head))
 575        value =  set_insert(loop_endblock_outs, &key, sizeof(key), HASH_PTR(key.irn));
 576   }
 577
 578   /* The set loop_endblock_outs contains all predecessors of the end block from the unrolling loop,
 579    * set_el the amount of such nodes.
 580    */
 581   set_el = set_count (loop_endblock_outs);
 582
 583   /* If the end block haven't such predecessors, we are finished. */
 584   if (!set_el) return;
 585
 586   new_preds = (unroll_factor - 1) * set_el;          /* All new predecessors of the end block */
 587   all_new_preds = old_preds + new_preds;             /* All predecessors of this block. */
 588   all_in = alloca(sizeof(*all_in) * all_new_preds);  /* A array with size for all predecessors of this block. */
 589
 590   for (i = 0; i < old_preds; i++)
 591     all_in[i] = get_Block_cfgpred(end_block, i);    /* The old predecessors. */
 592
 593   value = set_first(loop_endblock_outs);
 594
 595   for (; value != NULL; value = set_next(loop_endblock_outs)) {
 596     key.irn = value->irn;
 597     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 598     for (q = 0; q < unroll_factor - 1 ; q++) {
 599       all_in[i++] = value->copy[q];                 /* The new predecessors. */
 600     }
 601
 602   }
 603
 604   /* Replace the old predecessors of the end block wit´h the new ones. */
 605   set_irn_in(end_block, all_new_preds, all_in);
 606 }
 607
 608 /** new_after_loop_block
 609  *
 610  * The after loop block must examine the possible exceptions in the loop.
 611  * If a (Proj) node from the loop is predecessor of this block, then the
 612  * after loop block must have as well all copies of this node as predecessors.
 613  *
 614  * @param l_n             Contains all nodes of the loop.
 615  * @block block           A block after the loop.
 616  * @param loop_in         A node from the loop, that is predecessor of the end block.
 617  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 618  */
 619 static void
 620 new_after_loop_block (set *l_n, ir_node* block, copies_t *loop_in, int unroll_factor)
 621 {
 622   copies_t key, *value;
 623   int i, p, q, s, old_preds, new_preds, all_new_preds ;
 624   ir_node **all_in;
 625
 626   /* The node from the unrolling loop must be a Proj. */
 627   if (loop_in->irn->op != op_Proj) return;
 628
 629   old_preds = get_Block_n_cfgpreds(block);     /* All old predecessors of this block. */
 630   new_preds = old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 631   all_new_preds = old_preds + new_preds;       /* All predecessors of this block. */
 632
 633   all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 634
 635   for (i = 0 ; i < old_preds; i++)
 636     all_in[i] = get_Block_cfgpred(block, i);      /* The old predecessors. */
 637
 638   q = old_preds;
 639   for (i = 0; i < old_preds; i++){
 640     key.irn = all_in[i];
 641     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 642     p = 0;
 643     for (s = 0; s < (unroll_factor - 1); s++) {
 644       all_in[q++] = value->copy[p++];          /* The new predecessors. */
 645     }
 646   }
 647   /* Replace the old predecessors of the end block whit the new one. */
 648   set_irn_in(block, all_new_preds, all_in);
 649 }
 650
 651 /** new_after_loop_node
 652  *
 653  * An after loop node (phi or call) must examine the possible exceptions in the loop.
 654  * If a (Proj) node from the loop is predecessor of this node, then the after loop
 655  * node must have as well all copies of this node as predecessors.
 656  *
 657  * @param l_n             Contains all nodes of the loop.
 658  * @param loop_outs       Contains nodes after the loop,that have as predecessor a node from the loop.
 659  * @block node            A node after the loop.
 660  * @param loop_in         A node (Proj) from the loop, that is predecessor of *node.
 661  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 662  */
 663 static void
 664 new_after_loop_node(set *l_n, set *loop_outs, ir_node *node, copies_t *loop_in, int unroll_factor)
 665 {
 666   ir_node *pred, *block_pred = NULL, *node_block, *new_phi;
 667   int phi = 0, old_preds, new_preds, all_new_preds, p, q, i, s;
 668   copies_t key, *value = NULL;
 669   ir_node **all_in;
 670
 671   old_preds = get_irn_arity(node);            /* All old predecessors of this node. */
 672   new_preds =old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 673   all_new_preds = old_preds + new_preds;      /* All predecessors of this block. */
 674
 675   all_in  = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 676
 677   /* Verification Predecessors, successors and operation of node and loop_in.
 678    * loop_in must be a Proj node.
 679    */
 680   if (loop_in->irn->op != op_Proj) return;
 681   /* Node must be operation Phi with mode memory or a Call node. */
 682   if (get_irn_op(node) == op_Phi  &&
 683       get_irn_mode(node) == mode_M){
 684     /* If node is a Phi node,then must have a Call node as successor. */
 685     for (i = 0; i < get_irn_n_outs(node); i++)
 686       if (get_irn_op(get_irn_out(node, i)) == op_Call) {
 687               phi = 1;
 688         break;
 689       }
 690
 691     if (!phi) return;
 692   }
 693
 694   /* The predecessor of loop_in must not be a loop invariant. */
 695   pred = get_Proj_pred(loop_in->irn);
 696   key.irn = pred;
 697   value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 698   if (! value) return;
 699
 700   node_block = get_nodes_block(node);
 701
 702   /* The block of node must also have a (Proj) predecessor from the unrolling loop. */
 703   for (i = 0; i < get_Block_n_cfgpreds(node_block); i++) {
 704     block_pred = get_Block_cfgpred(node_block, i);
 705
 706     if (get_irn_op(block_pred) == op_Proj) {
 707       if (get_Proj_pred(block_pred) == pred)
 708         break;
 709     }
 710     else {
 711       block_pred = NULL;
 712     }
 713   }
 714
 715   if (! block_pred) return;
 716
 717   key.irn = block_pred;
 718   value   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 719
 720   if (! value)
 721     return;
 722   else
 723     new_after_loop_block(l_n, node_block, value, unroll_factor);
 724
 725   for (i = 0; i < old_preds; ++i)
 726     all_in[i] = get_irn_n(node, i);  /* The old predecessors. */
 727
 728   q = old_preds;
 729   for (i = 0; i < old_preds; ++i) {
 730     key.irn = all_in[i];
 731     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 732     p = 0;
 733     for (s = 0; s < (unroll_factor - 1); ++s) {
 734       all_in[q] = value->copy[p];  /* The new predecessors. */
 735       p++;
 736       q++;
 737     }
 738   }
 739   /* A new phi node with the new predecessors. */
 740   new_phi = new_r_Phi(current_ir_graph, get_nodes_block(node), all_new_preds,all_in,
 741                        get_irn_mode(node));
 742
 743   if (phi)
 744     exchange(node, new_phi);
 745   else{
 746     for (i = 0; i < get_irn_arity(node); ++i)
 747       if (get_irn_n(node, i) == pred)
 748         set_irn_n(node, i, new_phi);
 749   }
 750
 751   /* The set loop_outs contains the visited nodes and their blocks. */
 752   key.irn = node;
 753   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 754   key.irn = get_nodes_block(node);
 755   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 756 }
 757
 758
 759 /* Set the outs of the unrolling loop. All loop outs of a node muss now
 760  * point to the last copy of it. Just phi nodes in the loop head and proj
 761  * nodes save it outs. The all copies of some Projs  have too outs.
 762  *
 763  * @param l_n                Contains all nodes of the loop.
 764  * @param loop_outs          The set contains the visited and changed loop outs.
 765  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 766  *                           of the end block from the unrolling loop.
 767  * @param info               Contains the loop information.
 768  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 769  */
 770 static void
 771 set_loop_outs(set *l_n, set *loop_outs, set *loop_endblock_outs, induct_var_info *info, int unroll_factor)
 772 {
 773   copies_t *value, key;
 774   int i, p;
 775   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 776   ir_node *end_block = get_irg_end_block(current_ir_graph);
 777
 778   for (value = set_first(l_n); value != NULL; value = set_next(l_n))
 779     if (! is_Phi_in_loop_head(value->irn, loop_head) &&
 780         (get_irn_opcode(value->irn) == iro_Proj && value->copy[0] != NULL))
 781       for (i = 0; i < get_irn_n_outs(value->irn); ++i) {
 782         key.irn = get_irn_out(value->irn, i);
 783
 784         /* Search for loop outs. */
 785         if (set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
 786           if ((key.irn->op == op_Block && get_Block_dom_depth(key.irn)  >
 787                get_Block_dom_depth(loop_head))                          ||
 788               (key.irn->op != op_Block && get_Block_dom_depth(get_nodes_block(key.irn)) >
 789                get_Block_dom_depth(loop_head))) {
 790
 791             for (p = 0; p < get_irn_arity(key.irn); p++)
 792               if (value->irn == get_irn_n(key.irn, p)) {
 793                               if (get_irn_op(value->irn) == op_Proj && is_exception_possible(value->irn)) {
 794                     if (set_find(loop_outs, &key, sizeof(key), HASH_PTR(key.irn)) == NULL) {
 795                       /* If the loop out is for exceptions in the loop. */
 796                       if ((get_irn_op(key.irn) == op_Phi && get_irn_mode(key.irn) == mode_M) ||
 797                         get_irn_op(key.irn) == op_Call)
 798                         new_after_loop_node(l_n,loop_outs, key.irn, value, unroll_factor);
 799                       else
 800                         continue;
 801                     } else
 802                       continue;
 803                   } else
 804                     set_irn_n(key.irn, p, value->copy[unroll_factor-2]);
 805               }
 806           }
 807       }
 808   /* This function searches for loop outs associated with function call in the unrolling loop. */
 809   new_end_block (end_block, loop_head, l_n, loop_endblock_outs, unroll_factor);
 810 }
 811
 812 /**
 813  * Unroll the loop body with a factor that must be power of two.
 814  * Called from a post walker.
 815  *
 816  * @param n        An IR node.
 817  * @param env      points to a result
 818  */
 819 static void do_loop_unroll(ir_node *n, void *env)
 820 {
 821   int *unroll_done = env;
 822   induct_var_info info;
 823   copies_t *value;
 824   set *loop_nodes, *loop_outs, *loop_endblock_outs;
 825   ir_node *cmp_out, *phi_init, *loop_head;
 826   ir_node *backedge_jmp;
 827   int l_sons = 0, unroll_factor = 0;
 828   int cmp_typ, init, iter_end, iter_increment, diff, iter_number;
 829   int backedge_pos;
 830
 831   copy_loop_head = 0;
 832   info.itervar_phi = n;
 833
 834   /* The IR node must be a induction variable. */
 835   if (get_irn_op(n) == op_Phi) {
 836     if (is_induction_variable (&info) == NULL)
 837       return;
 838   }
 839   else
 840     return;
 841
 842   /* Brute force limiting of loop body size. */
 843   if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
 844
 845   /* Only unroll loops that compare against a constant for exiting. */
 846   if (info.cmp == NULL) return;
 847
 848   /* We only want to unroll innermost loops. */
 849   l_sons = get_loop_n_sons(info.l_itervar_phi);
 850   if ( l_sons != 0)
 851     return;
 852
 853   cmp_out  = get_irn_out(info.cmp, 0);
 854   phi_init = get_Phi_pred(info.itervar_phi, info.init_pred_pos);
 855
 856   if (! is_Proj(cmp_out)) return;
 857   if (get_irn_op(info.increment) != op_Const   ||
 858       get_irn_op(phi_init) != op_Const         ||
 859       get_irn_op(info.cmp_const) != op_Const) return;
 860
 861   cmp_typ        = get_Proj_proj(cmp_out);
 862   init           = get_tarval_long(get_Const_tarval(phi_init));
 863   iter_end       = get_tarval_long(get_Const_tarval(info.cmp_const));
 864   iter_increment = get_tarval_long(get_Const_tarval(info.increment));
 865
 866   if (iter_end < init){
 867     int p = iter_end;
 868     iter_end = init;
 869     init = p;
 870   }
 871
 872   iter_increment = iter_increment < 0 ? -iter_increment : iter_increment;
 873   diff = iter_end - init;
 874
 875   if (diff == 0 || iter_increment == 0) return;
 876   /* Test for the value of unroll factor. */
 877   iter_number = diff/iter_increment;
 878   if ((cmp_typ == pn_Cmp_Le || cmp_typ == pn_Cmp_Ge) && (iter_end % iter_increment == 0))
 879     iter_number ++;
 880
 881   if ((iter_number & 3) == 0)
 882     unroll_factor = 4;
 883   else if ((iter_number % 3) == 0)
 884     unroll_factor = 3;
 885   else if ((iter_number & 1) == 0)
 886     unroll_factor = 2;
 887   else return;
 888
 889   if (get_firm_verbosity())
 890     printf("\nloop unrolling with factor %d \n", unroll_factor);
 891
 892   /* ok, we will do unrolling */
 893   *unroll_done += 1;
 894
 895   /* The unroll factor must be less than 4. */
 896   assert(unroll_factor <= MAX_UNROLL);
 897
 898   loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
 899
 900   assert(loop_head != NULL && is_Block(loop_head));
 901
 902   /* We assume, the loop head has exactly one backedge.  The position of
 903      the backedge is in the following variable: */
 904   backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
 905
 906   /* A set with the nodes to copy. */
 907   loop_nodes = new_set(set_cmp, 8);
 908   /* A set with the loop outs for exceptions. */
 909   loop_outs =  new_set(set_cmp, 8);
 910
 911   /* A set containing all predecessors
 912      of the end block from the unrolling loop. */
 913   loop_endblock_outs = new_set(set_cmp, 8);
 914
 915   /* Find all nodes of the unrolling loop. */
 916   find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
 917
 918   /* Decide if the loop head to be copy.*/
 919   copy_loop_head = loop_head_nodes(loop_nodes, &info);
 920
 921   /* Copy all nodes of the unrolling loop, that muss be copy. */
 922   copy_loop_body(loop_nodes, &info, unroll_factor);
 923
 924   backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
 925
 926   /* Set the backedge of the loop head. */
 927   for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
 928     if (value->irn == backedge_jmp)
 929       set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
 930   }
 931   set_loop_outs(loop_nodes, loop_outs, loop_endblock_outs, &info, unroll_factor);
 932 }
 933
 934 /* Performs loop unrolling for the passed graph. */
 935 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */)
 936 {
 937   ir_graph *rem;
 938   int unroll_done = 0;
 939
 940   if ( !get_opt_loop_unrolling()) return;
 941
 942   rem = current_ir_graph;
 943   current_ir_graph = irg;
 944
 945   /* -- Precompute some information -- */
 946
 947   /* Call algorithm that computes the backedges */
 948   construct_cf_backedges(irg);
 949
 950   /* Call algorithm that computes the dominator trees. */
 951   compute_doms(irg);
 952
 953   /* Call algorithm that computes the out edges */
 954   compute_outs(irg);
 955
 956   collect_phiprojs(irg);
 957
 958   /* -- Search expressions that can be optimized -- */
 959   irg_walk_graph(irg, NULL, do_loop_unroll, &unroll_done);
 960
 961   if (unroll_done) {
 962     /* unrolling was done, all info is invalid */
 963     set_irg_dom_inconsistent(irg);
 964     set_irg_outs_inconsistent(irg);
 965     set_irg_loopinfo_state(current_ir_graph, loopinfo_cf_inconsistent);
 966     set_trouts_inconsistent(irg);
 967     set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 968   }
 969
 970   current_ir_graph = rem;
 971 }