nsz Git - libfirm/blob - ir/opt/loop_unrolling.c

   1 /**
   2  *
   3  * @file loop_unrolling.c
   4  *
   5  * Project:     libFIRM
   6  * File name:   ir/opt/loop_unrolling.c
   7  * Purpose:     Make loop unrolling.
   8  * Author:      Beyhan Veliev
   9  * Modified by:
  10  * Created:     16.11.2004
  11  * CVS-ID:      $Id$
  12  * Copyright:   (c) 2004 Universität Karlsruhe
  13  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  14  */
  15 #ifdef HAVE_CONFIG_H
  16 #include "config.h"
  17 #endif
  18
  19 #ifdef HAVE_STRING_H
  20 #include <string.h>
  21 #endif
  22 #ifdef HAVE_MALLOC_H
  23 #include <malloc.h>
  24 #endif
  25 #ifdef HAVE_ALLOCA_H
  26 #include <alloca.h>
  27 #endif
  28
  29 # include "loop_unrolling.h"
  30
  31 #include "irnode_t.h"
  32 # include "irgwalk.h"
  33 # include "ircons.h"
  34 # include "irgmod.h"
  35 # include "irloop_t.h"
  36 # include "irgopt_t.h"
  37 # include "irouts.h"
  38 # include "trouts.h"
  39 # include "hashptr.h"
  40 # include "pset.h"
  41 # include "strength_red.h"
  42 # include "compute_loop_info.h"
  43 # include "irdump.h"
  44
  45 /* We will unroll maximal 4-times.  */
  46 #define MAX_UNROLL 4
  47
  48 /* We will know if the head of the loop to be copy.
  49  * Default "0"  don't copy.*/
  50 static int copy_loop_head;
  51
  52 typedef struct {
  53   ir_node *irn ;                /* Node of the loop to be unrolling*/
  54   ir_node *copy[MAX_UNROLL] ;   /* The copy of the node */
  55 } copies_t;
  56
  57 /**
  58  * Compare two elements of the copies_t set
  59  */
  60 static int set_cmp(const void *elt, const void *key, size_t size)
  61 {
  62   const copies_t *c1 = elt;
  63   const copies_t *c2 = key;
  64
  65   return c1->irn != c2->irn;
  66 }
  67
  68 static INLINE int * new_backedge_arr(struct obstack *obst, int size)
  69 {
  70   int *res = NEW_ARR_D (int, obst, size);
  71   memset(res, 0, sizeof(int) * size);
  72   return res;
  73 }
  74
  75 static INLINE void new_backedge_info(ir_node *n) {
  76   switch(get_irn_opcode(n)) {
  77   case iro_Block:
  78     n->attr.block.cg_backedge = NULL;
  79     n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  80     break;
  81   case iro_Phi:
  82     n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  83     break;
  84   case iro_Filter:
  85     n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
  86     break;
  87   default: ;
  88   }
  89 }
  90
  91 /**
  92  * Remember the new node in the old node by using a field all nodes have.
  93  */
  94 static INLINE void
  95 set_new_node (ir_node *old, ir_node *new)
  96 {
  97   old->link = new;
  98 }
  99
 100 /**
 101  * Copies the node to the new obstack. The Ins of the new node point to
 102  * the predecessors on the old obstack.  For block/phi nodes not all
 103  * predecessors might be copied.  n->link points to the new node.
 104  * For Phi and Block nodes the function allocates in-arrays with an arity
 105  * only for useful predecessors.  The arity is determined by counting
 106  * the non-bad predecessors of the block.
 107  *
 108  * @param n    The node to be copied
 109  * @param env  if non-NULL, the node number attribute will be copied to the new node
 110  */
 111 void
 112 copy_irn (ir_node *n, void *env)
 113 {
 114   ir_graph *irg;
 115   ir_node *nn;
 116   int new_arity;
 117   irg              = env;
 118   ir_op *op        = get_irn_op(n);
 119   int copy_node_nr = 0;
 120
 121   if(irg == NULL)
 122     irg = current_ir_graph;
 123
 124   /* The end node looses it's flexible in array.  This doesn't matter,
 125      as dead node elimination builds End by hand, inlineing doesn't use
 126      the End node. */
 127   /* assert(op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */
 128
 129   if (op == op_Bad)
 130     /* node copied already */
 131     return;
 132
 133   new_arity = get_irn_arity(n);
 134
 135   nn = new_ir_node(get_irn_dbg_info(n),
 136            irg,
 137            NULL,            /* no block yet, will be set later */
 138            op,
 139            get_irn_mode(n),
 140            new_arity,
 141            get_irn_in(n));
 142
 143
 144   /* Copy the attributes.  These might point to additional data.  If this
 145      was allocated on the old obstack the pointers now are dangling.  This
 146      frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 147   copy_node_attr(n, nn);
 148   new_backedge_info(nn);
 149   set_new_node(n, nn);
 150
 151 #if DEBUG_libfirm
 152   if (copy_node_nr) {
 153     /* for easier debugging, we want to copy the node numbers too */
 154     nn->node_nr = n->node_nr;
 155   }
 156 #endif
 157
 158 }
 159 /*Check if phi in the loop head is.
 160  *
 161  * @param phi               Must be a phi node from the loop.
 162  * @param loop_head         The loop head .
 163  */
 164 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
 165   assert(is_Block(loop_head));
 166   return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
 167 }
 168
 169 /**
 170  * Copies predecessors of node to copies of node.
 171  * If the predecessor is a loop invariant, then the copy get it
 172  * as predecessor, else the copy of the predecessor.
 173  *
 174  * @param l_n                A set, where the node of the loop are saved.
 175  * @param value              A element of the set.
 176  * @param info               Contains information about the induction variable.
 177  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 178  * @param env                Free environment pointer.
 179  */
 180 static void
 181 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
 182 {
 183   int i, p, irn_arity;
 184   copies_t key, *value_pred;
 185   if(value->copy[0] == NULL ||
 186      get_irn_op(value->irn) != get_irn_op(value->copy[0]))
 187     return;
 188   // The head of the unrolling loop.
 189   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 190   irn_arity = get_irn_arity(value->irn);
 191
 192   for (i = 0; i < irn_arity; i++) {
 193     ir_node *pred = get_irn_n(value->irn, i);
 194
 195     key.irn = pred;
 196     value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
 197
 198     if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
 199       if (value_pred == NULL) {
 200               /* Is loop invariant. */
 201               for (p = 0; p < unroll_factor - 1; p++)
 202                 set_irn_n(value->copy[p], i, pred);
 203               /* pred is a loop invariant. The copies of the successors get it as predecessor. */
 204       }
 205       else {
 206               for (p = 0; p < unroll_factor - 1; p++)
 207                 set_irn_n(value->copy[p], i, value_pred->copy[p]);
 208           /* value_pred->irn is a node from the unrolling loop.
 209            * The copies of the successors get the
 210                  * copies of value_pred as predecessor.
 211            */
 212       }
 213     }
 214   }
 215 }
 216
 217 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
 218  * must now point to the value defined
 219  * in the last copy of the loop body.
 220  *
 221  * @param l_n                A set, where the node of the loop are saved.
 222  * @param value              A phi in the loop head.
 223  * @param info               Contains information about the induction variable.
 224  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 225  */
 226 static void
 227 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
 228 {
 229   copies_t key, *value_pred;
 230
 231   /* info->op_pred_pos is the backedge position. */
 232   key.irn = get_irn_n(value->irn, info->op_pred_pos);
 233   value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 234
 235   /* value->copy[unroll_factor - 2] is the last copy. */
 236   set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
 237 }
 238
 239 /**
 240  * Test for a loop head.
 241  *
 242  * Returns true if the node has predecessors in the loop _and_ out of
 243  * the loop.  Then it is a loop head: The loop can be entered through
 244  * this node.
 245  *
 246  * @param n      The node to be tested. Must be a block.
 247  * @param info   Contains the loop information.
 248  */
 249 static int
 250 is_loop_head(ir_node *n, induct_var_info *info)
 251 {
 252   int i, arity;
 253   int some_outof_loop = 0, some_in_loop = 0;
 254
 255   assert(get_irn_op(n) == op_Block);
 256   arity = get_Block_n_cfgpreds(n);
 257
 258   for (i = 0; i < arity; i++) {
 259     ir_node *pred = get_Block_cfgpred(n, i);
 260     assert(pred);
 261     if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
 262       some_outof_loop = 1;
 263     } else
 264       some_in_loop = 1;
 265   }
 266
 267   return some_outof_loop && some_in_loop;
 268 }
 269
 270
 271 /**
 272  * Test whether the passed loop is a natural loop.
 273  *
 274  * Returns true if the loop has only one loop header and only a single
 275  * back edge.
 276  *
 277  * @param info  Contains the loop information.
 278  */
 279 static int
 280 is_natural_loop(induct_var_info *info)
 281 {
 282   ir_node *l_node;
 283   int l_n_node = 0, i;
 284
 285   l_n_node = get_loop_n_nodes (info->l_itervar_phi);
 286
 287   for (i = 1; i < (l_n_node); i ++) {
 288     l_node = get_loop_node (info->l_itervar_phi, i);
 289     if (is_loop_head(l_node, info)) return 0;
 290
 291     if (has_backedges(l_node) && i != l_n_node-1) return 0;
 292   }
 293
 294   return 1;
 295 }
 296
 297 /**
 298  * Search for all nodes of a loop.
 299  *
 300  * @param node       The induction variable of the loop.
 301  * @param loop_head  The head of the loop.
 302  * @param l_n        A set, where the node of the loop are saved.
 303  */
 304 static void
 305 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
 306 {
 307   int i;
 308   copies_t key, *value;
 309
 310   /* Add this node to the list. */
 311   key.irn  = node;
 312
 313   /* Initialize all copies of the added node with NULL.*/
 314   for (i = 0; i < 4; ++i)
 315     key.copy[i] = NULL;
 316   value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 317
 318   /* Add all outs of this node to the list, if they are within the loop. */
 319   for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
 320     ir_node *pred = get_irn_out(node, i);
 321
 322     key.irn = pred;
 323     if (!is_loop_invariant(pred, loop_head)         &&
 324             set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 325       find_loop_nodes(pred, loop_head, l_n);
 326     }
 327   }
 328
 329   /* Add all ins if they are within the loop. */
 330   for (i = get_irn_arity(node) - 1; i >=0; --i) {
 331     ir_node *pred = get_irn_n(node, i);
 332
 333     key.irn = pred;
 334     if (!is_loop_invariant(pred, loop_head)         &&
 335               set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 336       find_loop_nodes(pred, loop_head, l_n);
 337     }
 338   }
 339 }
 340
 341 /**
 342  * Make a new loop head if copy_head = 1.
 343  *
 344  * @param l_n              A set, where the node of the loop are saved.
 345  * @param info             Contains the loop information.
 346  * @param value            A element of the set.
 347  * @param unroll_factor    A integer 2 <= unroll_factor <= 4.
 348  * @param copy_head        if non-zero, the loop head will be copied
 349  *
 350  */
 351 static void
 352 new_loop_head(set *l_n, induct_var_info *info, copies_t *value, int unroll_factor, int copy_head)
 353 {
 354   int i;
 355   copies_t block, *value_backedge_jmp, *backedge_jmp_block;
 356   /* The  predecessor of the loop head in the  backedge position */
 357   ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
 358
 359   block.irn          = backedge_jmp;
 360   value_backedge_jmp = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 361
 362   if (copy_head) {
 363     /* The first copy of the loop head must point to the loop head.*/
 364     value->copy[0] = new_Block(1, &backedge_jmp);
 365     if(info->l_itervar_phi->flags & do_loop){
 366       value_backedge_jmp->copy[0] = new_r_Jmp(current_ir_graph, value->copy[0]);
 367       for (i = 1; i < unroll_factor - 1; ++i) {
 368       /* all other copies must point to the copy before it in the array. */
 369       value->copy[i] = new_Block(1, &backedge_jmp);
 370       value_backedge_jmp->copy[i] = new_r_Jmp(current_ir_graph, value->copy[i]);
 371       }
 372       for (i = 1; i < unroll_factor - 1; ++i){
 373         set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
 374         set_Block_cfgpred (value->copy[i], 0, value_backedge_jmp->copy[i-1]);
 375       }
 376     }else
 377       for (i = 1; i < unroll_factor - 1; ++i) {
 378         /* all other copies must point to the copy before it in the array. */
 379         set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
 380         value->copy[i] = new_Block(1, &value_backedge_jmp->copy[i-1]);
 381       }
 382   }
 383   else {
 384     /* If the loop head must not be copy. block.irn is the successor of the loop head.*/
 385     block.irn          = get_nodes_block(backedge_jmp);
 386     backedge_jmp_block = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 387
 388     /* The first copy of block.irn point to it.
 389       The another copies muss point to the copy before it in the array.*/
 390     set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
 391
 392     for (i = 1; i < unroll_factor - 1; ++i)
 393       set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
 394   }
 395 }
 396
 397 /* Set all copies of the induction variable.
 398  *
 399  * @param phi             A phi node in the loop head block.
 400  * @param phi_pred        The predecessor of the phi along the backedge.
 401  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 402  */
 403 static void
 404 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
 405 {
 406   int p;
 407
 408   if(phi_pred != NULL){
 409     /* The first copy of Phi node get the node along the backedge as predecessor. The next copies
 410        the copies of this node.*/
 411     phi->copy[0] = phi_pred->irn;
 412     for (p = 1; p < unroll_factor - 1; ++p) {
 413       /* If two phi nodes are in cycle.  */
 414       if (phi_pred->copy[p - 1] == NULL && get_irn_op(phi_pred->irn) == op_Phi) {
 415         if (p & 1)
 416           phi->copy[p] =  phi->irn;
 417         else
 418           phi->copy[p] =  phi_pred->irn;
 419       } else
 420         phi->copy[p] =  phi_pred->copy[p - 1];
 421     }
 422   }else
 423     /* The predecessors of phi are loop invariant and the copies von phi
 424        must be phi it self.*/
 425     for(p = 0; p < unroll_factor - 1; ++p)
 426       phi->copy[p] = phi->irn;
 427 }
 428
 429 /**
 430  * Decide if the loop head must be copied. A head with important nodes
 431  * must be copied.
 432  *
 433  * @param l_n         A set, where the node of the loop are saved.
 434  * @param info        Contains information about the induction variable.
 435  *
 436  * @return non-zero if the loop head must be copied
 437  */
 438 static int
 439 loop_head_nodes(set *l_n, induct_var_info *info)
 440 {
 441   int must_copy = 0;
 442   copies_t *value;
 443   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 444
 445   for (value = set_first(l_n); value != NULL; value = set_next(l_n))
 446     if (is_no_Block(value->irn) && get_nodes_block(value->irn) == loop_head)
 447       /* If the loop head contains just this nodes, than must not be copy. */
 448       switch (get_irn_opcode(value->irn)) {
 449       case iro_Cond:
 450               break;
 451       case iro_Phi:
 452               break;
 453       case iro_Proj:
 454               break;
 455       case iro_Const:
 456               break;
 457       case iro_Cmp:
 458               break;
 459       default:
 460               must_copy = 1;
 461       }
 462     return must_copy;
 463 }
 464
 465 /** Copy all loop nodes.
 466  *
 467  * @param l_n             Contains all nodes of the loop.
 468  * @param info            Contains the loop information.
 469  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 470  */
 471 static void
 472 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
 473 {
 474   int i, opt;
 475   copies_t *value, *info_op, *phi, *loop_h = NULL, key, *value_block;
 476   ir_node *proj_b, *cond, *proj_1, *proj_0;
 477   proj_b = get_irn_out(info->cmp, 0);
 478   cond   = get_irn_out(proj_b, 0);
 479   proj_0 = get_irn_out(cond, 0);
 480   proj_1 = get_irn_out(cond, 1);
 481
 482   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 483   /* I will create some nodes and need the optimization to
 484      be set off.*/
 485   opt = get_opt_optimize();
 486   set_opt_optimize(0);
 487
 488   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 489     if(value->irn == loop_head)
 490       loop_h = value;
 491     else if (is_Phi_in_loop_head(value->irn, loop_head))
 492       phi = value;
 493     else if (copy_loop_head){
 494       if(value->irn == proj_0){
 495         for (i = 0; i < unroll_factor - 1; i++)
 496           /* In the copies of the loop head we replace the cmp branche with a jmp node.
 497              This is just for "for" loops."do" loops are handled in function
 498              new_loop_head().*/
 499           value->copy[i] = new_r_Jmp(current_ir_graph, get_nodes_block(value->irn));
 500       }else if(value->irn != proj_b && value->irn != cond &&
 501               value->irn != proj_1)
 502         /* The loop head must be copied. */
 503         for (i = 0; i < unroll_factor - 1; i++){
 504           copy_irn(value->irn, NULL);
 505           value->copy[i] = get_irn_link(value->irn);
 506         }
 507     } else {
 508       /* The loop head and its nodes must not be copied. */
 509       if((value->irn->op == op_Block             &&
 510           value->irn != loop_head)               ||
 511          (value->irn->op != op_Block             &&
 512           get_nodes_block(value->irn) != loop_head)) {
 513           for (i = 0; i<unroll_factor -1; i++){
 514             copy_irn(value->irn, NULL);
 515             value->copy[i] = get_irn_link(value->irn);
 516           }
 517       }
 518     }
 519   }
 520
 521   /* Treat the loop head block */
 522   new_loop_head(l_n, info, loop_h, unroll_factor, copy_loop_head);
 523   /* I have created the nodes and I set the optimization
 524      to it old value.*/
 525   set_opt_optimize(opt);
 526
 527   /* Similarly treat the Phis in the loop head block. info->op is the node
 528      along the backedges. */
 529   key.irn = info->op;
 530   info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 531   assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
 532   for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
 533     ir_node *phi = get_irn_out(loop_head, i);
 534
 535     if (is_Phi(phi)) {
 536       copies_t *phi_pred, *phi_op;
 537
 538       key.irn  = get_Phi_pred(phi, info->op_pred_pos); // info->op;
 539       phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 540       key.irn  = phi;
 541       phi_op   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 542       set_Phi_copies(phi_op, phi_pred, unroll_factor);
 543     }
 544   }
 545
 546   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 547
 548     /* Set the predecessors of the copies. */
 549     if (copy_loop_head){
 550         set_preds(l_n, value, info, unroll_factor, NULL);
 551     }else if ((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
 552       set_preds(l_n, value, info, unroll_factor, NULL);
 553
 554     if (is_Phi_in_loop_head(value->irn, loop_head) &&
 555         has_backedges(value->irn))
 556       /* Set the backedge of phis in the loop head. */
 557       set_phi_backedge(l_n, value, info, unroll_factor);
 558
 559     if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head) &&
 560         value->copy[0] != NULL ) {
 561       ir_node *nodes_block = get_nodes_block(value->irn);
 562
 563       if (!copy_loop_head && nodes_block == loop_head)
 564         continue;
 565
 566       key.irn = nodes_block;
 567       value_block = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 568       /* Set the copy of the node in the accordant copy of its block. */
 569       for(i = 0; i < unroll_factor - 1; i++){
 570         set_nodes_block(value->copy[i], value_block->copy[i]);
 571       }
 572     }
 573   }
 574   /* I muss repaire some control flow edges, when the loop head
 575      have been copied.*/
 576   if (copy_loop_head && !(info->l_itervar_phi->flags & do_loop)){
 577     key.irn = proj_0;
 578     value   = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 579     key.irn = get_irn_out(proj_0, 0);
 580     info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 581     for (i = 0; i < unroll_factor - 1; i++){
 582       set_Block_cfgpred(info_op->copy[i], 0, value->copy[i]);
 583     }
 584   }
 585 }
 586 /**
 587  * Returns non-zero if a Proj node from the loop has a predecessor that
 588  * can throw an exception.
 589  *
 590  * @param node  A Proj node from the loop.
 591  *
 592  * @return non-zero if the predecessor of the Proj node can throw an exception
 593  */
 594 static int is_exception_possible(ir_node *node)
 595 {
 596   ir_node *pred = get_Proj_pred(node);
 597
 598   /* only fragile ops can throw an exception */
 599   if (! is_fragile_op(pred))
 600     return 0;
 601
 602   /*
 603    * fragile ops that are known NOT to throw
 604    * an exception if their pin_state is op_pin_state_floats
 605    */
 606   return get_irn_pinned(pred) != op_pin_state_floats;
 607 }
 608
 609 /**
 610  * If a node from the loop is predecessor of the end block, then the end block must get all copies
 611  * of this node as predecessors. This is possible with function calls in the unrolling loop.
 612  *
 613  * @param end_block          The end block.
 614  * @param loop_head          The head of the unrolling loop.
 615  * @param l_n                Contains all nodes of the loop.
 616  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 617  *                           of the end block from the unrolling loop.
 618  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 619  */
 620 static void
 621 new_end_block(ir_node *end_block, ir_node *loop_head, set *l_n, set *loop_endblock_outs, int unroll_factor)
 622 {
 623   copies_t key, *value;
 624   int set_el, new_preds, all_new_preds, i, q;
 625   int old_preds = get_Block_n_cfgpreds(end_block);   /* All old predecessors of the end block. */
 626   ir_node **all_in;
 627
 628   for (i = 0; i < old_preds; i++) {
 629     ir_node *pred = get_Block_cfgpred(end_block, i);
 630      key.irn = pred;
 631      value = NULL;
 632      value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 633
 634      /* If a predecessor of the end block is a Proj from the unrolling loop (for function calls) .*/
 635      if (get_irn_op(pred) == op_Proj && is_exception_possible(pred) && value != NULL &&
 636                !(!copy_loop_head && get_nodes_block(pred) == loop_head))
 637        value =  set_insert(loop_endblock_outs, &key, sizeof(key), HASH_PTR(key.irn));
 638   }
 639
 640   /* The set loop_endblock_outs contains all predecessors of the end block from the unrolling loop,
 641    * set_el the amount of such nodes.
 642    */
 643   set_el = set_count (loop_endblock_outs);
 644
 645   /* If the end block haven't such predecessors, we are finished. */
 646   if (!set_el) return;
 647
 648   new_preds = (unroll_factor - 1) * set_el;          /* All new predecessors of the end block */
 649   all_new_preds = old_preds + new_preds;             /* All predecessors of this block. */
 650   all_in = alloca(sizeof(*all_in) * all_new_preds);  /* A array with size for all predecessors of this block. */
 651
 652   for (i = 0; i < old_preds; i++)
 653     all_in[i] = get_Block_cfgpred(end_block, i);    /* The old predecessors. */
 654
 655   value = set_first(loop_endblock_outs);
 656
 657   for (; value != NULL; value = set_next(loop_endblock_outs)) {
 658     key.irn = value->irn;
 659     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 660     for (q = 0; q < unroll_factor - 1 ; q++) {
 661       all_in[i++] = value->copy[q];                 /* The new predecessors. */
 662     }
 663
 664   }
 665
 666   /* Replace the old predecessors of the end block wit´h the new ones. */
 667   set_irn_in(end_block, all_new_preds, all_in);
 668 }
 669
 670 /** new_after_loop_block
 671  *
 672  * The after loop block must examine the possible exceptions in the loop.
 673  * If a (Proj) node from the loop is predecessor of this block, then the
 674  * after loop block must have as well all copies of this node as predecessors.
 675  *
 676  * @param l_n             Contains all nodes of the loop.
 677  * @param block           A block after the loop.
 678  * @param loop_in         A node from the loop, that is predecessor of the end block.
 679  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 680  */
 681 static void
 682 new_after_loop_block (set *l_n, ir_node* block, copies_t *loop_in, int unroll_factor)
 683 {
 684   copies_t key, *value;
 685   int i, p, q, s, old_preds, new_preds, all_new_preds ;
 686   ir_node **all_in;
 687
 688   /* The node from the unrolling loop must be a Proj. */
 689   if (loop_in->irn->op != op_Proj) return;
 690
 691   old_preds = get_Block_n_cfgpreds(block);     /* All old predecessors of this block. */
 692   new_preds = old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 693   all_new_preds = old_preds + new_preds;       /* All predecessors of this block. */
 694
 695   all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 696
 697   for (i = 0 ; i < old_preds; i++)
 698     all_in[i] = get_Block_cfgpred(block, i);      /* The old predecessors. */
 699
 700   q = old_preds;
 701   for (i = 0; i < old_preds; i++){
 702     key.irn = all_in[i];
 703     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 704     p = 0;
 705     for (s = 0; s < (unroll_factor - 1); s++) {
 706       all_in[q++] = value->copy[p++];          /* The new predecessors. */
 707     }
 708   }
 709   /* Replace the old predecessors of the end block whit the new one. */
 710   set_irn_in(block, all_new_preds, all_in);
 711 }
 712
 713 /** new_after_loop_node
 714  *
 715  * An after loop node (phi or call) must examine the possible exceptions in the loop.
 716  * If a (Proj) node from the loop is predecessor of this node, then the after loop
 717  * node must have as well all copies of this node as predecessors.
 718  *
 719  * @param l_n             Contains all nodes of the loop.
 720  * @param loop_outs       Contains nodes after the loop,that have as predecessor a node from the loop.
 721  * @param node            A node after the loop.
 722  * @param loop_in         A node (Proj) from the loop, that is predecessor of *node.
 723  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 724  */
 725 static void
 726 new_after_loop_node(set *l_n, set *loop_outs, ir_node *node, copies_t *loop_in, int unroll_factor)
 727 {
 728   ir_node *pred, *block_pred = NULL, *node_block, *new_phi;
 729   int phi = 0, old_preds, new_preds, all_new_preds, p, q, i, s;
 730   copies_t key, *value = NULL;
 731   ir_node **all_in;
 732
 733   old_preds = get_irn_arity(node);            /* All old predecessors of this node. */
 734   new_preds =old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 735   all_new_preds = old_preds + new_preds;      /* All predecessors of this block. */
 736
 737   all_in  = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 738
 739   /* Verification Predecessors, successors and operation of node and loop_in.
 740    * loop_in must be a Proj node.
 741    */
 742   if (loop_in->irn->op != op_Proj) return;
 743   /* Node must be operation Phi with mode memory or a Call node. */
 744   if (get_irn_op(node) == op_Phi  &&
 745       get_irn_mode(node) == mode_M){
 746     /* If node is a Phi node,then must have a Call node as successor. */
 747     for (i = 0; i < get_irn_n_outs(node); i++)
 748       if (get_irn_op(get_irn_out(node, i)) == op_Call) {
 749               phi = 1;
 750         break;
 751       }
 752
 753     if (!phi) return;
 754   }
 755
 756   /* The predecessor of loop_in must not be a loop invariant. */
 757   pred = get_Proj_pred(loop_in->irn);
 758   key.irn = pred;
 759   value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 760   if (! value) return;
 761
 762   node_block = get_nodes_block(node);
 763
 764   /* The block of node must also have a (Proj) predecessor from the unrolling loop. */
 765   for (i = 0; i < get_Block_n_cfgpreds(node_block); i++) {
 766     block_pred = get_Block_cfgpred(node_block, i);
 767
 768     if (get_irn_op(block_pred) == op_Proj) {
 769       if (get_Proj_pred(block_pred) == pred)
 770         break;
 771     }
 772     else {
 773       block_pred = NULL;
 774     }
 775   }
 776
 777   if (! block_pred) return;
 778
 779   key.irn = block_pred;
 780   value   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 781
 782   if (! value)
 783     return;
 784   else
 785     new_after_loop_block(l_n, node_block, value, unroll_factor);
 786
 787   for (i = 0; i < old_preds; ++i)
 788     all_in[i] = get_irn_n(node, i);  /* The old predecessors. */
 789
 790   q = old_preds;
 791   for (i = 0; i < old_preds; ++i) {
 792     key.irn = all_in[i];
 793     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 794     p = 0;
 795     for (s = 0; s < (unroll_factor - 1); ++s) {
 796       all_in[q] = value->copy[p];  /* The new predecessors. */
 797       p++;
 798       q++;
 799     }
 800   }
 801   /* A new phi node with the new predecessors. */
 802   new_phi = new_r_Phi(current_ir_graph, get_nodes_block(node), all_new_preds,all_in,
 803                        get_irn_mode(node));
 804
 805   if (phi)
 806     exchange(node, new_phi);
 807   else{
 808     for (i = 0; i < get_irn_arity(node); ++i)
 809       if (get_irn_n(node, i) == pred)
 810         set_irn_n(node, i, new_phi);
 811   }
 812
 813   /* The set loop_outs contains the visited nodes and their blocks. */
 814   key.irn = node;
 815   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 816   key.irn = get_nodes_block(node);
 817   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 818 }
 819
 820
 821 /* Set the outs of the unrolling loop. All loop outs of a node muss now
 822  * point to the last copy of it. Just phi nodes in the loop head and proj
 823  * nodes save it outs. The all copies of some Projs  have too outs.
 824  *
 825  * @param l_n                Contains all nodes of the loop.
 826  * @param loop_outs          The set contains the visited and changed loop outs.
 827  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 828  *                           of the end block from the unrolling loop.
 829  * @param info               Contains the loop information.
 830  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 831  */
 832 static void
 833 set_loop_outs(set *l_n, set *loop_outs, set *loop_endblock_outs, induct_var_info *info, int unroll_factor)
 834 {
 835   copies_t *value, key;
 836   int i, p;
 837   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 838   ir_node *end_block = get_irg_end_block(current_ir_graph);
 839
 840   for (value = set_first(l_n); value != NULL; value = set_next(l_n)){
 841     if(get_irn_node_nr(value->irn) == 35047)
 842       printf("\nHi\n");
 843     if (! is_Phi_in_loop_head(value->irn, loop_head) &&
 844         (get_irn_opcode(value->irn) == iro_Proj && value->copy[0] != NULL))
 845       for (i = 0; i < get_irn_n_outs(value->irn); ++i) {
 846         key.irn = get_irn_out(value->irn, i);
 847
 848         /* Search for loop outs. */
 849         if (set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
 850           if ((key.irn->op == op_Block && get_Block_dom_depth(key.irn)  >
 851                get_Block_dom_depth(loop_head))                          ||
 852               (key.irn->op != op_Block && get_Block_dom_depth(get_nodes_block(key.irn)) >
 853                get_Block_dom_depth(loop_head))) {
 854
 855             for (p = 0; p < get_irn_arity(key.irn); p++)
 856               if (value->irn == get_irn_n(key.irn, p)) {
 857                 if (get_irn_op(value->irn) == op_Proj && is_exception_possible(value->irn)) {
 858                   if (set_find(loop_outs, &key, sizeof(key), HASH_PTR(key.irn)) == NULL) {
 859                     /* If the loop out is for exceptions in the loop. */
 860                     if ((get_irn_op(key.irn) == op_Phi && get_irn_mode(key.irn) == mode_M) ||
 861                         get_irn_op(key.irn) == op_Call)
 862                       new_after_loop_node(l_n,loop_outs, key.irn, value, unroll_factor);
 863                     else
 864                       continue;
 865                   } else
 866                     continue;
 867                 } else
 868                   /* Loop outs in the loop head must be not changed.*/
 869                   if(get_nodes_block(value->irn) != loop_head)
 870                     set_irn_n(key.irn, p, value->copy[unroll_factor-2]);
 871               }
 872           }
 873       }
 874   }
 875   /* This function searches for loop outs associated with function call in the unrolling loop. */
 876   new_end_block (end_block, loop_head, l_n, loop_endblock_outs, unroll_factor);
 877 }
 878
 879 /**
 880  * Unroll the loop body with a factor that must be power of two.
 881  * Called from a post walker.
 882  *
 883  * @param n        An IR node.
 884  * @param env      points to a result
 885  */
 886 static void do_loop_unroll(ir_node *n, void *env)
 887 {
 888   int *unroll_done = env;
 889   induct_var_info info;
 890   copies_t *value;
 891   set *loop_nodes, *loop_outs, *loop_endblock_outs;
 892   ir_node *phi_init, *loop_head;
 893   ir_node *backedge_jmp;
 894   int l_sons = 0, unroll_factor = 0;
 895   tarval *init, *iter_end, *iter_increment,*tarval_null, *tarval_one, *tarval_three, *tarval_two, *diff, *iter_number;
 896   int backedge_pos;
 897
 898   copy_loop_head = 0;
 899   info.itervar_phi = n;
 900
 901   /* The IR node must be a induction variable. */
 902   if (get_irn_op(n) == op_Phi) {
 903     if (is_induction_variable (&info) == NULL)
 904       return;
 905   }
 906   else
 907     return;
 908
 909   /* Brute force limiting of loop body size. */
 910   if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
 911
 912   /* Only unroll loops that compare against a constant for exiting. */
 913   if (info.cmp == NULL) return;
 914
 915   /* We only want to unroll innermost loops. */
 916   l_sons = get_loop_n_sons(info.l_itervar_phi);
 917   if ( l_sons != 0)
 918     return;
 919   /* "do" loops are not implementit gut for this reason
 920       at the time we don't unroll them.*/
 921   if(info.l_itervar_phi->flags & do_loop)
 922     return;
 923
 924   phi_init = get_Phi_pred(info.itervar_phi, info.init_pred_pos);
 925
 926   if ((!(info.l_itervar_phi->flags & loop_is_count_loop))  ||
 927       (info.l_itervar_phi->flags & loop_is_endless)        ||
 928       (info.l_itervar_phi->flags & loop_is_dead)           ||
 929       (info.l_itervar_phi->flags & once))
 930     return;
 931
 932   init           = info.l_itervar_phi->loop_iter_start;
 933   iter_end       = info.l_itervar_phi->loop_iter_end;
 934   iter_increment = info.l_itervar_phi->loop_iter_increment;
 935   tarval_null = get_mode_null(get_tarval_mode(iter_increment));
 936   tarval_one  = get_mode_one(get_tarval_mode(init));
 937
 938   if(tarval_is_double(init) || tarval_is_double(iter_end) || tarval_is_double(iter_increment))
 939     return;
 940
 941   if((tarval_is_negative(init) && tarval_is_negative(iter_end)) ||
 942      (!tarval_is_negative(init) && !tarval_is_negative(iter_end)) ||
 943      (tarval_is_null(init) || tarval_is_null(iter_end)))
 944     diff = tarval_abs(tarval_sub(iter_end, init));
 945   else
 946     diff = tarval_add(tarval_abs(iter_end),tarval_abs(init));
 947
 948   iter_increment = tarval_abs(iter_increment);
 949
 950   if(!(info.l_itervar_phi->flags & do_loop))
 951     diff = tarval_add(diff, tarval_one);
 952   /* Test for the value of unroll factor. */
 953   if (tarval_cmp(tarval_mod(diff,iter_increment), tarval_null) == pn_Cmp_Eq)
 954     iter_number = tarval_div(diff, iter_increment);
 955   else
 956     return;
 957   tarval_two = tarval_add(tarval_one, tarval_one);
 958   tarval_three = tarval_add(tarval_two,tarval_one);
 959
 960   if (tarval_cmp(tarval_mod(iter_number, tarval_three), tarval_null) == pn_Cmp_Eq)
 961     unroll_factor = 3;
 962   else if (tarval_cmp(tarval_mod(iter_number, tarval_two), tarval_null) == pn_Cmp_Eq)
 963     unroll_factor = 2;
 964   else return;
 965
 966   if (get_firm_verbosity())
 967     printf("\nloop unrolling with factor %d \n", unroll_factor);
 968
 969   /* ok, we will do unrolling */
 970   *unroll_done += 1;
 971
 972   /* The unroll factor must be less than 4. */
 973   assert(unroll_factor <= MAX_UNROLL);
 974
 975   loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
 976
 977   assert(loop_head != NULL && is_Block(loop_head));
 978
 979   /* We assume, the loop head has exactly one backedge.  The position of
 980      the backedge is in the following variable: */
 981   backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
 982
 983   /* A set with the nodes to copy. */
 984   loop_nodes = new_set(set_cmp, 8);
 985   /* A set with the loop outs for exceptions. */
 986   loop_outs =  new_set(set_cmp, 8);
 987
 988   /* A set containing all predecessors
 989      of the end block from the unrolling loop. */
 990   loop_endblock_outs = new_set(set_cmp, 8);
 991   /* Find all nodes of the unrolling loop. */
 992   find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
 993
 994   /* Decide if the loop head to be copy.*/
 995   copy_loop_head = loop_head_nodes(loop_nodes, &info);
 996
 997   /* Copy all nodes of the unrolling loop, that muss be copy. */
 998   copy_loop_body(loop_nodes, &info, unroll_factor);
 999
1000   backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
1001
1002   /* Set the backedge of the loop head. */
1003   for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
1004     if (value->irn == backedge_jmp)
1005       set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
1006   }
1007   set_loop_outs(loop_nodes, loop_outs, loop_endblock_outs, &info, unroll_factor);
1008 }
1009
1010 /* Performs loop unrolling for the passed graph. */
1011 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */)
1012 {
1013   ir_graph *rem;
1014   int unroll_done = 0;
1015
1016   if ( !get_opt_loop_unrolling()) return;
1017
1018   rem = current_ir_graph;
1019   current_ir_graph = irg;
1020
1021   /* -- Precompute some information -- */
1022
1023   /* Call algorithm that computes the loop information */
1024   // construct_backedges(irg);
1025   compute_loop_info(irg);
1026   /* Call algorithm that computes the backedges */
1027   // construct_cf_backedges(irg);
1028   dump_loop_tree(current_ir_graph, "-deadlooptree");
1029
1030   /* Call algorithm that computes the dominator trees. */
1031   compute_doms(irg);
1032
1033   /* Call algorithm that computes the out edges */
1034   compute_irg_outs(irg);
1035
1036   collect_phiprojs(irg);
1037
1038   /* -- Search expressions that can be optimized -- */
1039   irg_walk_graph(irg, NULL, do_loop_unroll, &unroll_done);
1040
1041   if (unroll_done) {
1042     /* unrolling was done, all info is invalid */
1043     set_irg_dom_inconsistent(irg);
1044     set_irg_outs_inconsistent(irg);
1045     set_irg_loopinfo_state(current_ir_graph, loopinfo_cf_inconsistent);
1046     set_trouts_inconsistent();
1047     set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
1048   }
1049
1050   current_ir_graph = rem;
1051 }