nsz Git - libfirm/blob - ir/opt/loop_unrolling.c

   1 /**
   2  *
   3  * @file loop_unrolling.c
   4  *
   5  * Project:     libFIRM
   6  * File name:   ir/opt/loop_unrolling.c
   7  * Purpose:     Make loop unrolling.
   8  * Author:      Beyhan Veliev
   9  * Modified by:
  10  * Created:     16.11.2004
  11  * CVS-ID:      $Id$
  12  * Copyright:   (c) 2004 Universität Karlsruhe
  13  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  14  */
  15 #ifdef HAVE_CONFIG_H
  16 #include "config.h"
  17 #endif
  18
  19 #ifdef HAVE_STRING_H
  20 #include <string.h>
  21 #endif
  22
  23 #include "loop_unrolling.h"
  24
  25 #include "irnode_t.h"
  26 #include "irgwalk.h"
  27 #include "ircons.h"
  28 #include "irgmod.h"
  29 #include "irloop_t.h"
  30 #include "irgopt_t.h"
  31 #include "irouts.h"
  32 #include "trouts.h"
  33 #include "hashptr.h"
  34 #include "pset.h"
  35 #include "strength_red.h"
  36 #include "compute_loop_info.h"
  37 #include "irdump.h"
  38 #include "irtools.h"
  39 #include "xmalloc.h"
  40
  41 /* We will unroll maximal 4-times.  */
  42 #define MAX_UNROLL 4
  43
  44 /* We will know if the head of the loop to be copy.
  45  * Default "0"  don't copy.*/
  46 static int copy_loop_head;
  47
  48 typedef struct {
  49   ir_node *irn ;                /* Node of the loop to be unrolling*/
  50   ir_node *copy[MAX_UNROLL] ;   /* The copy of the node */
  51 } copies_t;
  52
  53 /**
  54  * Compare two elements of the copies_t set
  55  */
  56 static int set_cmp(const void *elt, const void *key, size_t size)
  57 {
  58   const copies_t *c1 = elt;
  59   const copies_t *c2 = key;
  60
  61   return c1->irn != c2->irn;
  62 }
  63
  64 /**
  65  * Remember the new node in the old node by using a field all nodes have.
  66  */
  67 static INLINE void
  68 set_new_node (ir_node *old, ir_node *new)
  69 {
  70   old->link = new;
  71 }
  72
  73 /*Check if phi in the loop head is.
  74  *
  75  * @param phi               Must be a phi node from the loop.
  76  * @param loop_head         The loop head .
  77  */
  78 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
  79   assert(is_Block(loop_head));
  80   return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
  81 }
  82
  83 /**
  84  * Copies predecessors of node to copies of node.
  85  * If the predecessor is a loop invariant, then the copy get it
  86  * as predecessor, else the copy of the predecessor.
  87  *
  88  * @param l_n                A set, where the node of the loop are saved.
  89  * @param value              A element of the set.
  90  * @param info               Contains information about the induction variable.
  91  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
  92  * @param env                Free environment pointer.
  93  */
  94 static void
  95 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
  96 {
  97   ir_node *loop_head;
  98   int i, p, irn_arity;
  99   copies_t key, *value_pred;
 100   if(value->copy[0] == NULL ||
 101      get_irn_op(value->irn) != get_irn_op(value->copy[0]))
 102     return;
 103
 104   /* The head of the unrolling loop. */
 105   loop_head = get_loop_node(info->l_itervar_phi, 0);
 106   irn_arity = get_irn_arity(value->irn);
 107
 108   for (i = 0; i < irn_arity; i++) {
 109     ir_node *pred = get_irn_n(value->irn, i);
 110
 111     key.irn = pred;
 112     value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
 113
 114     if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
 115       if (value_pred == NULL) {
 116               /* Is loop invariant. */
 117               for (p = 0; p < unroll_factor - 1; p++)
 118                 set_irn_n(value->copy[p], i, pred);
 119               /* pred is a loop invariant. The copies of the successors get it as predecessor. */
 120       }
 121       else {
 122               for (p = 0; p < unroll_factor - 1; p++)
 123                 set_irn_n(value->copy[p], i, value_pred->copy[p]);
 124           /* value_pred->irn is a node from the unrolling loop.
 125            * The copies of the successors get the
 126                  * copies of value_pred as predecessor.
 127            */
 128       }
 129     }
 130   }
 131 }
 132
 133 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
 134  * must now point to the value defined
 135  * in the last copy of the loop body.
 136  *
 137  * @param l_n                A set, where the node of the loop are saved.
 138  * @param value              A phi in the loop head.
 139  * @param info               Contains information about the induction variable.
 140  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 141  */
 142 static void
 143 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
 144 {
 145   copies_t key, *value_pred;
 146
 147   /* info->op_pred_pos is the backedge position. */
 148   key.irn = get_irn_n(value->irn, info->op_pred_pos);
 149   value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 150
 151   /* value->copy[unroll_factor - 2] is the last copy. */
 152   set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
 153 }
 154
 155 /**
 156  * Test for a loop head.
 157  *
 158  * Returns true if the node has predecessors in the loop _and_ out of
 159  * the loop.  Then it is a loop head: The loop can be entered through
 160  * this node.
 161  *
 162  * @param n      The node to be tested. Must be a block.
 163  * @param info   Contains the loop information.
 164  */
 165 static int
 166 is_loop_head(ir_node *n, induct_var_info *info)
 167 {
 168   int i, arity;
 169   int some_outof_loop = 0, some_in_loop = 0;
 170
 171   assert(get_irn_op(n) == op_Block);
 172   arity = get_Block_n_cfgpreds(n);
 173
 174   for (i = 0; i < arity; i++) {
 175     ir_node *pred = get_Block_cfgpred(n, i);
 176     assert(pred);
 177     if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
 178       some_outof_loop = 1;
 179     } else
 180       some_in_loop = 1;
 181   }
 182
 183   return some_outof_loop && some_in_loop;
 184 }
 185
 186
 187 /**
 188  * Test whether the passed loop is a natural loop.
 189  *
 190  * Returns true if the loop has only one loop header and only a single
 191  * back edge.
 192  *
 193  * @param info  Contains the loop information.
 194  */
 195 static int
 196 is_natural_loop(induct_var_info *info)
 197 {
 198   ir_node *l_node;
 199   int l_n_node = 0, i;
 200
 201   l_n_node = get_loop_n_nodes (info->l_itervar_phi);
 202
 203   for (i = 1; i < (l_n_node); i ++) {
 204     l_node = get_loop_node (info->l_itervar_phi, i);
 205     if (is_loop_head(l_node, info)) return 0;
 206
 207     if (has_backedges(l_node) && i != l_n_node-1) return 0;
 208   }
 209
 210   return 1;
 211 }
 212
 213 /**
 214  * Search for all nodes of a loop.
 215  *
 216  * @param node       The induction variable of the loop.
 217  * @param loop_head  The head of the loop.
 218  * @param l_n        A set, where the node of the loop are saved.
 219  */
 220 static void
 221 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
 222 {
 223   int i;
 224   copies_t key, *value;
 225
 226   /* Add this node to the list. */
 227   key.irn  = node;
 228
 229   /* Initialize all copies of the added node with NULL.*/
 230   for (i = 0; i < 4; ++i)
 231     key.copy[i] = NULL;
 232   value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 233
 234   /* Add all outs of this node to the list, if they are within the loop. */
 235   for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
 236     ir_node *pred = get_irn_out(node, i);
 237
 238     key.irn = pred;
 239     if (!is_loop_invariant(pred, loop_head)         &&
 240             set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 241       find_loop_nodes(pred, loop_head, l_n);
 242     }
 243   }
 244
 245   /* Add all ins if they are within the loop. */
 246   for (i = get_irn_arity(node) - 1; i >=0; --i) {
 247     ir_node *pred = get_irn_n(node, i);
 248
 249     key.irn = pred;
 250     if (!is_loop_invariant(pred, loop_head)         &&
 251               set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
 252       find_loop_nodes(pred, loop_head, l_n);
 253     }
 254   }
 255 }
 256
 257 /**
 258  * Make a new loop head if copy_head = 1.
 259  *
 260  * @param l_n              A set, where the node of the loop are saved.
 261  * @param info             Contains the loop information.
 262  * @param value            A element of the set.
 263  * @param unroll_factor    A integer 2 <= unroll_factor <= 4.
 264  * @param copy_head        if non-zero, the loop head will be copied
 265  *
 266  */
 267 static void
 268 new_loop_head(set *l_n, induct_var_info *info, copies_t *value, int unroll_factor, int copy_head)
 269 {
 270   int i;
 271   copies_t block, *value_backedge_jmp, *backedge_jmp_block;
 272   /* The  predecessor of the loop head in the  backedge position */
 273   ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
 274
 275   block.irn          = backedge_jmp;
 276   value_backedge_jmp = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 277
 278   if (copy_head) {
 279     /* The first copy of the loop head must point to the loop head.*/
 280     value->copy[0] = new_Block(1, &backedge_jmp);
 281     if(info->l_itervar_phi->flags & do_loop){
 282       value_backedge_jmp->copy[0] = new_r_Jmp(current_ir_graph, value->copy[0]);
 283       for (i = 1; i < unroll_factor - 1; ++i) {
 284       /* all other copies must point to the copy before it in the array. */
 285       value->copy[i] = new_Block(1, &backedge_jmp);
 286       value_backedge_jmp->copy[i] = new_r_Jmp(current_ir_graph, value->copy[i]);
 287       }
 288       for (i = 1; i < unroll_factor - 1; ++i){
 289         set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
 290         set_Block_cfgpred (value->copy[i], 0, value_backedge_jmp->copy[i-1]);
 291       }
 292     }else
 293       for (i = 1; i < unroll_factor - 1; ++i) {
 294         /* all other copies must point to the copy before it in the array. */
 295         set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
 296         value->copy[i] = new_Block(1, &value_backedge_jmp->copy[i-1]);
 297       }
 298   }
 299   else {
 300     /* If the loop head must not be copy. block.irn is the successor of the loop head.*/
 301     block.irn          = get_nodes_block(backedge_jmp);
 302     backedge_jmp_block = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
 303
 304     /* The first copy of block.irn point to it.
 305       The another copies muss point to the copy before it in the array.*/
 306     set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
 307
 308     for (i = 1; i < unroll_factor - 1; ++i)
 309       set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
 310   }
 311 }
 312
 313 /* Set all copies of the induction variable.
 314  *
 315  * @param phi             A phi node in the loop head block.
 316  * @param phi_pred        The predecessor of the phi along the backedge.
 317  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 318  */
 319 static void
 320 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
 321 {
 322   int p;
 323
 324   if(phi_pred != NULL){
 325     /* The first copy of Phi node get the node along the backedge as predecessor. The next copies
 326        the copies of this node.*/
 327     phi->copy[0] = phi_pred->irn;
 328     for (p = 1; p < unroll_factor - 1; ++p) {
 329       /* If two phi nodes are in cycle.  */
 330       if (phi_pred->copy[p - 1] == NULL && get_irn_op(phi_pred->irn) == op_Phi) {
 331         if (p & 1)
 332           phi->copy[p] =  phi->irn;
 333         else
 334           phi->copy[p] =  phi_pred->irn;
 335       } else
 336         phi->copy[p] =  phi_pred->copy[p - 1];
 337     }
 338   }else
 339     /* The predecessors of phi are loop invariant and the copies von phi
 340        must be phi it self.*/
 341     for(p = 0; p < unroll_factor - 1; ++p)
 342       phi->copy[p] = phi->irn;
 343 }
 344
 345 /**
 346  * Decide if the loop head must be copied. A head with important nodes
 347  * must be copied.
 348  *
 349  * @param l_n         A set, where the node of the loop are saved.
 350  * @param info        Contains information about the induction variable.
 351  *
 352  * @return non-zero if the loop head must be copied
 353  */
 354 static int
 355 loop_head_nodes(set *l_n, induct_var_info *info)
 356 {
 357   int must_copy = 0;
 358   copies_t *value;
 359   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 360
 361   for (value = set_first(l_n); value != NULL; value = set_next(l_n))
 362     if (is_no_Block(value->irn) && get_nodes_block(value->irn) == loop_head)
 363       /* If the loop head contains just this nodes, than must not be copy. */
 364       switch (get_irn_opcode(value->irn)) {
 365       case iro_Cond:
 366               break;
 367       case iro_Phi:
 368               break;
 369       case iro_Proj:
 370               break;
 371       case iro_Const:
 372               break;
 373       case iro_Cmp:
 374               break;
 375       default:
 376               must_copy = 1;
 377       }
 378     return must_copy;
 379 }
 380
 381 /** Copy all loop nodes.
 382  *
 383  * @param l_n             Contains all nodes of the loop.
 384  * @param info            Contains the loop information.
 385  * @param unroll_factor   A integer 2 <= unroll_factor <= 4.
 386  */
 387 static void
 388 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
 389 {
 390   int i, opt;
 391   copies_t *value, *info_op, *phi, *loop_h = NULL, key, *value_block;
 392   ir_node *proj_b, *cond, *proj_1, *proj_0, *loop_head;
 393   proj_b = get_irn_out(info->cmp, 0);
 394   cond   = get_irn_out(proj_b, 0);
 395   proj_0 = get_irn_out(cond, 0);
 396   proj_1 = get_irn_out(cond, 1);
 397
 398   loop_head = get_loop_node(info->l_itervar_phi, 0);
 399   /* I will create some nodes and need the optimization to
 400      be set off.*/
 401   opt = get_optimize();
 402   set_optimize(0);
 403
 404   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 405     if(value->irn == loop_head)
 406       loop_h = value;
 407     else if (is_Phi_in_loop_head(value->irn, loop_head))
 408       phi = value;
 409     else if (copy_loop_head){
 410       if(value->irn == proj_0){
 411         for (i = 0; i < unroll_factor - 1; i++)
 412         /* In the copies of the loop head we replace the cmp branche with a jmp node.
 413         This is just for "for" loops."do" loops are handled in function
 414         new_loop_head().*/
 415         value->copy[i] = new_r_Jmp(current_ir_graph, get_nodes_block(value->irn));
 416       }else if(value->irn != proj_b && value->irn != cond &&
 417         value->irn != proj_1)
 418         /* The loop head must be copied. */
 419         for (i = 0; i < unroll_factor - 1; i++){
 420           copy_irn_to_irg(value->irn, current_ir_graph);
 421           value->copy[i] = get_irn_link(value->irn);
 422         }
 423     } else {
 424       /* The loop head and its nodes must not be copied. */
 425       if((value->irn->op == op_Block             &&
 426         value->irn != loop_head)               ||
 427         (value->irn->op != op_Block             &&
 428         get_nodes_block(value->irn) != loop_head)) {
 429         for (i = 0; i<unroll_factor -1; i++) {
 430           copy_irn_to_irg(value->irn, current_ir_graph);
 431           value->copy[i] = get_irn_link(value->irn);
 432         }
 433       }
 434     }
 435   }
 436
 437   /* Treat the loop head block */
 438   new_loop_head(l_n, info, loop_h, unroll_factor, copy_loop_head);
 439   /* I have created the nodes and I set the optimization
 440      to it old value.*/
 441   set_optimize(opt);
 442
 443   /* Similarly treat the Phis in the loop head block. info->op is the node
 444      along the backedges. */
 445   key.irn = info->op;
 446   info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 447   assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
 448   for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
 449     ir_node *phi = get_irn_out(loop_head, i);
 450
 451     if (is_Phi(phi)) {
 452       copies_t *phi_pred, *phi_op;
 453
 454       key.irn  = get_Phi_pred(phi, info->op_pred_pos); // info->op;
 455       phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 456       key.irn  = phi;
 457       phi_op   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 458       set_Phi_copies(phi_op, phi_pred, unroll_factor);
 459     }
 460   }
 461
 462   for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
 463
 464     /* Set the predecessors of the copies. */
 465     if (copy_loop_head){
 466       set_preds(l_n, value, info, unroll_factor, NULL);
 467     }else if ((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
 468       set_preds(l_n, value, info, unroll_factor, NULL);
 469
 470     if (is_Phi_in_loop_head(value->irn, loop_head) &&
 471       has_backedges(value->irn))
 472       /* Set the backedge of phis in the loop head. */
 473       set_phi_backedge(l_n, value, info, unroll_factor);
 474
 475     if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head) &&
 476       value->copy[0] != NULL ) {
 477       ir_node *nodes_block = get_nodes_block(value->irn);
 478
 479       if (!copy_loop_head && nodes_block == loop_head)
 480         continue;
 481
 482       key.irn = nodes_block;
 483       value_block = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 484       /* Set the copy of the node in the accordant copy of its block. */
 485       for(i = 0; i < unroll_factor - 1; i++){
 486         set_nodes_block(value->copy[i], value_block->copy[i]);
 487       }
 488     }
 489   }
 490   /* I must repair some control flow edges, when the loop head
 491      have been copied.*/
 492   if (copy_loop_head && !(info->l_itervar_phi->flags & do_loop)){
 493     key.irn = proj_0;
 494     value   = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 495     key.irn = get_irn_out(proj_0, 0);
 496     info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 497     for (i = 0; i < unroll_factor - 1; i++){
 498       set_Block_cfgpred(info_op->copy[i], 0, value->copy[i]);
 499     }
 500   }
 501 }
 502
 503 /**
 504  * Returns non-zero if a Proj node from the loop has a predecessor that
 505  * can throw an exception.
 506  *
 507  * @param node  A Proj node from the loop.
 508  *
 509  * @return non-zero if the predecessor of the Proj node can throw an exception
 510  */
 511 static int is_exception_possible(ir_node *node)
 512 {
 513   ir_node *pred = get_Proj_pred(node);
 514
 515   /* only fragile ops can throw an exception */
 516   if (! is_fragile_op(pred))
 517     return 0;
 518
 519   /*
 520    * fragile ops that are known NOT to throw
 521    * an exception if their pin_state is op_pin_state_floats
 522    */
 523   return get_irn_pinned(pred) != op_pin_state_floats;
 524 }
 525
 526 /**
 527  * If a node from the loop is predecessor of the end block, then the end block must get all copies
 528  * of this node as predecessors. This is possible with function calls in the unrolling loop.
 529  *
 530  * @param end_block          The end block.
 531  * @param loop_head          The head of the unrolling loop.
 532  * @param l_n                Contains all nodes of the loop.
 533  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 534  *                           of the end block from the unrolling loop.
 535  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 536  */
 537 static void
 538 new_end_block(ir_node *end_block, ir_node *loop_head, set *l_n, set *loop_endblock_outs, int unroll_factor)
 539 {
 540   copies_t key, *value;
 541   int set_el, new_preds, all_new_preds, i, q;
 542   int old_preds = get_Block_n_cfgpreds(end_block);   /* All old predecessors of the end block. */
 543   ir_node **all_in;
 544
 545   for (i = 0; i < old_preds; i++) {
 546     ir_node *pred = get_Block_cfgpred(end_block, i);
 547      key.irn = pred;
 548      value = NULL;
 549      value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 550
 551      /* If a predecessor of the end block is a Proj from the unrolling loop (for function calls) .*/
 552      if (get_irn_op(pred) == op_Proj && is_exception_possible(pred) && value != NULL &&
 553                !(!copy_loop_head && get_nodes_block(pred) == loop_head))
 554        value =  set_insert(loop_endblock_outs, &key, sizeof(key), HASH_PTR(key.irn));
 555   }
 556
 557   /* The set loop_endblock_outs contains all predecessors of the end block from the unrolling loop,
 558    * set_el the amount of such nodes.
 559    */
 560   set_el = set_count (loop_endblock_outs);
 561
 562   /* If the end block haven't such predecessors, we are finished. */
 563   if (!set_el) return;
 564
 565   new_preds = (unroll_factor - 1) * set_el;          /* All new predecessors of the end block */
 566   all_new_preds = old_preds + new_preds;             /* All predecessors of this block. */
 567   all_in = alloca(sizeof(*all_in) * all_new_preds);  /* A array with size for all predecessors of this block. */
 568
 569   for (i = 0; i < old_preds; i++)
 570     all_in[i] = get_Block_cfgpred(end_block, i);    /* The old predecessors. */
 571
 572   value = set_first(loop_endblock_outs);
 573
 574   for (; value != NULL; value = set_next(loop_endblock_outs)) {
 575     key.irn = value->irn;
 576     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 577     for (q = 0; q < unroll_factor - 1 ; q++) {
 578       all_in[i++] = value->copy[q];                 /* The new predecessors. */
 579     }
 580
 581   }
 582
 583   /* Replace the old predecessors of the end block wit´h the new ones. */
 584   set_irn_in(end_block, all_new_preds, all_in);
 585 }
 586
 587 /** new_after_loop_block
 588  *
 589  * The after loop block must examine the possible exceptions in the loop.
 590  * If a (Proj) node from the loop is predecessor of this block, then the
 591  * after loop block must have as well all copies of this node as predecessors.
 592  *
 593  * @param l_n             Contains all nodes of the loop.
 594  * @param block           A block after the loop.
 595  * @param loop_in         A node from the loop, that is predecessor of the end block.
 596  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 597  */
 598 static void
 599 new_after_loop_block (set *l_n, ir_node* block, copies_t *loop_in, int unroll_factor)
 600 {
 601   copies_t key, *value;
 602   int i, p, q, s, old_preds, new_preds, all_new_preds ;
 603   ir_node **all_in;
 604
 605   /* The node from the unrolling loop must be a Proj. */
 606   if (loop_in->irn->op != op_Proj) return;
 607
 608   old_preds = get_Block_n_cfgpreds(block);     /* All old predecessors of this block. */
 609   new_preds = old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 610   all_new_preds = old_preds + new_preds;       /* All predecessors of this block. */
 611
 612   all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 613
 614   for (i = 0 ; i < old_preds; i++)
 615     all_in[i] = get_Block_cfgpred(block, i);      /* The old predecessors. */
 616
 617   q = old_preds;
 618   for (i = 0; i < old_preds; i++){
 619     key.irn = all_in[i];
 620     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 621     p = 0;
 622     for (s = 0; s < (unroll_factor - 1); s++) {
 623       all_in[q++] = value->copy[p++];          /* The new predecessors. */
 624     }
 625   }
 626   /* Replace the old predecessors of the end block whit the new one. */
 627   set_irn_in(block, all_new_preds, all_in);
 628 }
 629
 630 /** new_after_loop_node
 631  *
 632  * An after loop node (phi or call) must examine the possible exceptions in the loop.
 633  * If a (Proj) node from the loop is predecessor of this node, then the after loop
 634  * node must have as well all copies of this node as predecessors.
 635  *
 636  * @param l_n             Contains all nodes of the loop.
 637  * @param loop_outs       Contains nodes after the loop,that have as predecessor a node from the loop.
 638  * @param node            A node after the loop.
 639  * @param loop_in         A node (Proj) from the loop, that is predecessor of *node.
 640  * @param unroll_factor   An integer 2 <= unroll_factor <= 4.
 641  */
 642 static void
 643 new_after_loop_node(set *l_n, set *loop_outs, ir_node *node, copies_t *loop_in, int unroll_factor)
 644 {
 645   ir_node *pred, *block_pred = NULL, *node_block, *new_phi;
 646   int phi = 0, old_preds, new_preds, all_new_preds, p, q, i, s;
 647   copies_t key, *value = NULL;
 648   ir_node **all_in;
 649
 650   old_preds = get_irn_arity(node);            /* All old predecessors of this node. */
 651   new_preds =old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
 652   all_new_preds = old_preds + new_preds;      /* All predecessors of this block. */
 653
 654   all_in  = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
 655
 656   /* Verification Predecessors, successors and operation of node and loop_in.
 657    * loop_in must be a Proj node.
 658    */
 659   if (loop_in->irn->op != op_Proj) return;
 660   /* Node must be operation Phi with mode memory or a Call node. */
 661   if (get_irn_op(node) == op_Phi  &&
 662       get_irn_mode(node) == mode_M){
 663     /* If node is a Phi node,then must have a Call node as successor. */
 664     for (i = 0; i < get_irn_n_outs(node); i++)
 665       if (get_irn_op(get_irn_out(node, i)) == op_Call) {
 666               phi = 1;
 667         break;
 668       }
 669
 670     if (!phi) return;
 671   }
 672
 673   /* The predecessor of loop_in must not be a loop invariant. */
 674   pred = get_Proj_pred(loop_in->irn);
 675   key.irn = pred;
 676   value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 677   if (! value) return;
 678
 679   node_block = get_nodes_block(node);
 680
 681   /* The block of node must also have a (Proj) predecessor from the unrolling loop. */
 682   for (i = 0; i < get_Block_n_cfgpreds(node_block); i++) {
 683     block_pred = get_Block_cfgpred(node_block, i);
 684
 685     if (get_irn_op(block_pred) == op_Proj) {
 686       if (get_Proj_pred(block_pred) == pred)
 687         break;
 688     }
 689     else {
 690       block_pred = NULL;
 691     }
 692   }
 693
 694   if (! block_pred) return;
 695
 696   key.irn = block_pred;
 697   value   = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
 698
 699   if (! value)
 700     return;
 701   else
 702     new_after_loop_block(l_n, node_block, value, unroll_factor);
 703
 704   for (i = 0; i < old_preds; ++i)
 705     all_in[i] = get_irn_n(node, i);  /* The old predecessors. */
 706
 707   q = old_preds;
 708   for (i = 0; i < old_preds; ++i) {
 709     key.irn = all_in[i];
 710     value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
 711     p = 0;
 712     for (s = 0; s < (unroll_factor - 1); ++s) {
 713       all_in[q] = value->copy[p];  /* The new predecessors. */
 714       p++;
 715       q++;
 716     }
 717   }
 718   /* A new phi node with the new predecessors. */
 719   new_phi = new_r_Phi(current_ir_graph, get_nodes_block(node), all_new_preds,all_in,
 720                        get_irn_mode(node));
 721
 722   if (phi)
 723     exchange(node, new_phi);
 724   else{
 725     for (i = 0; i < get_irn_arity(node); ++i)
 726       if (get_irn_n(node, i) == pred)
 727         set_irn_n(node, i, new_phi);
 728   }
 729
 730   /* The set loop_outs contains the visited nodes and their blocks. */
 731   key.irn = node;
 732   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 733   key.irn = get_nodes_block(node);
 734   value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
 735 }
 736
 737
 738 /* Set the outs of the unrolling loop. All loop outs of a node muss now
 739  * point to the last copy of it. Just phi nodes in the loop head and proj
 740  * nodes save it outs. The all copies of some Projs  have too outs.
 741  *
 742  * @param l_n                Contains all nodes of the loop.
 743  * @param loop_outs          The set contains the visited and changed loop outs.
 744  * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
 745  *                           of the end block from the unrolling loop.
 746  * @param info               Contains the loop information.
 747  * @param unroll_factor      A integer 2 <= unroll_factor <= 4.
 748  */
 749 static void
 750 set_loop_outs(set *l_n, set *loop_outs, set *loop_endblock_outs, induct_var_info *info, int unroll_factor)
 751 {
 752   copies_t *value, key;
 753   int i, p;
 754   ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
 755   ir_node *end_block = get_irg_end_block(current_ir_graph);
 756
 757   for (value = set_first(l_n); value != NULL; value = set_next(l_n)){
 758     if(get_irn_node_nr(value->irn) == 35047)
 759       printf("\nHi\n");
 760     if (! is_Phi_in_loop_head(value->irn, loop_head) &&
 761         (get_irn_opcode(value->irn) == iro_Proj && value->copy[0] != NULL))
 762       for (i = 0; i < get_irn_n_outs(value->irn); ++i) {
 763         key.irn = get_irn_out(value->irn, i);
 764
 765         /* Search for loop outs. */
 766         if (set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
 767           if ((key.irn->op == op_Block && get_Block_dom_depth(key.irn)  >
 768                get_Block_dom_depth(loop_head))                          ||
 769               (key.irn->op != op_Block && get_Block_dom_depth(get_nodes_block(key.irn)) >
 770                get_Block_dom_depth(loop_head))) {
 771
 772             for (p = 0; p < get_irn_arity(key.irn); p++)
 773               if (value->irn == get_irn_n(key.irn, p)) {
 774                 if (get_irn_op(value->irn) == op_Proj && is_exception_possible(value->irn)) {
 775                   if (set_find(loop_outs, &key, sizeof(key), HASH_PTR(key.irn)) == NULL) {
 776                     /* If the loop out is for exceptions in the loop. */
 777                     if ((get_irn_op(key.irn) == op_Phi && get_irn_mode(key.irn) == mode_M) ||
 778                         get_irn_op(key.irn) == op_Call)
 779                       new_after_loop_node(l_n,loop_outs, key.irn, value, unroll_factor);
 780                     else
 781                       continue;
 782                   } else
 783                     continue;
 784                 } else
 785                   /* Loop outs in the loop head must be not changed.*/
 786                   if(get_nodes_block(value->irn) != loop_head)
 787                     set_irn_n(key.irn, p, value->copy[unroll_factor-2]);
 788               }
 789           }
 790       }
 791   }
 792   /* This function searches for loop outs associated with function call in the unrolling loop. */
 793   new_end_block (end_block, loop_head, l_n, loop_endblock_outs, unroll_factor);
 794 }
 795
 796 /**
 797  * Unroll the loop body with a factor that must be power of two.
 798  * Called from a post walker.
 799  *
 800  * @param n        An IR node.
 801  * @param env      points to a result
 802  */
 803 static void do_loop_unroll(ir_node *n, void *env)
 804 {
 805   int *unroll_done = env;
 806   induct_var_info info;
 807   copies_t *value;
 808   set *loop_nodes, *loop_outs, *loop_endblock_outs;
 809   ir_node *phi_init, *loop_head;
 810   ir_node *backedge_jmp;
 811   int l_sons = 0, unroll_factor = 0;
 812   tarval *init, *iter_end, *iter_increment,*tarval_null, *tarval_one, *tarval_three, *tarval_two, *diff, *iter_number;
 813   int backedge_pos;
 814
 815   copy_loop_head = 0;
 816   info.itervar_phi = n;
 817
 818   /* The IR node must be a induction variable. */
 819   if (get_irn_op(n) == op_Phi) {
 820     if (is_induction_variable (&info) == NULL)
 821       return;
 822   }
 823   else
 824     return;
 825
 826   /* Brute force limiting of loop body size. */
 827   if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
 828
 829   /* Only unroll loops that compare against a constant for exiting. */
 830   if (info.cmp == NULL) return;
 831
 832   /* We only want to unroll innermost loops. */
 833   l_sons = get_loop_n_sons(info.l_itervar_phi);
 834   if ( l_sons != 0)
 835     return;
 836   /* "do" loops are not implementit gut for this reason
 837       at the time we don't unroll them.*/
 838   if(info.l_itervar_phi->flags & do_loop)
 839     return;
 840
 841   phi_init = get_Phi_pred(info.itervar_phi, info.init_pred_pos);
 842
 843   if ((!(info.l_itervar_phi->flags & loop_is_count_loop))  ||
 844       (info.l_itervar_phi->flags & loop_is_endless)        ||
 845       (info.l_itervar_phi->flags & loop_is_dead)           ||
 846       (info.l_itervar_phi->flags & once))
 847     return;
 848
 849   init           = info.l_itervar_phi->loop_iter_start;
 850   iter_end       = info.l_itervar_phi->loop_iter_end;
 851   iter_increment = info.l_itervar_phi->loop_iter_increment;
 852   tarval_null = get_mode_null(get_tarval_mode(iter_increment));
 853   tarval_one  = get_mode_one(get_tarval_mode(init));
 854
 855   if(tarval_is_double(init) || tarval_is_double(iter_end) || tarval_is_double(iter_increment))
 856     return;
 857
 858   if((tarval_is_negative(init) && tarval_is_negative(iter_end)) ||
 859      (!tarval_is_negative(init) && !tarval_is_negative(iter_end)) ||
 860      (tarval_is_null(init) || tarval_is_null(iter_end)))
 861     diff = tarval_abs(tarval_sub(iter_end, init));
 862   else
 863     diff = tarval_add(tarval_abs(iter_end),tarval_abs(init));
 864
 865   iter_increment = tarval_abs(iter_increment);
 866
 867   if(!(info.l_itervar_phi->flags & do_loop))
 868     diff = tarval_add(diff, tarval_one);
 869   /* Test for the value of unroll factor. */
 870   if (tarval_cmp(tarval_mod(diff,iter_increment), tarval_null) == pn_Cmp_Eq)
 871     iter_number = tarval_div(diff, iter_increment);
 872   else
 873     return;
 874   tarval_two = tarval_add(tarval_one, tarval_one);
 875   tarval_three = tarval_add(tarval_two,tarval_one);
 876
 877   if (tarval_cmp(tarval_mod(iter_number, tarval_three), tarval_null) == pn_Cmp_Eq)
 878     unroll_factor = 3;
 879   else if (tarval_cmp(tarval_mod(iter_number, tarval_two), tarval_null) == pn_Cmp_Eq)
 880     unroll_factor = 2;
 881   else return;
 882
 883   if (get_firm_verbosity())
 884     printf("\nloop unrolling with factor %d \n", unroll_factor);
 885
 886   /* ok, we will do unrolling */
 887   *unroll_done += 1;
 888
 889   /* The unroll factor must be less than 4. */
 890   assert(unroll_factor <= MAX_UNROLL);
 891
 892   loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
 893
 894   assert(loop_head != NULL && is_Block(loop_head));
 895
 896   /* We assume, the loop head has exactly one backedge.  The position of
 897      the backedge is in the following variable: */
 898   backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
 899
 900   /* A set with the nodes to copy. */
 901   loop_nodes = new_set(set_cmp, 8);
 902   /* A set with the loop outs for exceptions. */
 903   loop_outs =  new_set(set_cmp, 8);
 904
 905   /* A set containing all predecessors
 906      of the end block from the unrolling loop. */
 907   loop_endblock_outs = new_set(set_cmp, 8);
 908   /* Find all nodes of the unrolling loop. */
 909   find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
 910
 911   /* Decide if the loop head to be copy.*/
 912   copy_loop_head = loop_head_nodes(loop_nodes, &info);
 913
 914   /* Copy all nodes of the unrolling loop, that muss be copy. */
 915   copy_loop_body(loop_nodes, &info, unroll_factor);
 916
 917   backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
 918
 919   /* Set the backedge of the loop head. */
 920   for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
 921     if (value->irn == backedge_jmp)
 922       set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
 923   }
 924   set_loop_outs(loop_nodes, loop_outs, loop_endblock_outs, &info, unroll_factor);
 925 }
 926
 927 /* Performs loop unrolling for the passed graph. */
 928 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */)
 929 {
 930   ir_graph *rem;
 931   int unroll_done = 0;
 932
 933   if ( !get_opt_loop_unrolling()) return;
 934
 935   rem = current_ir_graph;
 936   current_ir_graph = irg;
 937
 938   /* -- Precompute some information -- */
 939
 940   /* Call algorithm that computes the loop information */
 941   // construct_backedges(irg);
 942   compute_loop_info(irg);
 943   /* Call algorithm that computes the backedges */
 944   // construct_cf_backedges(irg);
 945
 946   /* Call algorithm that computes the dominator trees. */
 947   assure_doms(irg);
 948
 949   /* Call algorithm that computes the out edges */
 950   assure_irg_outs(irg);
 951
 952   collect_phiprojs(irg);
 953
 954   /* -- Search expressions that can be optimized -- */
 955   irg_walk_graph(irg, NULL, do_loop_unroll, &unroll_done);
 956
 957   if (unroll_done) {
 958     /* unrolling was done, all info is invalid */
 959     set_irg_doms_inconsistent(irg);
 960     set_irg_outs_inconsistent(irg);
 961     set_irg_loopinfo_state(current_ir_graph, loopinfo_cf_inconsistent);
 962     set_trouts_inconsistent();
 963     set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 964   }
 965
 966   current_ir_graph = rem;
 967 }