3 * @file loop_unrolling.c
6 * File name: ir/opt/loop_unrolling.c
7 * Purpose: Make loop unrolling.
8 * Author: Beyhan Veliev
12 * Copyright: (c) 2004 Universität Karlsruhe
13 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
29 # include "loop_unrolling.h"
35 # include "irloop_t.h"
36 # include "irgopt_t.h"
41 # include "strength_red.h"
42 # include "compute_loop_info.h"
45 /* We will unroll maximal 4-times. */
48 /* We will know if the head of the loop to be copy.
49 * Default "0" don't copy.*/
50 static int copy_loop_head;
53 ir_node *irn ; /* Node of the loop to be unrolling*/
54 ir_node *copy[MAX_UNROLL] ; /* The copy of the node */
58 * Compare two elements of the copies_t set
60 static int set_cmp(const void *elt, const void *key, size_t size)
62 const copies_t *c1 = elt;
63 const copies_t *c2 = key;
65 return c1->irn != c2->irn;
68 static INLINE int * new_backedge_arr(struct obstack *obst, int size)
70 int *res = NEW_ARR_D (int, obst, size);
71 memset(res, 0, sizeof(int) * size);
75 static INLINE void new_backedge_info(ir_node *n) {
76 switch(get_irn_opcode(n)) {
78 n->attr.block.cg_backedge = NULL;
79 n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
82 n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
85 n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
92 * Remember the new node in the old node by using a field all nodes have.
95 set_new_node (ir_node *old, ir_node *new)
101 * Copies the node to the new obstack. The Ins of the new node point to
102 * the predecessors on the old obstack. For block/phi nodes not all
103 * predecessors might be copied. n->link points to the new node.
104 * For Phi and Block nodes the function allocates in-arrays with an arity
105 * only for useful predecessors. The arity is determined by counting
106 * the non-bad predecessors of the block.
108 * @param n The node to be copied
109 * @param env if non-NULL, the node number attribute will be copied to the new node
112 copy_irn (ir_node *n, void *env)
118 ir_op *op = get_irn_op(n);
119 int copy_node_nr = 0;
122 irg = current_ir_graph;
124 /* The end node looses it's flexible in array. This doesn't matter,
125 as dead node elimination builds End by hand, inlineing doesn't use
127 /* assert(op == op_End || ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */
130 /* node copied already */
133 new_arity = get_irn_arity(n);
135 nn = new_ir_node(get_irn_dbg_info(n),
137 NULL, /* no block yet, will be set later */
144 /* Copy the attributes. These might point to additional data. If this
145 was allocated on the old obstack the pointers now are dangling. This
146 frees e.g. the memory of the graph_arr allocated in new_immBlock. */
147 copy_node_attr(n, nn);
148 new_backedge_info(nn);
153 /* for easier debugging, we want to copy the node numbers too */
154 nn->node_nr = n->node_nr;
159 /*Check if phi in the loop head is.
161 * @param phi Must be a phi node from the loop.
162 * @param loop_head The loop head .
164 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
165 assert(is_Block(loop_head));
166 return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
170 * Copies predecessors of node to copies of node.
171 * If the predecessor is a loop invariant, then the copy get it
172 * as predecessor, else the copy of the predecessor.
174 * @param l_n A set, where the node of the loop are saved.
175 * @param value A element of the set.
176 * @param info Contains information about the induction variable.
177 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
178 * @param env Free environment pointer.
181 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
184 copies_t key, *value_pred;
185 if(value->copy[0] == NULL ||
186 get_irn_op(value->irn) != get_irn_op(value->copy[0]))
188 // The head of the unrolling loop.
189 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
190 irn_arity = get_irn_arity(value->irn);
192 for (i = 0; i < irn_arity; i++) {
193 ir_node *pred = get_irn_n(value->irn, i);
196 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
198 if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
199 if (value_pred == NULL) {
200 /* Is loop invariant. */
201 for (p = 0; p < unroll_factor - 1; p++)
202 set_irn_n(value->copy[p], i, pred);
203 /* pred is a loop invariant. The copies of the successors get it as predecessor. */
206 for (p = 0; p < unroll_factor - 1; p++)
207 set_irn_n(value->copy[p], i, value_pred->copy[p]);
208 /* value_pred->irn is a node from the unrolling loop.
209 * The copies of the successors get the
210 * copies of value_pred as predecessor.
217 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
218 * must now point to the value defined
219 * in the last copy of the loop body.
221 * @param l_n A set, where the node of the loop are saved.
222 * @param value A phi in the loop head.
223 * @param info Contains information about the induction variable.
224 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
227 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
229 copies_t key, *value_pred;
231 /* info->op_pred_pos is the backedge position. */
232 key.irn = get_irn_n(value->irn, info->op_pred_pos);
233 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
235 /* value->copy[unroll_factor - 2] is the last copy. */
236 set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
240 * Test for a loop head.
242 * Returns true if the node has predecessors in the loop _and_ out of
243 * the loop. Then it is a loop head: The loop can be entered through
246 * @param n The node to be tested. Must be a block.
247 * @param info Contains the loop information.
250 is_loop_head(ir_node *n, induct_var_info *info)
253 int some_outof_loop = 0, some_in_loop = 0;
255 assert(get_irn_op(n) == op_Block);
256 arity = get_Block_n_cfgpreds(n);
258 for (i = 0; i < arity; i++) {
259 ir_node *pred = get_Block_cfgpred(n, i);
261 if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
267 return some_outof_loop && some_in_loop;
272 * Test whether the passed loop is a natural loop.
274 * Returns true if the loop has only one loop header and only a single
277 * @param info Contains the loop information.
280 is_natural_loop(induct_var_info *info)
285 l_n_node = get_loop_n_nodes (info->l_itervar_phi);
287 for (i = 1; i < (l_n_node); i ++) {
288 l_node = get_loop_node (info->l_itervar_phi, i);
289 if (is_loop_head(l_node, info)) return 0;
291 if (has_backedges(l_node) && i != l_n_node-1) return 0;
298 * Search for all nodes of a loop.
300 * @param node The induction variable of the loop.
301 * @param loop_head The head of the loop.
302 * @param l_n A set, where the node of the loop are saved.
305 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
308 copies_t key, *value;
310 /* Add this node to the list. */
313 /* Initialize all copies of the added node with NULL.*/
314 for (i = 0; i < 4; ++i)
316 value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
318 /* Add all outs of this node to the list, if they are within the loop. */
319 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
320 ir_node *pred = get_irn_out(node, i);
323 if (!is_loop_invariant(pred, loop_head) &&
324 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
325 find_loop_nodes(pred, loop_head, l_n);
329 /* Add all ins if they are within the loop. */
330 for (i = get_irn_arity(node) - 1; i >=0; --i) {
331 ir_node *pred = get_irn_n(node, i);
334 if (!is_loop_invariant(pred, loop_head) &&
335 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
336 find_loop_nodes(pred, loop_head, l_n);
342 * Make a new loop head if copy_head = 1.
344 * @param l_n A set, where the node of the loop are saved.
345 * @param info Contains the loop information.
346 * @param value A element of the set.
347 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
348 * @param copy_head if non-zero, the loop head will be copied
352 new_loop_head(set *l_n, induct_var_info *info, copies_t *value, int unroll_factor, int copy_head)
355 copies_t block, *value_backedge_jmp, *backedge_jmp_block;
356 /* The predecessor of the loop head in the backedge position */
357 ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
359 block.irn = backedge_jmp;
360 value_backedge_jmp = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
363 /* The first copy of the loop head must point to the loop head.*/
364 value->copy[0] = new_Block(1, &backedge_jmp);
365 if(info->l_itervar_phi->flags & do_loop){
366 value_backedge_jmp->copy[0] = new_r_Jmp(current_ir_graph, value->copy[0]);
367 for (i = 1; i < unroll_factor - 1; ++i) {
368 /* all other copies must point to the copy before it in the array. */
369 value->copy[i] = new_Block(1, &backedge_jmp);
370 value_backedge_jmp->copy[i] = new_r_Jmp(current_ir_graph, value->copy[i]);
372 for (i = 1; i < unroll_factor - 1; ++i){
373 set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
374 set_Block_cfgpred (value->copy[i], 0, value_backedge_jmp->copy[i-1]);
377 for (i = 1; i < unroll_factor - 1; ++i) {
378 /* all other copies must point to the copy before it in the array. */
379 set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
380 value->copy[i] = new_Block(1, &value_backedge_jmp->copy[i-1]);
384 /* If the loop head must not be copy. block.irn is the successor of the loop head.*/
385 block.irn = get_nodes_block(backedge_jmp);
386 backedge_jmp_block = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
388 /* The first copy of block.irn point to it.
389 The another copies muss point to the copy before it in the array.*/
390 set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
392 for (i = 1; i < unroll_factor - 1; ++i)
393 set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
397 /* Set all copies of the induction variable.
399 * @param phi A phi node in the loop head block.
400 * @param phi_pred The predecessor of the phi along the backedge.
401 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
404 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
408 if(phi_pred != NULL){
409 /* The first copy of Phi node get the node along the backedge as predecessor. The next copies
410 the copies of this node.*/
411 phi->copy[0] = phi_pred->irn;
412 for (p = 1; p < unroll_factor - 1; ++p) {
413 /* If two phi nodes are in cycle. */
414 if (phi_pred->copy[p - 1] == NULL && get_irn_op(phi_pred->irn) == op_Phi) {
416 phi->copy[p] = phi->irn;
418 phi->copy[p] = phi_pred->irn;
420 phi->copy[p] = phi_pred->copy[p - 1];
423 /* The predecessors of phi are loop invariant and the copies von phi
424 must be phi it self.*/
425 for(p = 0; p < unroll_factor - 1; ++p)
426 phi->copy[p] = phi->irn;
430 * Decide if the loop head must be copied. A head with important nodes
433 * @param l_n A set, where the node of the loop are saved.
434 * @param info Contains information about the induction variable.
436 * @return non-zero if the loop head must be copied
439 loop_head_nodes(set *l_n, induct_var_info *info)
443 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
445 for (value = set_first(l_n); value != NULL; value = set_next(l_n))
446 if (is_no_Block(value->irn) && get_nodes_block(value->irn) == loop_head)
447 /* If the loop head contains just this nodes, than must not be copy. */
448 switch (get_irn_opcode(value->irn)) {
465 /** Copy all loop nodes.
467 * @param l_n Contains all nodes of the loop.
468 * @param info Contains the loop information.
469 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
472 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
475 copies_t *value, *info_op, *phi, *loop_h = NULL, key, *value_block;
476 ir_node *proj_b, *cond, *proj_1, *proj_0;
477 proj_b = get_irn_out(info->cmp, 0);
478 cond = get_irn_out(proj_b, 0);
479 proj_0 = get_irn_out(cond, 0);
480 proj_1 = get_irn_out(cond, 1);
482 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
483 /* I will create some nodes and need the optimization to
485 opt = get_opt_optimize();
488 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
489 if(value->irn == loop_head)
491 else if (is_Phi_in_loop_head(value->irn, loop_head))
493 else if (copy_loop_head){
494 if(value->irn == proj_0){
495 for (i = 0; i < unroll_factor - 1; i++)
496 /* In the copies of the loop head we replace the cmp branche with a jmp node.
497 This is just for "for" loops."do" loops are handled in function
499 value->copy[i] = new_r_Jmp(current_ir_graph, get_nodes_block(value->irn));
500 }else if(value->irn != proj_b && value->irn != cond &&
501 value->irn != proj_1)
502 /* The loop head must be copied. */
503 for (i = 0; i < unroll_factor - 1; i++){
504 copy_irn(value->irn, NULL);
505 value->copy[i] = get_irn_link(value->irn);
508 /* The loop head and its nodes must not be copied. */
509 if((value->irn->op == op_Block &&
510 value->irn != loop_head) ||
511 (value->irn->op != op_Block &&
512 get_nodes_block(value->irn) != loop_head)) {
513 for (i = 0; i<unroll_factor -1; i++){
514 copy_irn(value->irn, NULL);
515 value->copy[i] = get_irn_link(value->irn);
521 /* Treat the loop head block */
522 new_loop_head(l_n, info, loop_h, unroll_factor, copy_loop_head);
523 /* I have created the nodes and I set the optimization
525 set_opt_optimize(opt);
527 /* Similarly treat the Phis in the loop head block. info->op is the node
528 along the backedges. */
530 info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
531 assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
532 for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
533 ir_node *phi = get_irn_out(loop_head, i);
536 copies_t *phi_pred, *phi_op;
538 key.irn = get_Phi_pred(phi, info->op_pred_pos); // info->op;
539 phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
541 phi_op = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
542 set_Phi_copies(phi_op, phi_pred, unroll_factor);
546 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
548 /* Set the predecessors of the copies. */
550 set_preds(l_n, value, info, unroll_factor, NULL);
551 }else if ((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
552 set_preds(l_n, value, info, unroll_factor, NULL);
554 if (is_Phi_in_loop_head(value->irn, loop_head) &&
555 has_backedges(value->irn))
556 /* Set the backedge of phis in the loop head. */
557 set_phi_backedge(l_n, value, info, unroll_factor);
559 if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head) &&
560 value->copy[0] != NULL ) {
561 ir_node *nodes_block = get_nodes_block(value->irn);
563 if (!copy_loop_head && nodes_block == loop_head)
566 key.irn = nodes_block;
567 value_block = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
568 /* Set the copy of the node in the accordant copy of its block. */
569 for(i = 0; i < unroll_factor - 1; i++){
570 set_nodes_block(value->copy[i], value_block->copy[i]);
574 /* I muss repaire some control flow edges, when the loop head
576 if (copy_loop_head && !(info->l_itervar_phi->flags & do_loop)){
578 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
579 key.irn = get_irn_out(proj_0, 0);
580 info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
581 for (i = 0; i < unroll_factor - 1; i++){
582 set_Block_cfgpred(info_op->copy[i], 0, value->copy[i]);
587 * Returns non-zero if a Proj node from the loop has a predecessor that
588 * can throw an exception.
590 * @param node A Proj node from the loop.
592 * @return non-zero if the predecessor of the Proj node can throw an exception
594 static int is_exception_possible(ir_node *node)
596 ir_node *pred = get_Proj_pred(node);
598 /* only fragile ops can throw an exception */
599 if (! is_fragile_op(pred))
603 * fragile ops that are known NOT to throw
604 * an exception if their pin_state is op_pin_state_floats
606 return get_irn_pinned(pred) != op_pin_state_floats;
610 * If a node from the loop is predecessor of the end block, then the end block must get all copies
611 * of this node as predecessors. This is possible with function calls in the unrolling loop.
613 * @param end_block The end block.
614 * @param loop_head The head of the unrolling loop.
615 * @param l_n Contains all nodes of the loop.
616 * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
617 * of the end block from the unrolling loop.
618 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
621 new_end_block(ir_node *end_block, ir_node *loop_head, set *l_n, set *loop_endblock_outs, int unroll_factor)
623 copies_t key, *value;
624 int set_el, new_preds, all_new_preds, i, q;
625 int old_preds = get_Block_n_cfgpreds(end_block); /* All old predecessors of the end block. */
628 for (i = 0; i < old_preds; i++) {
629 ir_node *pred = get_Block_cfgpred(end_block, i);
632 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
634 /* If a predecessor of the end block is a Proj from the unrolling loop (for function calls) .*/
635 if (get_irn_op(pred) == op_Proj && is_exception_possible(pred) && value != NULL &&
636 !(!copy_loop_head && get_nodes_block(pred) == loop_head))
637 value = set_insert(loop_endblock_outs, &key, sizeof(key), HASH_PTR(key.irn));
640 /* The set loop_endblock_outs contains all predecessors of the end block from the unrolling loop,
641 * set_el the amount of such nodes.
643 set_el = set_count (loop_endblock_outs);
645 /* If the end block haven't such predecessors, we are finished. */
648 new_preds = (unroll_factor - 1) * set_el; /* All new predecessors of the end block */
649 all_new_preds = old_preds + new_preds; /* All predecessors of this block. */
650 all_in = alloca(sizeof(*all_in) * all_new_preds); /* A array with size for all predecessors of this block. */
652 for (i = 0; i < old_preds; i++)
653 all_in[i] = get_Block_cfgpred(end_block, i); /* The old predecessors. */
655 value = set_first(loop_endblock_outs);
657 for (; value != NULL; value = set_next(loop_endblock_outs)) {
658 key.irn = value->irn;
659 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
660 for (q = 0; q < unroll_factor - 1 ; q++) {
661 all_in[i++] = value->copy[q]; /* The new predecessors. */
666 /* Replace the old predecessors of the end block wit´h the new ones. */
667 set_irn_in(end_block, all_new_preds, all_in);
670 /** new_after_loop_block
672 * The after loop block must examine the possible exceptions in the loop.
673 * If a (Proj) node from the loop is predecessor of this block, then the
674 * after loop block must have as well all copies of this node as predecessors.
676 * @param l_n Contains all nodes of the loop.
677 * @param block A block after the loop.
678 * @param loop_in A node from the loop, that is predecessor of the end block.
679 * @param unroll_factor An integer 2 <= unroll_factor <= 4.
682 new_after_loop_block (set *l_n, ir_node* block, copies_t *loop_in, int unroll_factor)
684 copies_t key, *value;
685 int i, p, q, s, old_preds, new_preds, all_new_preds ;
688 /* The node from the unrolling loop must be a Proj. */
689 if (loop_in->irn->op != op_Proj) return;
691 old_preds = get_Block_n_cfgpreds(block); /* All old predecessors of this block. */
692 new_preds = old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
693 all_new_preds = old_preds + new_preds; /* All predecessors of this block. */
695 all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
697 for (i = 0 ; i < old_preds; i++)
698 all_in[i] = get_Block_cfgpred(block, i); /* The old predecessors. */
701 for (i = 0; i < old_preds; i++){
703 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
705 for (s = 0; s < (unroll_factor - 1); s++) {
706 all_in[q++] = value->copy[p++]; /* The new predecessors. */
709 /* Replace the old predecessors of the end block whit the new one. */
710 set_irn_in(block, all_new_preds, all_in);
713 /** new_after_loop_node
715 * An after loop node (phi or call) must examine the possible exceptions in the loop.
716 * If a (Proj) node from the loop is predecessor of this node, then the after loop
717 * node must have as well all copies of this node as predecessors.
719 * @param l_n Contains all nodes of the loop.
720 * @param loop_outs Contains nodes after the loop,that have as predecessor a node from the loop.
721 * @param node A node after the loop.
722 * @param loop_in A node (Proj) from the loop, that is predecessor of *node.
723 * @param unroll_factor An integer 2 <= unroll_factor <= 4.
726 new_after_loop_node(set *l_n, set *loop_outs, ir_node *node, copies_t *loop_in, int unroll_factor)
728 ir_node *pred, *block_pred = NULL, *node_block, *new_phi;
729 int phi = 0, old_preds, new_preds, all_new_preds, p, q, i, s;
730 copies_t key, *value = NULL;
733 old_preds = get_irn_arity(node); /* All old predecessors of this node. */
734 new_preds =old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
735 all_new_preds = old_preds + new_preds; /* All predecessors of this block. */
737 all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
739 /* Verification Predecessors, successors and operation of node and loop_in.
740 * loop_in must be a Proj node.
742 if (loop_in->irn->op != op_Proj) return;
743 /* Node must be operation Phi with mode memory or a Call node. */
744 if (get_irn_op(node) == op_Phi &&
745 get_irn_mode(node) == mode_M){
746 /* If node is a Phi node,then must have a Call node as successor. */
747 for (i = 0; i < get_irn_n_outs(node); i++)
748 if (get_irn_op(get_irn_out(node, i)) == op_Call) {
756 /* The predecessor of loop_in must not be a loop invariant. */
757 pred = get_Proj_pred(loop_in->irn);
759 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
762 node_block = get_nodes_block(node);
764 /* The block of node must also have a (Proj) predecessor from the unrolling loop. */
765 for (i = 0; i < get_Block_n_cfgpreds(node_block); i++) {
766 block_pred = get_Block_cfgpred(node_block, i);
768 if (get_irn_op(block_pred) == op_Proj) {
769 if (get_Proj_pred(block_pred) == pred)
777 if (! block_pred) return;
779 key.irn = block_pred;
780 value = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
785 new_after_loop_block(l_n, node_block, value, unroll_factor);
787 for (i = 0; i < old_preds; ++i)
788 all_in[i] = get_irn_n(node, i); /* The old predecessors. */
791 for (i = 0; i < old_preds; ++i) {
793 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
795 for (s = 0; s < (unroll_factor - 1); ++s) {
796 all_in[q] = value->copy[p]; /* The new predecessors. */
801 /* A new phi node with the new predecessors. */
802 new_phi = new_r_Phi(current_ir_graph, get_nodes_block(node), all_new_preds,all_in,
806 exchange(node, new_phi);
808 for (i = 0; i < get_irn_arity(node); ++i)
809 if (get_irn_n(node, i) == pred)
810 set_irn_n(node, i, new_phi);
813 /* The set loop_outs contains the visited nodes and their blocks. */
815 value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
816 key.irn = get_nodes_block(node);
817 value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
821 /* Set the outs of the unrolling loop. All loop outs of a node muss now
822 * point to the last copy of it. Just phi nodes in the loop head and proj
823 * nodes save it outs. The all copies of some Projs have too outs.
825 * @param l_n Contains all nodes of the loop.
826 * @param loop_outs The set contains the visited and changed loop outs.
827 * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
828 * of the end block from the unrolling loop.
829 * @param info Contains the loop information.
830 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
833 set_loop_outs(set *l_n, set *loop_outs, set *loop_endblock_outs, induct_var_info *info, int unroll_factor)
835 copies_t *value, key;
837 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
838 ir_node *end_block = get_irg_end_block(current_ir_graph);
840 for (value = set_first(l_n); value != NULL; value = set_next(l_n)){
841 if(get_irn_node_nr(value->irn) == 35047)
843 if (! is_Phi_in_loop_head(value->irn, loop_head) &&
844 (get_irn_opcode(value->irn) == iro_Proj && value->copy[0] != NULL))
845 for (i = 0; i < get_irn_n_outs(value->irn); ++i) {
846 key.irn = get_irn_out(value->irn, i);
848 /* Search for loop outs. */
849 if (set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
850 if ((key.irn->op == op_Block && get_Block_dom_depth(key.irn) >
851 get_Block_dom_depth(loop_head)) ||
852 (key.irn->op != op_Block && get_Block_dom_depth(get_nodes_block(key.irn)) >
853 get_Block_dom_depth(loop_head))) {
855 for (p = 0; p < get_irn_arity(key.irn); p++)
856 if (value->irn == get_irn_n(key.irn, p)) {
857 if (get_irn_op(value->irn) == op_Proj && is_exception_possible(value->irn)) {
858 if (set_find(loop_outs, &key, sizeof(key), HASH_PTR(key.irn)) == NULL) {
859 /* If the loop out is for exceptions in the loop. */
860 if ((get_irn_op(key.irn) == op_Phi && get_irn_mode(key.irn) == mode_M) ||
861 get_irn_op(key.irn) == op_Call)
862 new_after_loop_node(l_n,loop_outs, key.irn, value, unroll_factor);
868 /* Loop outs in the loop head must be not changed.*/
869 if(get_nodes_block(value->irn) != loop_head)
870 set_irn_n(key.irn, p, value->copy[unroll_factor-2]);
875 /* This function searches for loop outs associated with function call in the unrolling loop. */
876 new_end_block (end_block, loop_head, l_n, loop_endblock_outs, unroll_factor);
880 * Unroll the loop body with a factor that must be power of two.
881 * Called from a post walker.
883 * @param n An IR node.
884 * @param env points to a result
886 static void do_loop_unroll(ir_node *n, void *env)
888 int *unroll_done = env;
889 induct_var_info info;
891 set *loop_nodes, *loop_outs, *loop_endblock_outs;
892 ir_node *phi_init, *loop_head;
893 ir_node *backedge_jmp;
894 int l_sons = 0, unroll_factor = 0;
895 tarval *init, *iter_end, *iter_increment,*tarval_null, *tarval_one, *tarval_three, *tarval_two, *diff, *iter_number;
899 info.itervar_phi = n;
901 /* The IR node must be a induction variable. */
902 if (get_irn_op(n) == op_Phi) {
903 if (is_induction_variable (&info) == NULL)
909 /* Brute force limiting of loop body size. */
910 if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
912 /* Only unroll loops that compare against a constant for exiting. */
913 if (info.cmp == NULL) return;
915 /* We only want to unroll innermost loops. */
916 l_sons = get_loop_n_sons(info.l_itervar_phi);
919 /* "do" loops are not implementit gut for this reason
920 at the time we don't unroll them.*/
921 if(info.l_itervar_phi->flags & do_loop)
924 phi_init = get_Phi_pred(info.itervar_phi, info.init_pred_pos);
926 if ((!(info.l_itervar_phi->flags & loop_is_count_loop)) ||
927 (info.l_itervar_phi->flags & loop_is_endless) ||
928 (info.l_itervar_phi->flags & loop_is_dead) ||
929 (info.l_itervar_phi->flags & once))
932 init = info.l_itervar_phi->loop_iter_start;
933 iter_end = info.l_itervar_phi->loop_iter_end;
934 iter_increment = info.l_itervar_phi->loop_iter_increment;
935 tarval_null = get_mode_null(get_tarval_mode(iter_increment));
936 tarval_one = get_mode_one(get_tarval_mode(init));
938 if(tarval_is_double(init) || tarval_is_double(iter_end) || tarval_is_double(iter_increment))
941 if((tarval_is_negative(init) && tarval_is_negative(iter_end)) ||
942 (!tarval_is_negative(init) && !tarval_is_negative(iter_end)) ||
943 (tarval_is_null(init) || tarval_is_null(iter_end)))
944 diff = tarval_abs(tarval_sub(iter_end, init));
946 diff = tarval_add(tarval_abs(iter_end),tarval_abs(init));
948 iter_increment = tarval_abs(iter_increment);
950 if(!(info.l_itervar_phi->flags & do_loop))
951 diff = tarval_add(diff, tarval_one);
952 /* Test for the value of unroll factor. */
953 if (tarval_cmp(tarval_mod(diff,iter_increment), tarval_null) == pn_Cmp_Eq)
954 iter_number = tarval_div(diff, iter_increment);
957 tarval_two = tarval_add(tarval_one, tarval_one);
958 tarval_three = tarval_add(tarval_two,tarval_one);
960 if (tarval_cmp(tarval_mod(iter_number, tarval_three), tarval_null) == pn_Cmp_Eq)
962 else if (tarval_cmp(tarval_mod(iter_number, tarval_two), tarval_null) == pn_Cmp_Eq)
966 if (get_firm_verbosity())
967 printf("\nloop unrolling with factor %d \n", unroll_factor);
969 /* ok, we will do unrolling */
972 /* The unroll factor must be less than 4. */
973 assert(unroll_factor <= MAX_UNROLL);
975 loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
977 assert(loop_head != NULL && is_Block(loop_head));
979 /* We assume, the loop head has exactly one backedge. The position of
980 the backedge is in the following variable: */
981 backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
983 /* A set with the nodes to copy. */
984 loop_nodes = new_set(set_cmp, 8);
985 /* A set with the loop outs for exceptions. */
986 loop_outs = new_set(set_cmp, 8);
988 /* A set containing all predecessors
989 of the end block from the unrolling loop. */
990 loop_endblock_outs = new_set(set_cmp, 8);
991 /* Find all nodes of the unrolling loop. */
992 find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
994 /* Decide if the loop head to be copy.*/
995 copy_loop_head = loop_head_nodes(loop_nodes, &info);
997 /* Copy all nodes of the unrolling loop, that muss be copy. */
998 copy_loop_body(loop_nodes, &info, unroll_factor);
1000 backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
1002 /* Set the backedge of the loop head. */
1003 for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
1004 if (value->irn == backedge_jmp)
1005 set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
1007 set_loop_outs(loop_nodes, loop_outs, loop_endblock_outs, &info, unroll_factor);
1010 /* Performs loop unrolling for the passed graph. */
1011 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */)
1014 int unroll_done = 0;
1016 if ( !get_opt_loop_unrolling()) return;
1018 rem = current_ir_graph;
1019 current_ir_graph = irg;
1021 /* -- Precompute some information -- */
1023 /* Call algorithm that computes the loop information */
1024 // construct_backedges(irg);
1025 compute_loop_info(irg);
1026 /* Call algorithm that computes the backedges */
1027 // construct_cf_backedges(irg);
1028 dump_loop_tree(current_ir_graph, "-deadlooptree");
1030 /* Call algorithm that computes the dominator trees. */
1033 /* Call algorithm that computes the out edges */
1034 compute_irg_outs(irg);
1036 collect_phiprojs(irg);
1038 /* -- Search expressions that can be optimized -- */
1039 irg_walk_graph(irg, NULL, do_loop_unroll, &unroll_done);
1042 /* unrolling was done, all info is invalid */
1043 set_irg_dom_inconsistent(irg);
1044 set_irg_outs_inconsistent(irg);
1045 set_irg_loopinfo_state(current_ir_graph, loopinfo_cf_inconsistent);
1046 set_trouts_inconsistent();
1047 set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
1050 current_ir_graph = rem;