2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @file loop_unrolling.c
25 * File name: ir/opt/loop_unrolling.c
26 * Purpose: Make loop unrolling.
27 * Author: Beyhan Veliev
31 * Copyright: (c) 2004 Universität Karlsruhe
39 #include "iroptimize.h"
50 #include "strength_red_t.h"
51 #include "compute_loop_info.h"
56 /* We will unroll maximal 4-times. */
59 /* We will know if the head of the loop to be copy.
60 * Default "0" don't copy.*/
61 static int copy_loop_head;
64 ir_node *irn ; /* Node of the loop to be unrolling*/
65 ir_node *copy[MAX_UNROLL] ; /* The copy of the node */
69 * Compare two elements of the copies_t set
71 static int set_cmp(const void *elt, const void *key, size_t size)
73 const copies_t *c1 = elt;
74 const copies_t *c2 = key;
76 return c1->irn != c2->irn;
80 * Remember the new node in the old node by using a field all nodes have.
83 set_new_node (ir_node *old, ir_node *new)
88 /*Check if phi in the loop head is.
90 * @param phi Must be a phi node from the loop.
91 * @param loop_head The loop head .
93 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
94 assert(is_Block(loop_head));
95 return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
99 * Copies predecessors of node to copies of node.
100 * If the predecessor is a loop invariant, then the copy get it
101 * as predecessor, else the copy of the predecessor.
103 * @param l_n A set, where the node of the loop are saved.
104 * @param value A element of the set.
105 * @param info Contains information about the induction variable.
106 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
107 * @param env Free environment pointer.
110 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
114 copies_t key, *value_pred;
115 if(value->copy[0] == NULL ||
116 get_irn_op(value->irn) != get_irn_op(value->copy[0]))
119 /* The head of the unrolling loop. */
120 loop_head = get_loop_node(info->l_itervar_phi, 0);
121 irn_arity = get_irn_arity(value->irn);
123 for (i = 0; i < irn_arity; i++) {
124 ir_node *pred = get_irn_n(value->irn, i);
127 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
129 if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
130 if (value_pred == NULL) {
131 /* Is loop invariant. */
132 for (p = 0; p < unroll_factor - 1; p++)
133 set_irn_n(value->copy[p], i, pred);
134 /* pred is a loop invariant. The copies of the successors get it as predecessor. */
137 for (p = 0; p < unroll_factor - 1; p++)
138 set_irn_n(value->copy[p], i, value_pred->copy[p]);
139 /* value_pred->irn is a node from the unrolling loop.
140 * The copies of the successors get the
141 * copies of value_pred as predecessor.
148 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
149 * must now point to the value defined
150 * in the last copy of the loop body.
152 * @param l_n A set, where the node of the loop are saved.
153 * @param value A phi in the loop head.
154 * @param info Contains information about the induction variable.
155 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
158 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
160 copies_t key, *value_pred;
162 /* info->op_pred_pos is the backedge position. */
163 key.irn = get_irn_n(value->irn, info->op_pred_pos);
164 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
166 /* value->copy[unroll_factor - 2] is the last copy. */
167 set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
171 * Test for a loop head.
173 * Returns true if the node has predecessors in the loop _and_ out of
174 * the loop. Then it is a loop head: The loop can be entered through
177 * @param n The node to be tested. Must be a block.
178 * @param info Contains the loop information.
181 is_loop_head(ir_node *n, induct_var_info *info)
184 int some_outof_loop = 0, some_in_loop = 0;
186 assert(get_irn_op(n) == op_Block);
187 arity = get_Block_n_cfgpreds(n);
189 for (i = 0; i < arity; i++) {
190 ir_node *pred = get_Block_cfgpred(n, i);
192 if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
198 return some_outof_loop && some_in_loop;
203 * Test whether the passed loop is a natural loop.
205 * Returns true if the loop has only one loop header and only a single
208 * @param info Contains the loop information.
211 is_natural_loop(induct_var_info *info)
216 l_n_node = get_loop_n_nodes (info->l_itervar_phi);
218 for (i = 1; i < (l_n_node); i ++) {
219 l_node = get_loop_node (info->l_itervar_phi, i);
220 if (is_loop_head(l_node, info)) return 0;
222 if (has_backedges(l_node) && i != l_n_node-1) return 0;
229 * Search for all nodes of a loop.
231 * @param node The induction variable of the loop.
232 * @param loop_head The head of the loop.
233 * @param l_n A set, where the node of the loop are saved.
236 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
239 copies_t key, *value;
241 /* Add this node to the list. */
244 /* Initialize all copies of the added node with NULL.*/
245 for (i = 0; i < 4; ++i)
247 value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
249 /* Add all outs of this node to the list, if they are within the loop. */
250 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
251 ir_node *pred = get_irn_out(node, i);
254 if (!is_loop_invariant(pred, loop_head) &&
255 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
256 find_loop_nodes(pred, loop_head, l_n);
260 /* Add all ins if they are within the loop. */
261 for (i = get_irn_arity(node) - 1; i >=0; --i) {
262 ir_node *pred = get_irn_n(node, i);
265 if (!is_loop_invariant(pred, loop_head) &&
266 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
267 find_loop_nodes(pred, loop_head, l_n);
273 * Make a new loop head if copy_head = 1.
275 * @param l_n A set, where the node of the loop are saved.
276 * @param info Contains the loop information.
277 * @param value A element of the set.
278 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
279 * @param copy_head if non-zero, the loop head will be copied
283 new_loop_head(set *l_n, induct_var_info *info, copies_t *value, int unroll_factor, int copy_head)
286 copies_t block, *value_backedge_jmp, *backedge_jmp_block;
287 /* The predecessor of the loop head in the backedge position */
288 ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
290 block.irn = backedge_jmp;
291 value_backedge_jmp = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
294 /* The first copy of the loop head must point to the loop head.*/
295 value->copy[0] = new_Block(1, &backedge_jmp);
296 if(info->l_itervar_phi->flags & do_loop){
297 value_backedge_jmp->copy[0] = new_r_Jmp(current_ir_graph, value->copy[0]);
298 for (i = 1; i < unroll_factor - 1; ++i) {
299 /* all other copies must point to the copy before it in the array. */
300 value->copy[i] = new_Block(1, &backedge_jmp);
301 value_backedge_jmp->copy[i] = new_r_Jmp(current_ir_graph, value->copy[i]);
303 for (i = 1; i < unroll_factor - 1; ++i){
304 set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
305 set_Block_cfgpred (value->copy[i], 0, value_backedge_jmp->copy[i-1]);
308 for (i = 1; i < unroll_factor - 1; ++i) {
309 /* all other copies must point to the copy before it in the array. */
310 set_nodes_block(value_backedge_jmp->copy[i-1], get_nodes_block(backedge_jmp));
311 value->copy[i] = new_Block(1, &value_backedge_jmp->copy[i-1]);
315 /* If the loop head must not be copy. block.irn is the successor of the loop head.*/
316 block.irn = get_nodes_block(backedge_jmp);
317 backedge_jmp_block = set_find(l_n, &block, sizeof(block), HASH_PTR(block.irn));
319 /* The first copy of block.irn point to it.
320 The another copies muss point to the copy before it in the array.*/
321 set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
323 for (i = 1; i < unroll_factor - 1; ++i)
324 set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
328 /* Set all copies of the induction variable.
330 * @param phi A phi node in the loop head block.
331 * @param phi_pred The predecessor of the phi along the backedge.
332 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
335 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
339 if(phi_pred != NULL){
340 /* The first copy of Phi node get the node along the backedge as predecessor. The next copies
341 the copies of this node.*/
342 phi->copy[0] = phi_pred->irn;
343 for (p = 1; p < unroll_factor - 1; ++p) {
344 /* If two phi nodes are in cycle. */
345 if (phi_pred->copy[p - 1] == NULL && get_irn_op(phi_pred->irn) == op_Phi) {
347 phi->copy[p] = phi->irn;
349 phi->copy[p] = phi_pred->irn;
351 phi->copy[p] = phi_pred->copy[p - 1];
354 /* The predecessors of phi are loop invariant and the copies von phi
355 must be phi it self.*/
356 for(p = 0; p < unroll_factor - 1; ++p)
357 phi->copy[p] = phi->irn;
361 * Decide if the loop head must be copied. A head with important nodes
364 * @param l_n A set, where the node of the loop are saved.
365 * @param info Contains information about the induction variable.
367 * @return non-zero if the loop head must be copied
370 loop_head_nodes(set *l_n, induct_var_info *info)
374 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
376 for (value = set_first(l_n); value != NULL; value = set_next(l_n))
377 if (is_no_Block(value->irn) && get_nodes_block(value->irn) == loop_head)
378 /* If the loop head contains just this nodes, than must not be copy. */
379 switch (get_irn_opcode(value->irn)) {
396 /** Copy all loop nodes.
398 * @param l_n Contains all nodes of the loop.
399 * @param info Contains the loop information.
400 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
403 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
406 copies_t *value, *info_op, *phi, *loop_h = NULL, key, *value_block;
407 ir_node *proj_b, *cond, *proj_1, *proj_0, *loop_head;
408 proj_b = get_irn_out(info->cmp, 0);
409 cond = get_irn_out(proj_b, 0);
410 proj_0 = get_irn_out(cond, 0);
411 proj_1 = get_irn_out(cond, 1);
413 loop_head = get_loop_node(info->l_itervar_phi, 0);
414 /* I will create some nodes and need the optimization to
416 opt = get_optimize();
419 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
420 if(value->irn == loop_head)
422 else if (is_Phi_in_loop_head(value->irn, loop_head))
424 else if (copy_loop_head){
425 if(value->irn == proj_0){
426 for (i = 0; i < unroll_factor - 1; i++)
427 /* In the copies of the loop head we replace the cmp branche with a jmp node.
428 This is just for "for" loops."do" loops are handled in function
430 value->copy[i] = new_r_Jmp(current_ir_graph, get_nodes_block(value->irn));
431 }else if(value->irn != proj_b && value->irn != cond &&
432 value->irn != proj_1)
433 /* The loop head must be copied. */
434 for (i = 0; i < unroll_factor - 1; i++){
435 copy_irn_to_irg(value->irn, current_ir_graph);
436 value->copy[i] = get_irn_link(value->irn);
439 /* The loop head and its nodes must not be copied. */
440 if((value->irn->op == op_Block &&
441 value->irn != loop_head) ||
442 (value->irn->op != op_Block &&
443 get_nodes_block(value->irn) != loop_head)) {
444 for (i = 0; i<unroll_factor -1; i++) {
445 copy_irn_to_irg(value->irn, current_ir_graph);
446 value->copy[i] = get_irn_link(value->irn);
452 /* Treat the loop head block */
453 new_loop_head(l_n, info, loop_h, unroll_factor, copy_loop_head);
454 /* I have created the nodes and I set the optimization
458 /* Similarly treat the Phis in the loop head block. info->op is the node
459 along the backedges. */
461 info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
462 assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
463 for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
464 ir_node *phi = get_irn_out(loop_head, i);
467 copies_t *phi_pred, *phi_op;
469 key.irn = get_Phi_pred(phi, info->op_pred_pos); // info->op;
470 phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
472 phi_op = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
473 set_Phi_copies(phi_op, phi_pred, unroll_factor);
477 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
479 /* Set the predecessors of the copies. */
481 set_preds(l_n, value, info, unroll_factor, NULL);
482 }else if ((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
483 set_preds(l_n, value, info, unroll_factor, NULL);
485 if (is_Phi_in_loop_head(value->irn, loop_head) &&
486 has_backedges(value->irn))
487 /* Set the backedge of phis in the loop head. */
488 set_phi_backedge(l_n, value, info, unroll_factor);
490 if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head) &&
491 value->copy[0] != NULL ) {
492 ir_node *nodes_block = get_nodes_block(value->irn);
494 if (!copy_loop_head && nodes_block == loop_head)
497 key.irn = nodes_block;
498 value_block = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
499 /* Set the copy of the node in the accordant copy of its block. */
500 for(i = 0; i < unroll_factor - 1; i++){
501 set_nodes_block(value->copy[i], value_block->copy[i]);
505 /* I must repair some control flow edges, when the loop head
507 if (copy_loop_head && !(info->l_itervar_phi->flags & do_loop)){
509 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
510 key.irn = get_irn_out(proj_0, 0);
511 info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
512 for (i = 0; i < unroll_factor - 1; i++){
513 set_Block_cfgpred(info_op->copy[i], 0, value->copy[i]);
519 * Returns non-zero if a Proj node from the loop has a predecessor that
520 * can throw an exception.
522 * @param node A Proj node from the loop.
524 * @return non-zero if the predecessor of the Proj node can throw an exception
526 static int is_exception_possible(ir_node *node)
528 ir_node *pred = get_Proj_pred(node);
530 /* only fragile ops can throw an exception */
531 if (! is_fragile_op(pred))
535 * fragile ops that are known NOT to throw
536 * an exception if their pin_state is op_pin_state_floats
538 return get_irn_pinned(pred) != op_pin_state_floats;
542 * If a node from the loop is predecessor of the end block, then the end block must get all copies
543 * of this node as predecessors. This is possible with function calls in the unrolling loop.
545 * @param end_block The end block.
546 * @param loop_head The head of the unrolling loop.
547 * @param l_n Contains all nodes of the loop.
548 * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
549 * of the end block from the unrolling loop.
550 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
553 new_end_block(ir_node *end_block, ir_node *loop_head, set *l_n, set *loop_endblock_outs, int unroll_factor)
555 copies_t key, *value;
556 int set_el, new_preds, all_new_preds, i, q;
557 int old_preds = get_Block_n_cfgpreds(end_block); /* All old predecessors of the end block. */
560 for (i = 0; i < old_preds; i++) {
561 ir_node *pred = get_Block_cfgpred(end_block, i);
564 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
566 /* If a predecessor of the end block is a Proj from the unrolling loop (for function calls) .*/
567 if (get_irn_op(pred) == op_Proj && is_exception_possible(pred) && value != NULL &&
568 !(!copy_loop_head && get_nodes_block(pred) == loop_head))
569 value = set_insert(loop_endblock_outs, &key, sizeof(key), HASH_PTR(key.irn));
572 /* The set loop_endblock_outs contains all predecessors of the end block from the unrolling loop,
573 * set_el the amount of such nodes.
575 set_el = set_count (loop_endblock_outs);
577 /* If the end block haven't such predecessors, we are finished. */
580 new_preds = (unroll_factor - 1) * set_el; /* All new predecessors of the end block */
581 all_new_preds = old_preds + new_preds; /* All predecessors of this block. */
582 all_in = alloca(sizeof(*all_in) * all_new_preds); /* A array with size for all predecessors of this block. */
584 for (i = 0; i < old_preds; i++)
585 all_in[i] = get_Block_cfgpred(end_block, i); /* The old predecessors. */
587 value = set_first(loop_endblock_outs);
589 for (; value != NULL; value = set_next(loop_endblock_outs)) {
590 key.irn = value->irn;
591 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
592 for (q = 0; q < unroll_factor - 1 ; q++) {
593 all_in[i++] = value->copy[q]; /* The new predecessors. */
598 /* Replace the old predecessors of the end block wit´h the new ones. */
599 set_irn_in(end_block, all_new_preds, all_in);
602 /** new_after_loop_block
604 * The after loop block must examine the possible exceptions in the loop.
605 * If a (Proj) node from the loop is predecessor of this block, then the
606 * after loop block must have as well all copies of this node as predecessors.
608 * @param l_n Contains all nodes of the loop.
609 * @param block A block after the loop.
610 * @param loop_in A node from the loop, that is predecessor of the end block.
611 * @param unroll_factor An integer 2 <= unroll_factor <= 4.
614 new_after_loop_block (set *l_n, ir_node* block, copies_t *loop_in, int unroll_factor)
616 copies_t key, *value;
617 int i, p, q, s, old_preds, new_preds, all_new_preds ;
620 /* The node from the unrolling loop must be a Proj. */
621 if (loop_in->irn->op != op_Proj) return;
623 old_preds = get_Block_n_cfgpreds(block); /* All old predecessors of this block. */
624 new_preds = old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
625 all_new_preds = old_preds + new_preds; /* All predecessors of this block. */
627 all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
629 for (i = 0 ; i < old_preds; i++)
630 all_in[i] = get_Block_cfgpred(block, i); /* The old predecessors. */
633 for (i = 0; i < old_preds; i++){
635 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
637 for (s = 0; s < (unroll_factor - 1); s++) {
638 all_in[q++] = value->copy[p++]; /* The new predecessors. */
641 /* Replace the old predecessors of the end block whit the new one. */
642 set_irn_in(block, all_new_preds, all_in);
645 /** new_after_loop_node
647 * An after loop node (phi or call) must examine the possible exceptions in the loop.
648 * If a (Proj) node from the loop is predecessor of this node, then the after loop
649 * node must have as well all copies of this node as predecessors.
651 * @param l_n Contains all nodes of the loop.
652 * @param loop_outs Contains nodes after the loop,that have as predecessor a node from the loop.
653 * @param node A node after the loop.
654 * @param loop_in A node (Proj) from the loop, that is predecessor of *node.
655 * @param unroll_factor An integer 2 <= unroll_factor <= 4.
658 new_after_loop_node(set *l_n, set *loop_outs, ir_node *node, copies_t *loop_in, int unroll_factor)
660 ir_node *pred, *block_pred = NULL, *node_block, *new_phi;
661 int phi = 0, old_preds, new_preds, all_new_preds, p, q, i, s;
662 copies_t key, *value = NULL;
665 old_preds = get_irn_arity(node); /* All old predecessors of this node. */
666 new_preds =old_preds * (unroll_factor - 1); /* All new predecessors of this block. */
667 all_new_preds = old_preds + new_preds; /* All predecessors of this block. */
669 all_in = alloca(sizeof(*all_in) * all_new_preds); /* An array with size for all predecessors of this block. */
671 /* Verification Predecessors, successors and operation of node and loop_in.
672 * loop_in must be a Proj node.
674 if (loop_in->irn->op != op_Proj) return;
675 /* Node must be operation Phi with mode memory or a Call node. */
676 if (get_irn_op(node) == op_Phi &&
677 get_irn_mode(node) == mode_M){
678 /* If node is a Phi node,then must have a Call node as successor. */
679 for (i = 0; i < get_irn_n_outs(node); i++)
680 if (get_irn_op(get_irn_out(node, i)) == op_Call) {
688 /* The predecessor of loop_in must not be a loop invariant. */
689 pred = get_Proj_pred(loop_in->irn);
691 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
694 node_block = get_nodes_block(node);
696 /* The block of node must also have a (Proj) predecessor from the unrolling loop. */
697 for (i = 0; i < get_Block_n_cfgpreds(node_block); i++) {
698 block_pred = get_Block_cfgpred(node_block, i);
700 if (get_irn_op(block_pred) == op_Proj) {
701 if (get_Proj_pred(block_pred) == pred)
709 if (! block_pred) return;
711 key.irn = block_pred;
712 value = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
717 new_after_loop_block(l_n, node_block, value, unroll_factor);
719 for (i = 0; i < old_preds; ++i)
720 all_in[i] = get_irn_n(node, i); /* The old predecessors. */
723 for (i = 0; i < old_preds; ++i) {
725 value = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
727 for (s = 0; s < (unroll_factor - 1); ++s) {
728 all_in[q] = value->copy[p]; /* The new predecessors. */
733 /* A new phi node with the new predecessors. */
734 new_phi = new_r_Phi(current_ir_graph, get_nodes_block(node), all_new_preds,all_in,
738 exchange(node, new_phi);
740 for (i = 0; i < get_irn_arity(node); ++i)
741 if (get_irn_n(node, i) == pred)
742 set_irn_n(node, i, new_phi);
745 /* The set loop_outs contains the visited nodes and their blocks. */
747 value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
748 key.irn = get_nodes_block(node);
749 value = set_insert(loop_outs, &key, sizeof(key), HASH_PTR(key.irn));
753 /* Set the outs of the unrolling loop. All loop outs of a node muss now
754 * point to the last copy of it. Just phi nodes in the loop head and proj
755 * nodes save it outs. The all copies of some Projs have too outs.
757 * @param l_n Contains all nodes of the loop.
758 * @param loop_outs The set contains the visited and changed loop outs.
759 * @param loop_endblock_outs The set loop_endblock_outs contains all predecessors
760 * of the end block from the unrolling loop.
761 * @param info Contains the loop information.
762 * @param unroll_factor A integer 2 <= unroll_factor <= 4.
765 set_loop_outs(set *l_n, set *loop_outs, set *loop_endblock_outs, induct_var_info *info, int unroll_factor)
767 copies_t *value, key;
769 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
770 ir_node *end_block = get_irg_end_block(current_ir_graph);
772 for (value = set_first(l_n); value != NULL; value = set_next(l_n)){
773 if(get_irn_node_nr(value->irn) == 35047)
775 if (! is_Phi_in_loop_head(value->irn, loop_head) &&
776 (get_irn_opcode(value->irn) == iro_Proj && value->copy[0] != NULL))
777 for (i = 0; i < get_irn_n_outs(value->irn); ++i) {
778 key.irn = get_irn_out(value->irn, i);
780 /* Search for loop outs. */
781 if (set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
782 if ((key.irn->op == op_Block && get_Block_dom_depth(key.irn) >
783 get_Block_dom_depth(loop_head)) ||
784 (key.irn->op != op_Block && get_Block_dom_depth(get_nodes_block(key.irn)) >
785 get_Block_dom_depth(loop_head))) {
787 for (p = 0; p < get_irn_arity(key.irn); p++)
788 if (value->irn == get_irn_n(key.irn, p)) {
789 if (get_irn_op(value->irn) == op_Proj && is_exception_possible(value->irn)) {
790 if (set_find(loop_outs, &key, sizeof(key), HASH_PTR(key.irn)) == NULL) {
791 /* If the loop out is for exceptions in the loop. */
792 if ((get_irn_op(key.irn) == op_Phi && get_irn_mode(key.irn) == mode_M) ||
793 get_irn_op(key.irn) == op_Call)
794 new_after_loop_node(l_n,loop_outs, key.irn, value, unroll_factor);
800 /* Loop outs in the loop head must be not changed.*/
801 if(get_nodes_block(value->irn) != loop_head)
802 set_irn_n(key.irn, p, value->copy[unroll_factor-2]);
807 /* This function searches for loop outs associated with function call in the unrolling loop. */
808 new_end_block (end_block, loop_head, l_n, loop_endblock_outs, unroll_factor);
812 * Unroll the loop body with a factor that must be power of two.
813 * Called from a post walker.
815 * @param n An IR node.
816 * @param env points to a result
818 static void do_loop_unroll(ir_node *n, void *env)
820 int *unroll_done = env;
821 induct_var_info info;
823 set *loop_nodes, *loop_outs, *loop_endblock_outs;
824 ir_node *phi_init, *loop_head;
825 ir_node *backedge_jmp;
826 int l_sons = 0, unroll_factor = 0;
827 tarval *init, *iter_end, *iter_increment,*tarval_null, *tarval_one, *tarval_three, *tarval_two, *diff, *iter_number;
831 info.itervar_phi = n;
833 /* The IR node must be a induction variable. */
834 if (get_irn_op(n) == op_Phi) {
835 if (is_induction_variable (&info) == NULL)
841 /* Brute force limiting of loop body size. */
842 if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
844 /* Only unroll loops that compare against a constant for exiting. */
845 if (info.cmp == NULL) return;
847 /* We only want to unroll innermost loops. */
848 l_sons = get_loop_n_sons(info.l_itervar_phi);
851 /* "do" loops are not implementit gut for this reason
852 at the time we don't unroll them.*/
853 if(info.l_itervar_phi->flags & do_loop)
856 phi_init = get_Phi_pred(info.itervar_phi, info.init_pred_pos);
858 if ((!(info.l_itervar_phi->flags & loop_is_count_loop)) ||
859 (info.l_itervar_phi->flags & loop_is_endless) ||
860 (info.l_itervar_phi->flags & loop_is_dead) ||
861 (info.l_itervar_phi->flags & once))
864 init = info.l_itervar_phi->loop_iter_start;
865 iter_end = info.l_itervar_phi->loop_iter_end;
866 iter_increment = info.l_itervar_phi->loop_iter_increment;
867 tarval_null = get_mode_null(get_tarval_mode(iter_increment));
868 tarval_one = get_mode_one(get_tarval_mode(init));
870 if(tarval_is_double(init) || tarval_is_double(iter_end) || tarval_is_double(iter_increment))
873 if((tarval_is_negative(init) && tarval_is_negative(iter_end)) ||
874 (!tarval_is_negative(init) && !tarval_is_negative(iter_end)) ||
875 (tarval_is_null(init) || tarval_is_null(iter_end)))
876 diff = tarval_abs(tarval_sub(iter_end, init));
878 diff = tarval_add(tarval_abs(iter_end),tarval_abs(init));
880 iter_increment = tarval_abs(iter_increment);
882 if(!(info.l_itervar_phi->flags & do_loop))
883 diff = tarval_add(diff, tarval_one);
884 /* Test for the value of unroll factor. */
885 if (tarval_cmp(tarval_mod(diff,iter_increment), tarval_null) == pn_Cmp_Eq)
886 iter_number = tarval_div(diff, iter_increment);
889 tarval_two = tarval_add(tarval_one, tarval_one);
890 tarval_three = tarval_add(tarval_two,tarval_one);
892 if (tarval_cmp(tarval_mod(iter_number, tarval_three), tarval_null) == pn_Cmp_Eq)
894 else if (tarval_cmp(tarval_mod(iter_number, tarval_two), tarval_null) == pn_Cmp_Eq)
898 if (get_firm_verbosity())
899 printf("\nloop unrolling with factor %d \n", unroll_factor);
901 /* ok, we will do unrolling */
904 /* The unroll factor must be less than 4. */
905 assert(unroll_factor <= MAX_UNROLL);
907 loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
909 assert(loop_head != NULL && is_Block(loop_head));
911 /* We assume, the loop head has exactly one backedge. The position of
912 the backedge is in the following variable: */
913 backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
915 /* A set with the nodes to copy. */
916 loop_nodes = new_set(set_cmp, 8);
917 /* A set with the loop outs for exceptions. */
918 loop_outs = new_set(set_cmp, 8);
920 /* A set containing all predecessors
921 of the end block from the unrolling loop. */
922 loop_endblock_outs = new_set(set_cmp, 8);
923 /* Find all nodes of the unrolling loop. */
924 find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
926 /* Decide if the loop head to be copy.*/
927 copy_loop_head = loop_head_nodes(loop_nodes, &info);
929 /* Copy all nodes of the unrolling loop, that muss be copy. */
930 copy_loop_body(loop_nodes, &info, unroll_factor);
932 backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
934 /* Set the backedge of the loop head. */
935 for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
936 if (value->irn == backedge_jmp)
937 set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
939 set_loop_outs(loop_nodes, loop_outs, loop_endblock_outs, &info, unroll_factor);
942 /* Performs loop unrolling for the passed graph. */
943 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */)
948 if ( !get_opt_loop_unrolling()) return;
950 rem = current_ir_graph;
951 current_ir_graph = irg;
953 /* -- Precompute some information -- */
955 /* Call algorithm that computes the loop information */
956 // construct_backedges(irg);
957 compute_loop_info(irg);
958 /* Call algorithm that computes the backedges */
959 // construct_cf_backedges(irg);
961 /* Call algorithm that computes the dominator trees. */
964 /* Call algorithm that computes the out edges */
965 assure_irg_outs(irg);
967 collect_phiprojs(irg);
969 /* -- Search expressions that can be optimized -- */
970 irg_walk_graph(irg, NULL, do_loop_unroll, &unroll_done);
973 /* unrolling was done, all info is invalid */
974 set_irg_doms_inconsistent(irg);
975 set_irg_outs_inconsistent(irg);
976 set_irg_loopinfo_state(current_ir_graph, loopinfo_cf_inconsistent);
977 set_trouts_inconsistent();
978 set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
981 current_ir_graph = rem;