2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Loop peeling and unrolling
23 * @author Christian Helmer
38 #include "array_t.h" /* automatic array */
39 #include "beutil.h" /* get_block */
42 //#include "irnode_t.h"
45 DEBUG_ONLY(static firm_dbg_module_t *dbg);
48 * Convenience macro for iterating over every phi node of the given block.
49 * Requires phi list per block.
51 #define for_each_phi(block, phi) \
52 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
55 static ir_loop *cur_loop;
57 /* The loop walker should be possible to abort if nothing can be done anymore */
58 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
60 /* condition for breaking a copy_walk */
61 typedef unsigned walker_condition(ir_node *);
63 /* stores node and position of a predecessor */
64 typedef struct out_edges {
69 /* access complex values through the nodes links */
70 typedef struct node_info {
73 ir_node *link; /* temporary links for ssa creation */
74 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
75 struct node_info *freelistnext; /* linked list to free all node_infos */
78 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
80 static out_edges *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
81 static out_edges *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
83 static ir_node *loop_cf_head = NULL; /* Loop head node */
84 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
87 static ir_node *loop_inv_head = NULL;
89 static ir_node *loop_peeled_head = NULL;
91 /* Loop analysis informations */
92 typedef struct loop_info_t {
95 unsigned invariant_loads; /* number of load nodes */
96 unsigned stores; /* number of store nodes */
97 unsigned blocks; /* number of blocks in the loop */
98 unsigned opnodes_n; /* nodes that should result in an instruction */
99 unsigned opnodes_head;
102 /* Information about the current loop */
103 static loop_info_t loop_info;
105 /* A walker may start visiting a condition chain with these nodes. */
106 static out_edges *cond_chain_entries;
108 static unsigned head_inversion_node_count;
109 static unsigned head_inversion_node_limit;
110 //static unsigned head_inversion_block_count;
114 * ============= AUXILIARY FUNCTIONS =====================================
118 * Creates object on the heap, and adds it to a linked list to free it later.
120 static node_info *new_node_info(void) {
121 node_info *l = XMALLOCZ(node_info);
122 l->freelistnext = link_node_state_list;
123 link_node_state_list = l;
129 static node_info *get_node_info(ir_node *n)
131 return ((node_info *)get_irn_link(n));
134 /* Allocates a node_info struct for the given node. For use with a walker. */
135 static void alloc_node_info(ir_node *node, void *env)
137 node_info *state = new_node_info();
139 set_irn_link(node, (void *)state);
142 static void free_node_info(void)
145 next = link_node_state_list;
146 while(next->freelistnext) {
147 node_info *cur = next;
148 next = cur->freelistnext;
154 * Use the linked list to reset the reused values of all node_infos
155 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
157 static void reset_node_infos(void)
160 next = link_node_state_list;
161 while(next->freelistnext) {
162 node_info *cur = next;
163 next = cur->freelistnext;
171 static ir_node *get_copy(ir_node *n)
173 return ((node_info *)get_irn_link(n))->copy;
176 /* Links the node to its copy */
177 static void set_copy(ir_node *n, ir_node *copy)
179 ((node_info *)get_irn_link(n) )->copy = copy;
182 /* Returns 0 if the node or block is not in cur_loop */
183 static unsigned is_in_loop(ir_node *node)
185 return (get_irn_loop(get_block(node)) == cur_loop);
188 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
189 static unsigned is_alien_edge(ir_node *n, int i)
191 return(!is_in_loop(get_irn_n(n, i)));
194 /* used for walker */
195 static void unmark_block(ir_node *node, void * env)
198 // DB((dbg, LEVEL_1, "UNMARK %ld\n", node->node_nr));
200 set_Block_mark(node, 0);
203 static unsigned is_nodesblock_marked(ir_node* node)
205 return (get_Block_mark(get_block(node)));
209 * Add newpred at position pos to node and also add the corresponding value to the phis.
210 * Requires block phi list.
212 static void duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
219 assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
221 DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n", get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
223 block_arity = get_irn_arity(node);
225 NEW_ARR_A(ir_node*, ins, block_arity + 1 );
226 for (i = 0; i < block_arity; ++i)
227 ins[i] = get_irn_n(node, i);
228 ins[block_arity] = newpred;
230 set_irn_in(node, block_arity + 1, ins);
232 for_each_phi(node, phi) {
233 int phi_arity = get_irn_arity(phi);
234 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
236 NEW_ARR_A(ir_node *, ins, block_arity + 1);
237 for(i=0; i < phi_arity; ++i) {
238 DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
239 ins[i] = get_irn_n(phi, i);
241 ins[block_arity] = get_irn_n(phi, pos);
242 set_irn_in(phi, block_arity + 1, ins);
246 /* Walks through all nodes of cur_loop */
247 static unsigned loop_walker_rec(ir_node *node,
248 irg_walk_func_abortable *pre,
249 irg_walk_func_abortable *post, void * env)
253 ir_graph *irg = get_current_ir_graph();
255 /* RETURN if we walked out of the loop*/
256 if (!is_in_loop(node))
260 unsigned stop = pre(node, env);
265 set_irn_visited(node, get_irg_visited(irg));
267 if (get_irn_op(node) != op_Block) {
268 ir_node *pred = get_irn_n(node, -1);
269 if (get_irn_visited(pred) < get_irg_visited(irg))
271 stop = loop_walker_rec(pred, pre, post, env);
277 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
278 ir_node *pred = get_irn_n(node, i);
279 if (get_irn_visited(pred) < get_irg_visited(irg)) {
280 stop = loop_walker_rec(pred, pre, post, env);
287 return post(node, env);
292 * Walks through loop nodes.
293 * The entries of the loop (all edges pointing into the loop) have to be given.
295 static unsigned loop_walker(out_edges *entries,
296 irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env)
301 for (i=0; !stop && i<ARR_LEN(entries); i++) {
306 pred = get_irn_n( entry.node , entry.pred_irn_n);
308 stop = loop_walker_rec( pred, pre, post, env);
314 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
315 static void get_loop_outs_and_info(ir_node *node, void *env)
317 unsigned node_in_loop, pred_in_loop;
321 arity = get_irn_arity(node);
322 for (i = 0; i < arity; i++) {
323 ir_node *pred = get_irn_n(node, i);
325 pred_in_loop = is_in_loop(pred);
326 node_in_loop = is_in_loop(node);
328 /* collect some loop information */
330 if ( !is_Store(node) )
332 if ( !is_Load(node) )
334 if ( !is_Call(node) )
336 if ( !is_Block(node) && !is_Proj(node) && !is_Phi(node) )
337 ++loop_info.opnodes_n;
340 //Find the loops head/the blocks with cfpred outside of the loop
341 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
342 ir_node *cfgpred = get_Block_cfgpred(node, i);
343 if ( !is_in_loop(cfgpred) ) {
344 //DB((dbg, LEVEL_1, "potential head %+F\n", node));
345 /* another head? We do not touch this. */
346 if (loop_cf_head && loop_cf_head != node) {
347 loop_cf_head_valid = 0;
354 if ( pred_in_loop && !node_in_loop ) {
357 entry.pred_irn_n = i;
358 ARR_APP1(out_edges, cur_loop_outs, entry);
364 * Finds invariant loads and marks them as invariant.
365 * (has to be post walk)
367 static unsigned get_invariants(ir_node *node, void *env)
370 int arity = get_irn_arity(node);
373 /* RETURN, no preds to visit */
374 if (arity == 0) return 0;
377 assert(arity>=2 && "expected load node to have in[1] (address)");
379 ir_node *pred = get_irn_n(node, 1);
380 if ( (get_Load_volatility(node) == volatility_non_volatile) &
384 || get_node_info(node)->invariant ) ) {
385 get_node_info(node)->invariant = 1;
386 ++loop_info.invariant_loads;
389 get_node_info(node)->invariant = 0;
394 /* find loop variant preds */
395 for(i = 0; i < arity; ++i) {
396 ir_node *pred = get_irn_n(node, i);
398 if ( is_in_loop(pred) /* outside loop is loop invariant */
399 && !is_Const(pred) /* constants */
400 && !is_SymConst(pred) /* SymConst */
401 && !get_node_info(node)->invariant ) { /* pred is marked as invariant */
407 get_node_info(node)->invariant = 1;
409 get_node_info(node)->invariant = 0;
416 static ir_node *ssa_second_def;
417 static ir_node *ssa_second_def_block;
420 * Walks the graph bottom up, searching for definitions and create phis.
421 * (Does not handle the special case where the second definition is in the block of the user
422 * of the original definition because it is not necessary here.)
424 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
432 DB((dbg, LEVEL_5, "ssa sdacp: block %ld\n", get_irn_node_nr(block)));
434 /* Prevents creation of phi that would be bad anyway.
435 * Dead and bad blocks. */
436 if (get_irn_arity(block) < 1 || is_Bad(block))
439 if (block == ssa_second_def_block) {
440 DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
441 return ssa_second_def;
444 /* already processed this block? */
445 if (irn_visited(block)) {
446 ir_node *value = get_node_info(block)->link;
447 DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
451 irg = get_irn_irg(block);
452 assert(block != get_irg_start_block(irg));
454 /* a Block with only 1 predecessor needs no Phi */
455 n_cfgpreds = get_Block_n_cfgpreds(block);
456 if (n_cfgpreds == 1) {
457 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
460 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
462 value = search_def_and_create_phis(pred_block, mode);
463 get_node_info(block)->link = value;
464 mark_irn_visited(block);
469 /* create a new Phi */
470 NEW_ARR_A(ir_node*, in, n_cfgpreds);
471 for(i = 0; i < n_cfgpreds; ++i)
472 in[i] = new_Unknown(mode);
474 phi = new_r_Phi(block, n_cfgpreds, in, mode);
476 /* Important: always keep block phi list up to date. */
477 add_Block_phi(block, phi);
478 /* EVERY node is assumed to have a node_info linked. */
479 alloc_node_info(phi, NULL);
481 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
483 get_node_info(block)->link = phi;
484 mark_irn_visited(block);
486 /* set Phi predecessors */
487 for(i = 0; i < n_cfgpreds; ++i) {
488 ir_node *pred_block = get_Block_cfgpred_block(block, i);
489 ir_node *pred_val = search_def_and_create_phis(pred_block, mode);
490 DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
491 set_irn_n(phi, i, pred_val);
498 * Given a set of values this function constructs SSA-form for the users of the
499 * first value (the users are determined through the out-edges of the value).
500 * Uses the irn_visited flags. Works without using the dominance tree.
502 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
503 ir_node *second_block, ir_node *second_val)
507 const ir_edge_t *edge;
508 const ir_edge_t *next;
510 assert(orig_block && orig_val && second_block && second_val &&
511 "no parameter of construct_ssa may be NULL");
513 /* no need to do anything */
514 if (orig_val == second_val)
517 irg = get_irn_irg(orig_val);
519 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
520 inc_irg_visited(irg);
522 mode = get_irn_mode(orig_val);
523 get_node_info(orig_block)->link = orig_val;
524 mark_irn_visited(orig_block);
526 ssa_second_def_block = second_block;
527 ssa_second_def = second_val;
529 /* Only fix the users of the first, i.e. the original node */
530 foreach_out_edge_safe(orig_val, edge, next) {
531 ir_node *user = get_edge_src_irn(edge);
532 int j = get_edge_src_pos(edge);
533 ir_node *user_block = get_nodes_block(user);
540 DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user)));
543 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
544 newval = search_def_and_create_phis(pred_block, mode);
546 newval = search_def_and_create_phis(user_block, mode);
549 /* If we get a bad node the user keeps the original in. No second definition needed. */
550 if (newval != user && !is_Bad(newval))
551 set_irn_n(user, j, newval);
554 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
557 /* get the number of backedges without alien bes */
558 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
562 int arity = get_irn_arity(loophead);
563 for (i = 0; i < arity; ++i) {
564 ir_node *pred = get_irn_n(loophead, i);
565 if (is_backedge(loophead, i) && ( with_alien || is_in_loop(pred)) )
572 * Sets the nodes backedges, according to its predecessors link.
574 static void fix_backedge_info(ir_node *node)
577 for (i = 0; i < get_irn_arity(node); ++i)
579 ir_node *pred = get_irn_n(node, i);
580 if (get_node_info(pred)->link != NULL)
581 set_backedge(node, i);
583 set_not_backedge(node, i);
589 * ============= PEELING =====================================
594 * Rewires the heads after peeling.
596 static void peel_fix_heads(void)
598 ir_node **loopheadnins, **peelheadnins;
599 ir_node *loophead = loop_cf_head;
600 ir_node *peelhead = get_copy(loophead);
602 int headarity = get_irn_arity(loophead);
609 int backedges_n = get_backedge_n(loophead, 0);
611 int lhead_arity = 2 * backedges_n;
612 int phead_arity = headarity - backedges_n;
615 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
616 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
618 for_each_phi(loophead, phi) {
619 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
621 for_each_phi(peelhead, phi) {
622 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
625 for (i = 0; i < headarity; i++)
627 ir_node *orgjmp = get_irn_n(loophead, i);
628 ir_node *copyjmp = get_copy(orgjmp);
631 * Rewire the head blocks ins and their phi ins.
632 * Requires phi list per block.
634 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
635 loopheadnins[lheadin_c] = orgjmp;
636 /* marks out edge as backedge */
637 get_node_info(orgjmp)->link = orgjmp;
638 for_each_phi(loophead, phi) {
639 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
643 loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */
644 /* marks out edge as normal edge */
645 get_node_info(copyjmp)->link = NULL;
646 /* get_irn_n( get_copy_of(phi), i) <!=> get_copy_of(get_irn_n( phi, i))
647 * Order is crucial! Predecessors outside of the loop are non existent.
648 * The copy (cloned with its ins!) has pred i,
649 * but phis pred i might not have a copy of itself.
651 for_each_phi(loophead, phi) {
652 //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( get_copy_of(phi), i)->node_nr);
653 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
657 peelheadnins[pheadin_c] = orgjmp;
658 /* marks out edge as normal edge */
659 get_node_info(orgjmp)->link = NULL;
660 for_each_phi(peelhead, phi) {
661 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
668 assert(pheadin_c == ARR_LEN(peelheadnins) &&
669 lheadin_c == ARR_LEN(loopheadnins) &&
670 "the constructed head arities do not match the predefined arities");
673 * assign the ins to the nodes
675 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
676 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
678 /* Fixes the backedge information according to the link.
679 * Following loop optimizations might depend on it. */
680 fix_backedge_info(loophead);
681 fix_backedge_info(peelhead);
683 for_each_phi(loophead, phi) {
684 ir_node **ins = get_node_info( phi )->ins;
685 set_irn_in(phi, lhead_arity, ins);
688 for_each_phi(peelhead, phi) {
689 ir_node **ins = get_node_info( phi )->ins;
690 set_irn_in(phi, phead_arity, ins);
695 * Create a raw copy (ins are still the old ones) of the given node.
697 static ir_node *rawcopy_node(ir_node *node)
702 cp = exact_copy(node);
704 cpstate = new_node_info();
705 set_irn_link(cp, cpstate);
706 mark_irn_visited(cp);
712 ///* This walker copies all walked nodes. The walk_condition determines the nodes to walk. */
713 //static void keepalives_walk(ir_node *node, walker_condition *walk_condition)
717 // ir_graph *irg = current_ir_graph;
720 // * break condition and cycle resolver, creating temporary node copies
722 // if (get_irn_visited(node) >= get_irg_visited(irg)) {
727 // mark_irn_visited(node);
729 // if (!is_Block(node)) {
730 // ir_node *pred = get_nodes_block(node);
731 // if (walk_condition(pred))
732 // keepalives_walk( pred, walk_condition );
735 // arity = get_irn_arity(node);
737 // for (i = get_irn_arity(node) - 1; i >= 0; --i) {
738 // ir_node *pred = get_irn_n(node, i);
740 // if (walk_condition(pred))
741 // keepalives_walk( pred, walk_condition );
744 // add_End_keepalive(get_irg_end(current_ir_graph), node);
749 * This walker copies all walked nodes.
750 * If the walk_condition is true for a node, it is walked.
751 * All nodes node_info->copy attributes has to be NULL prior to every to every walk.
753 static void copy_walk(ir_node *node, walker_condition *walk_condition)
759 ir_graph *irg = current_ir_graph;
760 node_info *node_info = get_node_info(node);
763 * break condition and cycle resolver, creating temporary node copies
765 if (get_irn_visited(node) >= get_irg_visited(irg)) {
766 /* Here we rely on nodestate's copy being initialized with NULL */
767 DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
768 if (node_info->copy == NULL) {
769 if (!is_Const(node) && !is_SymConst(node)) {
770 cp = rawcopy_node(node);
773 node_info->copy = cp;
775 DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
780 // add_End_keepalive(get_irg_end(current_ir_graph), node);
783 mark_irn_visited(node);
785 if (!is_Block(node)) {
786 ir_node *pred = get_nodes_block(node);
787 if (walk_condition(pred))
788 DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred)));
789 copy_walk( pred, walk_condition );
792 arity = get_irn_arity(node);
794 NEW_ARR_A(ir_node *, cpin, arity);
796 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
797 ir_node *pred = get_irn_n(node, i);
799 if (walk_condition(pred)) {
800 DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred)));
801 copy_walk( pred, walk_condition );
802 cpin[i] = get_copy(pred);
803 DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n",
804 get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
810 /* copy node / finalize temp node */
811 if (node_info->copy == NULL) {
812 /* No temporary copy existent */
814 /* Do not copy constants TODO right? */
815 if (!is_Const(node) && !is_SymConst(node)) {
816 cp = rawcopy_node(node);
819 node_info->copy = cp;
821 DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
823 /* temporary copy is existent but without correct ins */
825 DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
828 if (!is_Block(node)) {
829 ir_node *cpblock = get_copy(get_nodes_block(node));
831 set_nodes_block(cp, cpblock );
832 /* fix the phi information in attr.phis */
834 add_Block_phi(cpblock, cp);
836 /* macroblock info has not been copied */
837 set_Block_MacroBlock(cp, cp);
840 set_irn_in(cp, ARR_LEN(cpin), cpin);
843 /* Loop peeling, and fix the cf for the loop entry nodes, which have now more preds */
844 static void peel(out_edges *loop_outs)
847 ir_node **entry_buffer;
850 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
852 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
854 /* duplicate loop walk */
855 // cur_head = loop_cf_head;
856 inc_irg_visited(current_ir_graph);
858 for(i = 0; i < ARR_LEN(loop_outs); i++) {
859 out_edges entry = loop_outs[i];
860 ir_node *node = entry.node;
861 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
863 if (is_Block(node)) {
864 copy_walk( pred, is_in_loop );
865 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
867 copy_walk( pred, is_in_loop );
868 if (!is_End(node)) /* leave out keepalives */
869 /* Node is user of a value defined inside the loop.
870 * We'll need a phi since we duplicated the loop. */
871 /* cannot construct_ssa here, because it needs another walker */
872 entry_buffer[entry_c++] = pred;
876 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
878 /* Rewires the 2 heads */
881 /* Generate phis for values from peeled code and original loop */
882 for(i = 0; i < entry_c; i++)
884 ir_node *cppred, *block, *cpblock, *pred;
886 /* It is not possible to use
887 * pred = get_irn_n(entry.node, entry.pred_irn_n);
888 * because we might have changed the nodes predecessors in construct_ssa
890 pred = entry_buffer[i];
891 cppred = get_copy(pred);
892 block = get_nodes_block(pred);
893 cpblock = get_nodes_block(cppred);
894 construct_ssa(block, pred, cpblock, cppred);
899 * Populates head_entries with (node, pred_pos) tuple
900 * whereas the node's pred at pred_pos is in the head but not the node itself.
901 * Head and condition chain blocks must be marked.
903 static void get_head_entries(ir_node *node, void *env)
906 int arity = get_irn_arity(node);
909 for(i = 0; i < arity; ++i) {
910 /* node is not in the head, but the predecessor is.
911 * (head or loop chain nodes are marked) */
912 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
915 entry.pred_irn_n = i;
917 "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
918 node->node_nr, i, node->node_nr, get_irn_n(node, i)->node_nr));
919 ARR_APP1(out_edges, cur_head_outs, entry);
925 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
926 * A block belongs to the chain if a condition branches out of the loop.
927 * Returns if the given block belongs to the condition chain.
928 * FIXME prevent collecting ALL loop blocks (may happen if all blocks jump out of the loop)
930 static unsigned condition_chains(ir_node *block) {
931 const ir_edge_t *edge;
933 //unsigned over_limit = 0;
937 /* we need all outs, including keeps (TODO firm function for that??) */
938 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
942 /* We do not want to collect more nodes from condition chains, than the limit allows us to. */
943 if (head_inversion_node_count + nodes_n > head_inversion_node_limit) {
945 set_Block_mark(block, 0);
946 // printf(" %ld over limit\n", block->node_nr);
952 /* First: check our successors, and add all succs that are outside of the loop to the list */
953 foreach_block_succ(block, edge) {
954 ir_node *src = get_edge_src_irn( edge );
955 int pos = get_edge_src_pos( edge );
957 if (!is_in_loop(src)) {
958 //printf(" src %ld @ %d into block %ld \n", src->node_nr, pos, block->node_nr);
962 entry.pred_irn_n = pos;
963 ARR_APP1(out_edges, cond_chain_entries, entry);
964 mark_irn_visited(src);
968 /* this block is not part of the chain,
969 * because the chain would become too big or we have no succ outside of the loop */
971 set_Block_mark(block, 0);
974 set_Block_mark(block, 1);
975 DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
976 head_inversion_node_count += nodes_n;
979 /* Second: walk all successors, and add them to the list if they are not part of the chain */
980 foreach_block_succ(block, edge) {
982 ir_node *src = get_edge_src_irn( edge );
983 int pos = get_edge_src_pos( edge );
985 /* already done cases */
986 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph)))
989 mark_irn_visited(src);
991 inchain = condition_chains( src );
993 /* if successor is not part of chain we need to collect its outs */
997 entry.pred_irn_n = pos;
998 ARR_APP1(out_edges, cond_chain_entries, entry);
1007 static void inversion_fix_heads(void)
1009 ir_node **loopheadnins, **invheadnins;
1010 ir_node *loophead = loop_cf_head;
1011 ir_node *invhead = get_copy(loophead);
1013 int headarity = get_irn_arity(loophead);
1020 int backedges_n = get_backedge_n(loophead, 0);
1021 int lhead_arity = headarity - backedges_n;
1022 int ihead_arity = backedges_n;
1024 /* new in arrays for all phis in the head blocks */
1025 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
1026 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
1028 for_each_phi(loophead, phi) {
1029 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1031 for_each_phi(invhead, phi) {
1032 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1035 for (i = 0; i < headarity; i++) {
1036 ir_node *pred = get_irn_n(loophead, i);
1039 * Rewire the head blocks ins and their phi ins.
1040 * Requires phi list per block.
1042 if ( is_backedge(loophead, i) ) {
1043 invheadnins[iheadin_c] = pred;
1044 for_each_phi(invhead, phi) {
1045 get_node_info( phi )->ins[iheadin_c] = get_irn_n( phi, i) ;
1049 /* just copy these edges */
1050 loopheadnins[lheadin_c] = pred;
1051 for_each_phi(loophead, phi) {
1052 get_node_info( phi )->ins[lheadin_c] = get_irn_n(phi, i);
1058 /* assign the ins to the head blocks */
1059 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1060 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1062 /* assign the ins for the phis */
1063 for_each_phi(loophead, phi) {
1064 ir_node **ins = get_node_info(phi)->ins;
1065 set_irn_in(phi, lhead_arity, ins);
1068 for_each_phi(invhead, phi) {
1069 ir_node **ins = get_node_info(phi)->ins;
1070 set_irn_in(phi, ihead_arity, ins);
1075 static void loop_inversion_walk(out_edges *head_entries)
1080 ir_node **entry_buffer;
1081 ir_node **head_phi_assign;
1083 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1085 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1087 /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
1088 for_each_phi( loop_cf_head , phi) {
1089 for(i=0; i<get_irn_arity(phi); ++i) {
1090 ir_node *def = get_irn_n(phi, i);
1091 if ( is_nodesblock_marked(def) ) {
1092 ARR_APP1(ir_node *, head_phi_assign, def);
1097 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1099 /* duplicate condition chain */
1100 inc_irg_visited(current_ir_graph);
1102 for(i = 0; i < ARR_LEN(head_entries); ++i) {
1103 out_edges entry = head_entries[i];
1104 ir_node *node = entry.node;
1105 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1107 // add_End_keepalive(get_irg_end(current_ir_graph), pred);
1109 if (is_Block(node)) {
1110 DB((dbg, LEVEL_5, "\nINIT walk block %ld\n", get_irn_node_nr(pred)));
1111 copy_walk(pred, is_nodesblock_marked);
1112 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1114 DB((dbg, LEVEL_5, "\nInit walk node %ld\n", get_irn_node_nr(pred)));
1115 copy_walk( pred, is_nodesblock_marked );
1117 /* ignore keepalives */
1119 /* Node is user of a value assigned inside the loop.
1120 * We'll need a phi since we duplicated the head. */
1121 entry_buffer[entry_c++] = pred;
1125 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1127 inversion_fix_heads();
1129 DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-headsfixed"));
1131 /* Generate phis for users of values assigned in the condition chain and read in the loops body */
1132 for(i = 0; i < entry_c; i++) {
1133 ir_node *cppred, *block, *cpblock, *pred;
1135 /* It is not possible to use
1136 * pred = get_irn_n(entry.node, entry.pred_irn_n);
1137 * because we might have changed the nodes predecessors in construct_ssa
1139 pred = entry_buffer[i];
1140 cppred = get_copy(pred);
1141 block = get_nodes_block(pred);
1142 cpblock = get_nodes_block(cppred);
1144 "construct_ssa (loop out value) original %ld and clone %ld\n",
1145 get_irn_node_nr(pred), get_irn_node_nr(cppred)));
1146 construct_ssa(block, pred, cpblock, cppred);
1150 // char *s = "-SSA_";
1151 // char *n = strdup(" ");
1152 // n[0] = 'a' + (char)i;
1155 // dump_ir_block_graph(current_ir_graph, res );
1158 /* Generate phis for values that are assigned in the condition chain
1159 * but not read in the loops body.
1161 for(i = 0; i < ARR_LEN(head_phi_assign); ++i) {
1162 ir_node *def_block, *inhead_phi_def, *inv_def_block, *inv_inhead_phi_def;
1163 /* Note: construct_ssa only fixes the FIRST nodes usage. */
1164 inhead_phi_def = head_phi_assign[i];
1165 inv_inhead_phi_def = get_copy(inhead_phi_def);
1166 def_block = get_nodes_block(inhead_phi_def);
1167 inv_def_block = get_nodes_block(inv_inhead_phi_def);
1169 "construct_ssa (condition chain out values) original %ld and clone %ld\n",
1170 get_irn_node_nr(inv_inhead_phi_def), get_irn_node_nr(inhead_phi_def)));
1171 construct_ssa(inv_def_block, inv_inhead_phi_def, def_block, inhead_phi_def);
1173 loop_cf_head = get_copy(loop_cf_head);
1177 * Decide if loop inversion, peeling or unrolling should be performed.
1178 * Inversion creates abnormal looking loops. Be careful with optimizations after that.
1180 static void decision_maker(void)
1182 unsigned do_peel = 0;
1183 unsigned do_inversion = 1;
1185 unsigned max_loop_opnodes = 200000;
1187 head_inversion_node_limit = 99910;
1189 cur_loop_outs = NEW_ARR_F(out_edges, 0);
1191 /* Find loop entries walk, find head */
1192 inc_irg_visited( current_ir_graph );
1193 irg_walk_graph( current_ir_graph, get_loop_outs_and_info, NULL, NULL );
1195 /* RETURN if there is no valid head */
1196 if (!loop_cf_head || !loop_cf_head_valid) {
1197 DB((dbg, LEVEL_1, "No valid loop head. Nothing done.\n"));
1201 /* RETURN if there is a call in the loop */
1202 if (loop_info.calls)
1205 /* Loop complexity too high */
1206 if (loop_info.opnodes_n > max_loop_opnodes)
1209 // foreach_out_edge(loop_cf_head, edge) {
1210 // ir_node *node = get_edge_src_irn(edge);
1211 // if ( !is_Block(node) && !is_Proj(node) && !is_Phi(node) )
1212 // ++loop_info.opnodes_head;
1215 inc_irg_visited(current_ir_graph);
1216 loop_walker( loop_outs, NULL, get_invariants, NULL );
1218 /* This could be improved with knowledge about variable range. */
1219 if (loop_info.stores == 0 && loop_info.invariant_loads > 0)
1230 peel(cur_loop_outs);
1234 DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-peeled1"));
1236 DEL_ARR_F(cur_loop_outs);
1238 /* Loop inversion */
1239 /* Search for condition chains. We may not do this before peeling, as peeling changes things. */
1240 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1241 irg_walk_graph(current_ir_graph, unmark_block, NULL, NULL);
1243 cond_chain_entries = NEW_ARR_F(out_edges, 0);
1244 head_inversion_node_count = 0;
1245 inc_irg_visited(current_ir_graph);
1246 set_Block_mark(loop_cf_head, 1);
1247 mark_irn_visited(loop_cf_head);
1248 /* find condition chains */
1249 condition_chains(loop_cf_head);
1251 // TODO assume number of phis to be created. prevent inversion...
1253 /* Loop inversion */
1254 if (loop_info.blocks < 2) {
1256 DB((dbg, LEVEL_2, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1260 cur_head_outs = NEW_ARR_F(out_edges, 0);
1262 /* get all edges pointing into the head or condition chain */
1263 irg_walk_graph(current_ir_graph, get_head_entries, NULL, NULL);
1265 loop_inversion_walk(cur_head_outs);
1267 DEL_ARR_F(cur_head_outs);
1270 DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-inversed2"));
1273 DEL_ARR_F(cond_chain_entries);
1274 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1278 static void analyze_loop(ir_loop *loop)
1280 /* Init new for every loop */
1283 loop_cf_head = NULL;
1284 loop_cf_head_valid = 1;
1285 loop_inv_head = NULL;
1286 loop_peeled_head = NULL;
1288 loop_info.calls = 0;
1289 loop_info.invariant_loads = 0;
1290 loop_info.loads = 0;
1291 loop_info.stores = 0;
1292 loop_info.opnodes_n = 0;
1293 loop_info.blocks = 0;
1295 DB((dbg, LEVEL_1, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
1299 DB((dbg, LEVEL_1, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
1302 /* Find most inner loops and send them to analyze_loop */
1303 static void analyze_inner_loop(ir_loop *loop)
1305 /* descend into sons */
1306 int sons = get_loop_n_sons(loop);
1312 for(s=0; s<sons; s++) {
1313 analyze_inner_loop( get_loop_son(loop, s) );
1321 void loop_optimization(ir_graph *irg)
1326 DB((dbg, LEVEL_1, " >>> loop optimization (Startnode %ld) <<<\n", get_irn_node_nr(get_irg_start(irg))));
1329 link_node_state_list = NULL;
1332 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1333 collect_phiprojs(irg);
1334 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1336 set_current_ir_graph(irg);
1337 assure_cf_loop(irg);
1339 /* allocate node_info for additional information on nodes */
1340 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1341 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1343 loop = get_irg_loop(irg);
1344 sons = get_loop_n_sons(loop);
1346 for (nr=0; nr<sons; nr++) {
1347 analyze_inner_loop(get_loop_son(loop, nr));
1352 ir_free_resources(irg, IR_RESOURCE_PHI_LIST|IR_RESOURCE_IRN_LINK);
1354 DB((dbg, LEVEL_1, " >>> loop optimization done (Startnode %ld)<<<\n", get_irn_node_nr(get_irg_start(irg))));
1357 void firm_init_loop(void) {
1358 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");