2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Loop peeling and unrolling
23 * @author Christian Helmer
38 #include "array_t.h" /* automatic array */
39 #include "beutil.h" /* get_block */
40 #include "irloop_t.h" /* set_irn_loop */
43 //#include "irnode_t.h"
/* NOTE(review): this extract embeds original line numbers at the start of each
 * line and elides several lines (struct members, closing braces). Text kept
 * byte-identical; comments only. */
46 DEBUG_ONLY(static firm_dbg_module_t *dbg);
49 * Convenience macro for iterating over every phi node of the given block.
50 * Requires phi list per block.
52 #define for_each_phi(block, phi) \
53 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
/* The loop currently being optimized; queried by is_in_loop(). */
56 static ir_loop *cur_loop;
58 /* The loop walker should be possible to abort if nothing can be done anymore */
59 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
61 /* condition for breaking a copy_walk */
62 typedef unsigned walker_condition(ir_node *);
64 /* stores node and position of a predecessor */
65 typedef struct out_edges {
/* NOTE(review): the members are elided in this extract; usage elsewhere
 * (entry.node, entry.pred_irn_n) shows at least: ir_node *node; int pred_irn_n; */
70 /* access complex values through the nodes links */
71 typedef struct node_info {
/* NOTE(review): members partially elided; usage shows at least a `copy`
 * (get_copy/set_copy) and an `invariant` flag (get_invariants) as well. */
74 ir_node *link; /* temporary links for ssa creation */
75 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
76 struct node_info *freelistnext; /* linked list to free all node_infos */
79 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
81 static out_edges *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
82 static out_edges *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
84 static ir_node *loop_cf_head = NULL; /* Loop head node */
85 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
88 static ir_node *loop_inv_head = NULL;
90 static ir_node *loop_peeled_head = NULL;
92 /* Loop analysis informations */
93 typedef struct loop_info_t {
/* NOTE(review): members partially elided; analyze_loop() also resets
 * `calls` and `loads`, which do not appear below. */
96 unsigned invariant_loads; /* number of load nodes */
97 unsigned stores; /* number of store nodes */
98 unsigned blocks; /* number of blocks in the loop */
99 unsigned opnodes_n; /* nodes that should result in an instruction */
100 unsigned opnodes_head;
103 /* Information about the current loop */
104 static loop_info_t loop_info;
106 /* A walker may start visiting a condition chain with these nodes. */
107 static out_edges *cond_chain_entries;
/* Bookkeeping for loop inversion: how many nodes/blocks the condition chain
 * has collected so far, and the hard limit on collected nodes. */
109 static unsigned head_inversion_node_count;
110 static unsigned head_inversion_node_limit;
111 static unsigned head_inversion_block_count;
115 * ============= AUXILIARY FUNCTIONS =====================================
119 * Creates object on the heap, and adds it to a linked list to free it later.
121 static node_info *new_node_info(void) {
122 node_info *l = XMALLOCZ(node_info);
123 l->freelistnext = link_node_state_list;
124 link_node_state_list = l;
130 static node_info *get_node_info(ir_node *n)
132 return ((node_info *)get_irn_link(n));
135 /* Allocates a node_info struct for the given node. For use with a walker. */
136 static void alloc_node_info(ir_node *node, void *env)
138 node_info *state = new_node_info();
140 set_irn_link(node, (void *)state);
143 static void free_node_info(void)
146 next = link_node_state_list;
147 while(next->freelistnext) {
148 node_info *cur = next;
149 next = cur->freelistnext;
155 * Use the linked list to reset the reused values of all node_infos
156 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
158 static void reset_node_infos(void)
161 next = link_node_state_list;
162 while(next->freelistnext) {
163 node_info *cur = next;
164 next = cur->freelistnext;
172 static ir_node *get_copy(ir_node *n)
174 return ((node_info *)get_irn_link(n))->copy;
177 /* Links the node to its copy */
178 static void set_copy(ir_node *n, ir_node *copy)
180 ((node_info *)get_irn_link(n) )->copy = copy;
183 /* Returns 0 if the node or block is not in cur_loop */
184 static unsigned is_in_loop(ir_node *node)
186 return (get_irn_loop(get_block(node)) == cur_loop);
189 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
190 static unsigned is_alien_edge(ir_node *n, int i)
192 return(!is_in_loop(get_irn_n(n, i)));
195 /* used for walker */
196 static void unmark_block(ir_node *node, void * env)
199 DB((dbg, LEVEL_4, "UNMARK ..."));
200 DB((dbg, LEVEL_4, " UNMARK %ld\n", get_irn_node_nr(node)));
202 set_Block_mark(node, 0);
205 static unsigned is_nodesblock_marked(ir_node* node)
207 return (get_Block_mark(get_block(node)));
210 /* Returns the number of blocks in a loop. */
211 int get_loop_n_blocks(ir_loop *loop) {
214 elements = get_loop_n_elements(loop);
216 for(e=0; e<elements; e++) {
217 loop_element elem = get_loop_element(loop, e);
218 if (is_ir_node(elem.kind) && is_Block(elem.node) )
225 * Add newpred at position pos to node and also add the corresponding value to the phis.
226 * Requires block phi list.
228 static void duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
235 assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
237 DB((dbg, LEVEL_4, "duplicate_preds(node %ld, pos %d, newpred %ld)\n", get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
239 block_arity = get_irn_arity(node);
241 NEW_ARR_A(ir_node*, ins, block_arity + 1 );
242 for (i = 0; i < block_arity; ++i)
243 ins[i] = get_irn_n(node, i);
244 ins[block_arity] = newpred;
246 set_irn_in(node, block_arity + 1, ins);
248 for_each_phi(node, phi) {
249 int phi_arity = get_irn_arity(phi);
250 DB((dbg, LEVEL_4, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
252 NEW_ARR_A(ir_node *, ins, block_arity + 1);
253 for(i=0; i < phi_arity; ++i) {
254 DB((dbg, LEVEL_4, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
255 ins[i] = get_irn_n(phi, i);
257 ins[block_arity] = get_irn_n(phi, pos);
258 set_irn_in(phi, block_arity + 1, ins);
262 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
/* Walker over the whole graph; env is unused. Fills cur_loop_outs and sets
 * loop_cf_head / loop_cf_head_valid as a side effect. */
263 static void get_loop_outs_and_info(ir_node *node, void *env)
265 unsigned node_in_loop, pred_in_loop;
269 arity = get_irn_arity(node);
270 for (i = 0; i < arity; i++) {
271 ir_node *pred = get_irn_n(node, i);
273 pred_in_loop = is_in_loop(pred);
274 node_in_loop = is_in_loop(node);
276 /* collect some loop information */
/* NOTE(review): the statement bodies of the next three guards (presumably
 * ++loop_info.stores/loads/calls) are elided in this extract, and the
 * negated conditions (!is_Store etc.) look inverted for counters —
 * confirm against the full source. */
278 if ( !is_Store(node) )
280 if ( !is_Load(node) )
282 if ( !is_Call(node) )
284 if ( !is_Block(node) && !is_Proj(node) && !is_Phi(node) )
285 ++loop_info.opnodes_n;
288 //Find the loops head/the blocks with cfpred outside of the loop
289 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
290 ir_node *cfgpred = get_Block_cfgpred(node, i);
291 if ( !is_in_loop(cfgpred) ) {
292 //DB((dbg, LEVEL_1, "potential head %+F\n", node));
293 /* another head? We do not touch this. */
294 if (loop_cf_head && loop_cf_head != node) {
295 loop_cf_head_valid = 0;
/* Edge leaving the loop: remember (node, pred position) as a loop out. */
302 if ( pred_in_loop && !node_in_loop ) {
305 entry.pred_irn_n = i;
306 ARR_APP1(out_edges, cur_loop_outs, entry);
312 * Finds invariant loads and marks them as invariant.
313 * (has to be post walk)
/* Returns a value for the abortable loop walker; env is unused.
 * Marks get_node_info(node)->invariant and counts invariant loads. */
315 static unsigned get_invariants(ir_node *node, void *env)
318 int arity = get_irn_arity(node);
321 /* RETURN, no preds to visit */
322 if (arity == 0) return 0;
325 ir_node *pred = get_Load_ptr(node);
/* NOTE(review): single `&` below looks like it should be `&&`; the rest of
 * the condition (lines between 326 and 330) is elided — confirm against
 * the full source. */
326 if ( (get_Load_volatility(node) == volatility_non_volatile) &
330 || get_node_info(node)->invariant ) ) {
331 get_node_info(node)->invariant = 1;
332 ++loop_info.invariant_loads;
335 get_node_info(node)->invariant = 0;
340 /* find loop variant preds */
341 for(i = 0; i < arity; ++i) {
342 ir_node *pred = get_irn_n(node, i);
344 if ( is_in_loop(pred) /* outside loop is loop invariant */
345 && !is_Const(pred) /* constants */
346 && !is_SymConst(pred) /* SymConst */
347 && !get_node_info(node)->invariant ) { /* pred is marked as invariant */
353 get_node_info(node)->invariant = 1;
355 get_node_info(node)->invariant = 0;
/* Shared state for search_def_and_create_phis(): the second definition and
 * the block it lives in, set up by construct_ssa() before the walk. */
362 static ir_node *ssa_second_def;
363 static ir_node *ssa_second_def_block;
366 * Walks the graph bottom up, searching for definitions and create phis.
367 * (Does not handle the special case where the second definition is in the block of the user
368 * of the original definition because it is not necessary here.)
/* Recursive helper of construct_ssa(): returns the reaching definition of
 * the value (mode `mode`) at the end of `block`, creating Phis on demand.
 * Uses the per-block node_info->link as the "current definition" cache and
 * the irn_visited flag as the "already processed" marker. */
370 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
378 DB((dbg, LEVEL_4, "ssa sdacp: block %ld\n", get_irn_node_nr(block)));
380 /* Prevents creation of phi that would be bad anyway.
381 * Dead and bad blocks. */
382 if (get_irn_arity(block) < 1 || is_Bad(block))
385 if (block == ssa_second_def_block) {
386 DB((dbg, LEVEL_4, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
387 return ssa_second_def;
390 /* already processed this block? */
391 if (irn_visited(block)) {
392 ir_node *value = get_node_info(block)->link;
393 DB((dbg, LEVEL_4, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
397 irg = get_irn_irg(block);
398 assert(block != get_irg_start_block(irg));
400 /* a Block with only 1 predecessor needs no Phi */
401 n_cfgpreds = get_Block_n_cfgpreds(block);
402 if (n_cfgpreds == 1) {
403 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
406 DB((dbg, LEVEL_4, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
408 value = search_def_and_create_phis(pred_block, mode);
409 get_node_info(block)->link = value;
410 mark_irn_visited(block);
415 /* create a new Phi */
/* Seed with Unknowns first; the block is linked and marked visited BEFORE
 * the recursive descent so cycles (backedges) terminate on this Phi. */
416 NEW_ARR_A(ir_node*, in, n_cfgpreds);
417 for(i = 0; i < n_cfgpreds; ++i)
418 in[i] = new_Unknown(mode);
420 phi = new_r_Phi(block, n_cfgpreds, in, mode);
422 /* Important: always keep block phi list up to date. */
423 add_Block_phi(block, phi);
424 /* EVERY node is assumed to have a node_info linked. */
425 alloc_node_info(phi, NULL);
427 DB((dbg, LEVEL_4, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
429 get_node_info(block)->link = phi;
430 mark_irn_visited(block);
432 /* set Phi predecessors */
433 for(i = 0; i < n_cfgpreds; ++i) {
434 ir_node *pred_block = get_Block_cfgpred_block(block, i);
435 ir_node *pred_val = search_def_and_create_phis(pred_block, mode);
436 DB((dbg, LEVEL_4, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
437 set_irn_n(phi, i, pred_val);
444 * Given a set of values this function constructs SSA-form for the users of the
445 * first value (the users are determined through the out-edges of the value).
446 * Uses the irn_visited flags. Works without using the dominance tree.
/* @param orig_block   block of the original definition
 * @param orig_val     the original definition whose users are rewired
 * @param second_block block of the second (duplicated) definition
 * @param second_val   the second definition */
448 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
449 ir_node *second_block, ir_node *second_val)
453 const ir_edge_t *edge;
454 const ir_edge_t *next;
456 assert(orig_block && orig_val && second_block && second_val &&
457 "no parameter of construct_ssa may be NULL");
459 /* no need to do anything */
460 if (orig_val == second_val)
463 irg = get_irn_irg(orig_val);
465 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
466 inc_irg_visited(irg);
468 mode = get_irn_mode(orig_val);
469 get_node_info(orig_block)->link = orig_val;
470 mark_irn_visited(orig_block);
472 ssa_second_def_block = second_block;
473 ssa_second_def = second_val;
475 /* Only fix the users of the first, i.e. the original node */
476 foreach_out_edge_safe(orig_val, edge, next) {
477 ir_node *user = get_edge_src_irn(edge);
478 int j = get_edge_src_pos(edge);
479 ir_node *user_block = get_nodes_block(user);
486 DB((dbg, LEVEL_4, "original user %ld\n", get_irn_node_nr(user)));
/* For Phi users the definition must reach the end of the matching
 * predecessor block; for others the top of the user's own block.
 * (The Phi/non-Phi branch structure is elided in this extract.) */
489 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
490 newval = search_def_and_create_phis(pred_block, mode);
492 newval = search_def_and_create_phis(user_block, mode);
495 /* If we get a bad node the user keeps the original in. No second definition needed. */
496 if (newval != user && !is_Bad(newval))
497 set_irn_n(user, j, newval);
500 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
503 /* get the number of backedges without alien bes */
504 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
508 int arity = get_irn_arity(loophead);
509 for (i = 0; i < arity; ++i) {
510 ir_node *pred = get_irn_n(loophead, i);
511 if (is_backedge(loophead, i) && ( with_alien || is_in_loop(pred)) )
518 * Sets the nodes backedges, according to its predecessors link.
520 static void fix_backedge_info(ir_node *node)
523 for (i = 0; i < get_irn_arity(node); ++i)
525 ir_node *pred = get_irn_n(node, i);
526 if (get_node_info(pred)->link != NULL)
527 set_backedge(node, i);
529 set_not_backedge(node, i);
535 * ============= PEELING =====================================
540 * Rewires the heads after peeling.
/* After copy_walk duplicated the loop body, the original head keeps both
 * backedge variants (original + copy) while the peeled head keeps only the
 * former entry edges. Loop counters lheadin_c/pheadin_c (declarations
 * elided in this extract) index into the new in-arrays. */
542 static void peel_fix_heads(void)
544 ir_node **loopheadnins, **peelheadnins;
545 ir_node *loophead = loop_cf_head;
546 ir_node *peelhead = get_copy(loophead);
548 int headarity = get_irn_arity(loophead);
555 int backedges_n = get_backedge_n(loophead, 0);
/* loop head keeps each backedge twice (original and copied jmp);
 * peeled head keeps everything that is not a backedge */
557 int lhead_arity = 2 * backedges_n;
558 int phead_arity = headarity - backedges_n;
561 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
562 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
564 for_each_phi(loophead, phi) {
565 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
567 for_each_phi(peelhead, phi) {
568 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
571 for (i = 0; i < headarity; i++)
573 ir_node *orgjmp = get_irn_n(loophead, i);
574 ir_node *copyjmp = get_copy(orgjmp);
577 * Rewire the head blocks ins and their phi ins.
578 * Requires phi list per block.
580 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
581 loopheadnins[lheadin_c] = orgjmp;
582 /* marks out edge as backedge */
583 get_node_info(orgjmp)->link = orgjmp;
584 for_each_phi(loophead, phi) {
585 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
589 loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */
590 /* marks out edge as normal edge */
591 get_node_info(copyjmp)->link = NULL;
592 /* get_irn_n( get_copy_of(phi), i) <!=> get_copy_of(get_irn_n( phi, i))
593 * Order is crucial! Predecessors outside of the loop are non existent.
594 * The copy (cloned with its ins!) has pred i,
595 * but phis pred i might not have a copy of itself.
597 for_each_phi(loophead, phi) {
598 //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( get_copy_of(phi), i)->node_nr);
599 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
/* non-backedge (or alien) in: goes to the peeled head unchanged */
603 peelheadnins[pheadin_c] = orgjmp;
604 /* marks out edge as normal edge */
605 get_node_info(orgjmp)->link = NULL;
606 for_each_phi(peelhead, phi) {
607 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
614 assert(pheadin_c == ARR_LEN(peelheadnins) &&
615 lheadin_c == ARR_LEN(loopheadnins) &&
616 "the constructed head arities do not match the predefined arities");
619 * assign the ins to the nodes
621 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
622 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
624 /* Fixes the backedge information according to the link.
625 * Following loop optimizations might depend on it. */
626 fix_backedge_info(loophead);
627 fix_backedge_info(peelhead);
629 for_each_phi(loophead, phi) {
630 ir_node **ins = get_node_info( phi )->ins;
631 set_irn_in(phi, lhead_arity, ins);
634 for_each_phi(peelhead, phi) {
635 ir_node **ins = get_node_info( phi )->ins;
636 set_irn_in(phi, phead_arity, ins);
641 * Create a raw copy (ins are still the old ones) of the given node.
643 static ir_node *rawcopy_node(ir_node *node)
648 cp = exact_copy(node);
650 cpstate = new_node_info();
651 set_irn_link(cp, cpstate);
652 mark_irn_visited(cp);
658 ///* This walker copies all walked nodes. The walk_condition determines the nodes to walk. */
659 //static void keepalives_walk(ir_node *node, walker_condition *walk_condition)
663 // ir_graph *irg = current_ir_graph;
666 // * break condition and cycle resolver, creating temporary node copies
668 // if (get_irn_visited(node) >= get_irg_visited(irg)) {
673 // mark_irn_visited(node);
675 // if (!is_Block(node)) {
676 // ir_node *pred = get_nodes_block(node);
677 // if (walk_condition(pred))
678 // keepalives_walk( pred, walk_condition );
681 // arity = get_irn_arity(node);
683 // for (i = get_irn_arity(node) - 1; i >= 0; --i) {
684 // ir_node *pred = get_irn_n(node, i);
686 // if (walk_condition(pred))
687 // keepalives_walk( pred, walk_condition );
690 // add_End_keepalive(get_irg_end(current_ir_graph), node);
695 * This walker copies all walked nodes.
696 * If the walk_condition is true for a node, it is walked.
697 * All nodes node_info->copy attributes has to be NULL prior to every to every walk.
699 static void copy_walk(ir_node *node, walker_condition *walk_condition)
705 ir_graph *irg = current_ir_graph;
706 node_info *node_info = get_node_info(node);
709 * break condition and cycle resolver, creating temporary node copies
711 if (get_irn_visited(node) >= get_irg_visited(irg)) {
712 /* Here we rely on nodestate's copy being initialized with NULL */
713 DB((dbg, LEVEL_4, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
714 if (node_info->copy == NULL) {
715 // if (!is_Const(node) && !is_SymConst(node)) {
716 cp = rawcopy_node(node);
719 // node_info->copy = cp;
721 DB((dbg, LEVEL_4, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
726 // add_End_keepalive(get_irg_end(current_ir_graph), node);
729 mark_irn_visited(node);
/* walk the block of a non-block node first so the copy can be placed */
731 if (!is_Block(node)) {
732 ir_node *pred = get_nodes_block(node);
/* NOTE(review): the `if` below guards only the DB statement — copy_walk of
 * the block pred runs unconditionally. Looks like a misleading-indentation
 * bug (or missing braces elided by the extract); confirm against the full
 * source before relying on it. */
733 if (walk_condition(pred))
734 DB((dbg, LEVEL_4, "walk block %ld\n", get_irn_node_nr(pred)));
735 copy_walk( pred, walk_condition );
738 arity = get_irn_arity(node);
740 NEW_ARR_A(ir_node *, cpin, arity);
742 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
743 ir_node *pred = get_irn_n(node, i);
745 if (walk_condition(pred)) {
746 DB((dbg, LEVEL_4, "walk node %ld\n", get_irn_node_nr(pred)));
747 copy_walk( pred, walk_condition );
748 cpin[i] = get_copy(pred);
749 DB((dbg, LEVEL_4, "copy of %ld gets new in %ld which is copy of %ld\n",
750 get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
756 /* copy node / finalize temp node */
757 if (node_info->copy == NULL) {
758 /* No temporary copy existent */
760 /* Do not copy constants TODO right? */
761 // if (!is_Const(node) && !is_SymConst(node)) {
762 cp = rawcopy_node(node);
765 // node_info->copy = cp;
767 DB((dbg, LEVEL_4, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
769 /* temporary copy is existent but without correct ins */
771 DB((dbg, LEVEL_4, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
774 if (!is_Block(node)) {
775 ir_node *cpblock = get_copy(get_nodes_block(node));
777 set_nodes_block(cp, cpblock );
778 /* fix the phi information in attr.phis */
780 add_Block_phi(cpblock, cp);
782 /* macroblock info has not been copied */
783 set_Block_MacroBlock(cp, cp);
787 //set_irn_loop(cp, cur_loop);
788 set_irn_in(cp, ARR_LEN(cpin), cpin);
791 /* Loop peeling, and fix the cf for the loop entry nodes, which have now more preds */
/* @param loop_outs  flexible array of (node, pred position) edges leaving
 *                   the loop, collected by get_loop_outs_and_info().
 * Declarations of i and entry_c are elided in this extract. */
792 static void peel(out_edges *loop_outs)
795 ir_node **entry_buffer;
798 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
800 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
802 /* duplicate loop walk */
803 // cur_head = loop_cf_head;
804 inc_irg_visited(current_ir_graph);
806 for(i = 0; i < ARR_LEN(loop_outs); i++) {
807 out_edges entry = loop_outs[i];
808 ir_node *node = entry.node;
809 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
811 if (is_Block(node)) {
812 copy_walk( pred, is_in_loop );
813 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
815 copy_walk( pred, is_in_loop );
816 if (!is_End(node)) /* leave out keepalives */
817 /* Node is user of a value defined inside the loop.
818 * We'll need a phi since we duplicated the loop. */
819 /* cannot construct_ssa here, because it needs another walker */
820 entry_buffer[entry_c++] = pred;
824 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
826 /* Rewires the 2 heads */
/* NOTE(review): the call to peel_fix_heads() itself is elided in this
 * extract (between original lines 826 and 829). */
829 /* Generate phis for values from peeled code and original loop */
830 for(i = 0; i < entry_c; i++)
832 ir_node *cppred, *block, *cpblock, *pred;
834 /* It is not possible to use
835 * pred = get_irn_n(entry.node, entry.pred_irn_n);
836 * because we might have changed the nodes predecessors in construct_ssa
838 pred = entry_buffer[i];
839 cppred = get_copy(pred);
840 block = get_nodes_block(pred);
841 cpblock = get_nodes_block(cppred);
842 construct_ssa(block, pred, cpblock, cppred);
847 * Populates head_entries with (node, pred_pos) tuple
848 * whereas the node's pred at pred_pos is in the head but not the node itself.
849 * Head and condition chain blocks must be marked.
/* Walker over the whole graph; env is unused. Appends to cur_head_outs. */
851 static void get_head_entries(ir_node *node, void *env)
854 int arity = get_irn_arity(node);
857 DB((dbg, LEVEL_5, "get head entries \n"));
859 for(i = 0; i < arity; ++i) {
860 /* node is not in the head, but the predecessor is.
861 * (head or loop chain nodes are marked) */
862 DB((dbg, LEVEL_5, "... "));
863 DB((dbg, LEVEL_5, "node %ld marked %d (0) pred %d marked %d (1) \n",
864 node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
865 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
868 entry.pred_irn_n = i;
870 "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
871 node->node_nr, i, node->node_nr, get_irn_n(node, i)->node_nr));
872 ARR_APP1(out_edges, cur_head_outs, entry);
878 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
879 * A block belongs to the chain if a condition branches out of the loop.
880 * Returns if the given block belongs to the condition chain.
881 * FIXME prevent collecting ALL loop blocks (may happen if all blocks jump out of the loop)
/* NOTE(review): the raw printf() calls throughout this function are debug
 * leftovers and should be DB((dbg, ...)) like the rest of the file. */
883 static unsigned condition_chains(ir_node *block) {
884 const ir_edge_t *edge;
888 printf("cd %ld\n", block->node_nr);
890 /* we need all outs, including keeps (TODO firm function for that??) */
/* count the nodes of this block (accumulation into nodes_n elided) */
891 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
895 /* We do not want to collect more nodes from condition chains, than the limit allows us to.
896 * Also, leave at least one block as body. */
897 if (head_inversion_node_count + nodes_n > head_inversion_node_limit
898 || loop_info.blocks == head_inversion_block_count + 1) {
899 set_Block_mark(block, 0);
900 printf(" %ld over limit\n", block->node_nr);
904 printf("blocks ++ %ld\n", block->node_nr);
905 // ++loop_info.blocks;
907 /* First: check our successors, and add all succs that are outside of the loop to the list */
908 foreach_block_succ(block, edge) {
909 ir_node *src = get_edge_src_irn( edge );
910 int pos = get_edge_src_pos( edge );
912 printf("check %ld\n", src->node_nr);
915 printf(" src %ld in loop %ld curlooop %ld \n", src->node_nr, src->loop->loop_nr, cur_loop->loop_nr);
916 if (!is_in_loop(src)) {
918 printf(" src %ld @ %d into block %ld \n", src->node_nr, pos, block->node_nr);
922 entry.pred_irn_n = pos;
923 ARR_APP1(out_edges, cond_chain_entries, entry);
924 mark_irn_visited(src);
928 /* this block is not part of the chain,
929 * because the chain would become too big or we have no succ outside of the loop */
931 printf("mark is 0 %ld\n", block->node_nr);
932 set_Block_mark(block, 0);
935 printf("mark is 1 %ld\n", block->node_nr);
936 set_Block_mark(block, 1);
937 ++head_inversion_block_count;
938 DB((dbg, LEVEL_4, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
939 head_inversion_node_count += nodes_n;
942 /* Second: walk all successors, and add them to the list if they are not part of the chain */
943 foreach_block_succ(block, edge) {
945 ir_node *src = get_edge_src_irn( edge );
946 int pos = get_edge_src_pos( edge );
948 /* already done cases */
949 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
950 // printf("!inloop || visited %ld\n", block->node_nr);
954 mark_irn_visited(src);
955 DB((dbg, LEVEL_4, "condition chain walk %ld\n", get_irn_node_nr(src)));
956 inchain = condition_chains( src );
958 /* if successor is not part of chain we need to collect its outs */
962 entry.pred_irn_n = pos;
963 ARR_APP1(out_edges, cond_chain_entries, entry);
/* Rewires the heads after loop inversion: backedges go to the inverted
 * (copied) head, everything else stays on the original head. Mirror of
 * peel_fix_heads(); counters lheadin_c/iheadin_c are elided here. */
972 static void inversion_fix_heads(void)
974 ir_node **loopheadnins, **invheadnins;
975 ir_node *loophead = loop_cf_head;
976 ir_node *invhead = get_copy(loophead);
978 int headarity = get_irn_arity(loophead);
985 int backedges_n = get_backedge_n(loophead, 0);
986 int lhead_arity = headarity - backedges_n;
987 int ihead_arity = backedges_n;
989 /* new in arrays for all phis in the head blocks */
990 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
991 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
993 for_each_phi(loophead, phi) {
994 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
996 for_each_phi(invhead, phi) {
997 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1000 for (i = 0; i < headarity; i++) {
1001 ir_node *pred = get_irn_n(loophead, i);
1004 * Rewire the head blocks ins and their phi ins.
1005 * Requires phi list per block.
1007 if ( is_backedge(loophead, i) && !is_alien_edge(loophead, i) ) {
1008 invheadnins[iheadin_c] = pred;
1009 for_each_phi(invhead, phi) {
1010 get_node_info( phi )->ins[iheadin_c] = get_irn_n( phi, i) ;
1014 /* just copy these edges */
1015 loopheadnins[lheadin_c] = pred;
1016 for_each_phi(loophead, phi) {
1017 get_node_info( phi )->ins[lheadin_c] = get_irn_n(phi, i);
1023 /* assign the ins to the head blocks */
1024 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1025 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1027 /* assign the ins for the phis */
1028 for_each_phi(loophead, phi) {
1029 ir_node **ins = get_node_info(phi)->ins;
1030 set_irn_in(phi, lhead_arity, ins);
1033 for_each_phi(invhead, phi) {
1034 ir_node **ins = get_node_info(phi)->ins;
1035 set_irn_in(phi, ihead_arity, ins);
/* Duplicates the condition chain (head inversion), rewires the heads and
 * repairs SSA form for values crossing the duplication border.
 * @param head_entries  edges pointing into the head/condition chain,
 *                      collected by get_head_entries(). */
1040 static void loop_inversion_walk(out_edges *head_entries)
1045 ir_node **entry_buffer;
1046 ir_node **head_phi_assign;
1048 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
/* NOTE(review): head_phi_assign is a heap-backed flexible array
 * (NEW_ARR_F); no DEL_ARR_F for it is visible in this extract — possible
 * leak, confirm against the full source. */
1050 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1052 /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
1053 for_each_phi( loop_cf_head , phi) {
1054 for(i=0; i<get_irn_arity(phi); ++i) {
1055 ir_node *def = get_irn_n(phi, i);
1056 if ( is_nodesblock_marked(def) ) {
1057 ARR_APP1(ir_node *, head_phi_assign, def);
1062 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1064 /* duplicate condition chain */
1065 inc_irg_visited(current_ir_graph);
1067 for(i = 0; i < ARR_LEN(head_entries); ++i) {
1068 out_edges entry = head_entries[i];
1069 ir_node *node = entry.node;
1070 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1072 // add_End_keepalive(get_irg_end(current_ir_graph), pred);
1074 if (is_Block(node)) {
1075 DB((dbg, LEVEL_4, "\nINIT walk block %ld\n", get_irn_node_nr(pred)));
1076 copy_walk(pred, is_nodesblock_marked);
1077 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1079 DB((dbg, LEVEL_4, "\nInit walk node %ld\n", get_irn_node_nr(pred)));
1080 copy_walk( pred, is_nodesblock_marked );
1082 /* ignore keepalives */
1084 /* Node is user of a value assigned inside the loop.
1085 * We'll need a phi since we duplicated the head. */
1086 entry_buffer[entry_c++] = pred;
1090 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1092 inversion_fix_heads();
1094 /* Generate phis for users of values assigned in the condition chain and read in the loops body */
1095 for(i = 0; i < entry_c; i++) {
1096 ir_node *cppred, *block, *cpblock, *pred;
1098 /* It is not possible to use
1099 * pred = get_irn_n(entry.node, entry.pred_irn_n);
1100 * because we might have changed the nodes predecessors in construct_ssa
1102 pred = entry_buffer[i];
1103 cppred = get_copy(pred);
1104 block = get_nodes_block(pred);
1105 cpblock = get_nodes_block(cppred);
1107 "construct_ssa (loop out value) original %ld and clone %ld\n",
1108 get_irn_node_nr(pred), get_irn_node_nr(cppred)));
1109 construct_ssa(block, pred, cpblock, cppred);
1113 // char *s = "-SSA_";
1114 // char *n = strdup(" ");
1115 // n[0] = 'a' + (char)i;
1118 // dump_ir_block_graph(current_ir_graph, res );
1121 /* Generate phis for values that are assigned in the condition chain
1122 * but not read in the loops body.
1124 for(i = 0; i < ARR_LEN(head_phi_assign); ++i) {
1125 ir_node *def_block, *inhead_phi_def, *inv_def_block, *inv_inhead_phi_def;
1126 /* Note: construct_ssa only fixes the FIRST nodes usage. */
1127 inhead_phi_def = head_phi_assign[i];
1128 inv_inhead_phi_def = get_copy(inhead_phi_def);
1129 def_block = get_nodes_block(inhead_phi_def);
1130 inv_def_block = get_nodes_block(inv_inhead_phi_def);
1132 "construct_ssa (condition chain out values) original %ld and clone %ld\n",
1133 get_irn_node_nr(inv_inhead_phi_def), get_irn_node_nr(inhead_phi_def)));
1134 construct_ssa(inv_def_block, inv_inhead_phi_def, def_block, inhead_phi_def);
/* from now on the copied head is the loop head */
1136 loop_cf_head = get_copy(loop_cf_head);
1140 * Decide if loop inversion, peeling or unrolling should be performed.
1141 * Inversion creates abnormal looking loops. Be careful with optimizations after that.
1143 static void decision_maker(void)
1145 unsigned do_peel = 0;
1146 unsigned do_inversion = 1;
1148 /* unsigned max_loop_opnodes = 2000000; */
1150 head_inversion_node_limit = 99910;
1152 cur_loop_outs = NEW_ARR_F(out_edges, 0);
1154 /* Find loop entries walk, find head */
1155 inc_irg_visited( current_ir_graph );
1156 irg_walk_graph( current_ir_graph, get_loop_outs_and_info, NULL, NULL );
1158 /* RETURN if there is no valid head */
1159 if (!loop_cf_head || !loop_cf_head_valid) {
1160 DB((dbg, LEVEL_1, "No valid loop head. Nothing done.\n"));
1164 /* RETURN if there is a call in the loop */
1165 if (loop_info.calls)
1168 /* Loop complexity too high */
/* NOTE(review): max_loop_opnodes is only declared commented-out above
 * (line 1148) yet referenced here — confirm where it is declared in the
 * full source. Likewise `loop_outs` at line 1179 is not declared in this
 * extract (presumably cur_loop_outs was meant). */
1169 if (loop_info.opnodes_n > max_loop_opnodes)
1172 // foreach_out_edge(loop_cf_head, edge) {
1173 // ir_node *node = get_edge_src_irn(edge);
1174 // if ( !is_Block(node) && !is_Proj(node) && !is_Phi(node) )
1175 // ++loop_info.opnodes_head;
1178 inc_irg_visited(current_ir_graph);
1179 loop_walker( loop_outs, NULL, get_invariants, NULL );
1181 /* This could be improved with knowledge about variable range. */
1182 if (loop_info.stores == 0 && loop_info.invariant_loads > 0)
1186 (void) get_invariants;
1194 peel(cur_loop_outs);
1198 DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-peeled1"));
1200 DEL_ARR_F(cur_loop_outs);
1202 /* Loop inversion */
1203 /* Search for condition chains. We may not do this before peeling, as peeling changes things. */
1204 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1205 irg_walk_graph(current_ir_graph, unmark_block, NULL, NULL);
1207 loop_info.blocks = get_loop_n_blocks(cur_loop);
1208 cond_chain_entries = NEW_ARR_F(out_edges, 0);
1209 head_inversion_node_count = 0;
1210 head_inversion_block_count = 0;
1211 inc_irg_visited(current_ir_graph);
1212 set_Block_mark(loop_cf_head, 1);
1213 mark_irn_visited(loop_cf_head);
1214 /* find condition chains */
1215 condition_chains(loop_cf_head);
1217 DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-pre_inversion"));
1219 // TODO assume number of phis to be created. prevent inversion in case ...
1221 /* Loop inversion */
1222 /* We catch endless loops here too,
1223 * because they do not have a condition chain and a maximum of 1 block. */
1224 if (loop_info.blocks < 2) {
1226 DB((dbg, LEVEL_1, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1229 if (head_inversion_block_count < 1) {
1231 DB((dbg, LEVEL_1, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1236 cur_head_outs = NEW_ARR_F(out_edges, 0);
1238 /* get all edges pointing into the head or condition chain */
1239 irg_walk_graph(current_ir_graph, get_head_entries, NULL, NULL);
1240 loop_inversion_walk(cur_head_outs);
1242 DEL_ARR_F(cur_head_outs);
1245 DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-inversed2"));
1248 DEL_ARR_F(cond_chain_entries);
1249 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
/* Resets all per-loop global state, sets cur_loop (assignment elided in this
 * extract) and runs the decision maker on the given loop. */
1253 static void analyze_loop(ir_loop *loop)
1255 /* Init new for every loop */
1258 loop_cf_head = NULL;
1259 loop_cf_head_valid = 1;
1260 loop_inv_head = NULL;
1261 loop_peeled_head = NULL;
1263 loop_info.calls = 0;
1264 loop_info.invariant_loads = 0;
1265 loop_info.loads = 0;
1266 loop_info.stores = 0;
1267 loop_info.opnodes_n = 0;
1268 loop_info.blocks = 0;
1270 DB((dbg, LEVEL_1, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
/* the decision_maker() invocation between these two DBs is elided */
1274 DB((dbg, LEVEL_1, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
1277 /* Find most inner loops and send them to analyze_loop */
/* NOTE(review): the base case (a loop without sons being handed to
 * analyze_loop) is elided in this extract — only the recursion over the
 * sons is visible. */
1278 static void analyze_inner_loop(ir_loop *loop)
1280 /* descend into sons */
1281 int sons = get_loop_n_sons(loop);
1287 for(s=0; s<sons; s++) {
1288 analyze_inner_loop( get_loop_son(loop, s) );
/* Entry point: prepares the graph (phi lists, loop info, node_infos) and
 * analyzes every innermost loop of irg.
 * NOTE(review): FIRM_DBG_REGISTER is also performed in firm_init_loop_opt()
 * below — the repetition here is redundant but harmless. */
1296 void loop_optimization(ir_graph *irg)
1301 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
1303 DB((dbg, LEVEL_1, " >>> loop optimization (Startnode %ld) <<<\n", get_irn_node_nr(get_irg_start(irg))));
1306 link_node_state_list = NULL;
/* collect_phiprojs needs the link resource only temporarily */
1310 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1311 collect_phiprojs(irg);
1312 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1314 set_current_ir_graph(irg);
1315 assure_cf_loop(irg);
1317 /* allocate node_info for additional information on nodes */
1318 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1319 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1321 loop = get_irg_loop(irg);
1322 sons = get_loop_n_sons(loop);
1324 for (nr=0; nr<sons; nr++) {
1325 analyze_inner_loop(get_loop_son(loop, nr));
/* the free_node_info() call before releasing the resources is elided */
1330 ir_free_resources(irg, IR_RESOURCE_PHI_LIST|IR_RESOURCE_IRN_LINK);
1332 DB((dbg, LEVEL_1, " >>> loop optimization done (Startnode %ld) <<<\n", get_irn_node_nr(get_irg_start(irg))));
1335 void do_loop_inversion(ir_graph *irg)
1337 /* TODO: add the code here that performs loop inversion only */
1338 loop_optimization(irg);
1341 void do_loop_peeling(ir_graph *irg)
1343 /* TODO: add the code here that performs loop peeling only */
1344 loop_optimization(irg);
1347 void firm_init_loop_opt(void)
1349 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");