2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling, loop peeling
39 #include "array_t.h" /* automatic array */
40 #include "beutil.h" /* get_block */
41 #include "irloop_t.h" /* set_irn_loop*/
/* Debug module handle used by the DB() macros throughout this file. */
49 DEBUG_ONLY(static firm_dbg_module_t *dbg);
52 * Convenience macro for iterating over every phi node of the given block.
53 * Requires phi list per block.
55 #define for_each_phi(block, phi) \
56 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
/* The loop currently being transformed; all is_in_loop() tests compare against it. */
59 static ir_loop *cur_loop;
61 /* abortable walker function */
62 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
64 /* condition for walking a node during a copy_walk */
65 typedef unsigned walker_condition(ir_node *);
67 /* node and position of a predecessor */
68 typedef struct out_edges {
73 /* access complex values through the nodes links */
74 typedef struct node_info {
77 ir_node *link; /* temporary links for ssa creation */
78 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
80 struct node_info *freelistnext; /* linked list to free all node_infos */
83 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
85 static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
86 static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
88 static ir_node *loop_cf_head = NULL; /* Loop head node */
89 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
94 static ir_node *loop_inv_head = NULL;
96 static ir_node *loop_peeled_head = NULL;
98 /* Loop analysis informations */
99 typedef struct loop_info_t {
100 unsigned blocks; /* number of blocks in the loop */
101 unsigned calls; /* number of calls */
102 unsigned loads; /* number of load nodes */
103 unsigned outs; /* outs without keepalives */
105 unsigned invariant_loads;
106 unsigned stores; /* number of store nodes */
107 unsigned opnodes_n; /* nodes that probably result in an instruction */
108 unsigned do_invariant_opt;
112 /* Information about the current loop */
113 static loop_info_t loop_info;
115 /* A walker may start visiting a condition chain with these nodes. */
116 static out_edge *cond_chain_entries;
118 /* Number of unrolling */
/* Counters/limits driving the inversion heuristic (reset per loop in loop_inversion). */
121 static unsigned head_inversion_node_count;
122 static unsigned inversion_head_node_limit;
123 static unsigned head_inversion_block_count;
/* NOTE(review): enable flags for the three transformations; where they are set
 * and read is outside the visible part of this file — confirm against full source. */
125 static unsigned enable_peeling;
126 static unsigned enable_inversion;
127 static unsigned enable_unrolling;
131 * ============= AUXILIARY FUNCTIONS =====================================
136 * Creates object on the heap, and adds it to a linked list to free it later.
/* Allocates a zeroed node_info and pushes it onto the global free list so that
 * free_node_info() can release every allocation in one pass. */
138 static node_info *new_node_info(void) {
139 node_info *l = XMALLOCZ(node_info);
140 l->freelistnext = link_node_state_list;
141 link_node_state_list = l;
/* Returns the node_info attached to n via its irn link. Assumes alloc_node_info ran. */
147 static node_info *get_node_info(ir_node *n)
149 return ((node_info *)get_irn_link(n));
152 /* Allocates a node_info struct for the given node. For use with a walker. */
153 static void alloc_node_info(ir_node *node, void *env)
157 state = new_node_info();
158 set_irn_link(node, (void *)state);
/* Frees every node_info ever allocated, walking the global free list. */
161 static void free_node_info(void)
165 n = link_node_state_list;
167 node_info *next = n->freelistnext;
172 link_node_state_list = NULL;
176 * Use the linked list to reset the reused values of all node_infos
177 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
179 static void reset_node_infos(void)
182 next = link_node_state_list;
183 while(next->freelistnext) {
184 node_info *cur = next;
185 next = cur->freelistnext;
192 /* Returns the nodes node_info link. */
193 static ir_node *get_link(ir_node *n)
195 return ((node_info *)get_irn_link(n))->link;
198 /* Sets the nodes node_info link. */
199 static void set_link(ir_node *n, ir_node *link)
201 ((node_info *)get_irn_link(n))->link = link;
204 /* Returns a nodes copy. */
205 static ir_node *get_copy(ir_node *n)
207 return ((node_info *)get_irn_link(n))->copy;
210 /* Sets a nodes copy. */
211 static void set_copy(ir_node *n, ir_node *copy)
213 ((node_info *)get_irn_link(n) )->copy = copy;
217 * Convenience macro for iterating over every copy in a linked list
220 #define for_each_copy(node) \
221 for ( ; (node) ; (node) = get_copy(node))
224 * Convenience macro for iterating over every copy in 2 linked lists
225 * of copies in parallel.
227 #define for_each_copy2(high1, low1, high2, low2) \
228 for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
229 (high2) = (low2), (low2) = get_copy(low2))
232 * Returns 0 if the node or block is not in cur_loop.
234 static unsigned is_in_loop(ir_node *node)
236 return (get_irn_loop(get_block(node)) == cur_loop);
239 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
240 static unsigned is_alien_edge(ir_node *n, int i)
242 return(!is_in_loop(get_irn_n(n, i)));
245 /* used for block walker */
246 static void reset_block_mark(ir_node *node, void * env)
251 set_Block_mark(node, 0);
/* True if the block containing node carries the condition-chain/head mark. */
254 static unsigned is_nodesblock_marked(ir_node* node)
256 return (get_Block_mark(get_block(node)));
259 /* Returns the number of blocks in a loop. */
260 int get_loop_n_blocks(ir_loop *loop) {
263 elements = get_loop_n_elements(loop);
265 for(e=0; e<elements; e++) {
266 loop_element elem = get_loop_element(loop, e);
267 if (is_ir_node(elem.kind) && is_Block(elem.node))
274 * Add newpred at position pos to node and also add the corresponding value to the phis.
275 * Requires block phi list.
/* Grows the block's in-array by one (appending newpred) and appends the value at
 * position pos to every phi of the block, keeping block and phi arities in sync. */
277 static int duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
285 assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
287 DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n",
288 get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
290 block_arity = get_irn_arity(node);
292 NEW_ARR_A(ir_node*, ins, block_arity + 1);
294 for (i = 0; i < block_arity; ++i) {
295 ins[i] = get_irn_n(node, i);
297 ins[block_arity] = newpred;
299 set_irn_in(node, block_arity + 1, ins);
301 for_each_phi(node, phi) {
302 int phi_arity = get_irn_arity(phi);
303 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
305 NEW_ARR_A(ir_node *, ins, block_arity + 1);
306 for (i=0; i < phi_arity; ++i) {
307 DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
308 ins[i] = get_irn_n(phi, i);
/* The new phi input duplicates the value already flowing in at pos. */
310 ins[block_arity] = get_irn_n(phi, pos);
311 set_irn_in(phi, block_arity + 1, ins);
317 * Finds loop head and loop_info.
/* Walker: inspects every in-edge; a block inside the loop with a control-flow
 * predecessor outside the loop is a head candidate. A second distinct candidate
 * invalidates loop_cf_head_valid, which vetoes the transformations. */
319 static void get_loop_info(ir_node *node, void *env)
321 unsigned node_in_loop, pred_in_loop;
325 arity = get_irn_arity(node);
326 for (i = 0; i < arity; i++) {
327 ir_node *pred = get_irn_n(node, i);
329 pred_in_loop = is_in_loop(pred);
330 node_in_loop = is_in_loop(node);
332 /* collect some loop information */
338 /* Find the loops head/the blocks with cfpred outside of the loop */
339 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
340 ir_node *cfgpred = get_Block_cfgpred(node, i);
342 if (!is_in_loop(cfgpred)) {
343 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
344 /* another head? We do not touch this. */
345 if (loop_cf_head && loop_cf_head != node) {
346 loop_cf_head_valid = 0;
355 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
/* Walker: records (node, pred position) pairs where a node outside the loop uses
 * a value defined inside it; these are the loop's out-edges. */
356 static void get_loop_outs(ir_node *node, void *env)
358 unsigned node_in_loop, pred_in_loop;
362 arity = get_irn_arity(node);
363 for (i = 0; i < arity; i++) {
364 ir_node *pred = get_irn_n(node, i);
366 pred_in_loop = is_in_loop(pred);
367 node_in_loop = is_in_loop(node);
369 if (pred_in_loop && !node_in_loop) {
372 entry.pred_irn_n = i;
373 ARR_APP1(out_edge, cur_loop_outs, entry);
/* Second definition used by search_def_and_create_phis: when the walk reaches
 * ssa_second_def_block it returns ssa_second_def instead of searching further. */
378 static ir_node *ssa_second_def;
379 static ir_node *ssa_second_def_block;
382 * Walks the graph bottom up, searching for definitions and creates phis.
/* Classic SSA reconstruction without dominance information: per-block cached
 * values live in the node_info link; visited flags prevent infinite recursion
 * around loop backedges (the freshly created phi acts as the cached value). */
384 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
392 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %ld\n", get_irn_node_nr(block)));
394 /* Prevents creation of phi that would be bad anyway.
395 * Dead and bad blocks. */
396 if (get_irn_arity(block) < 1 || is_Bad(block))
399 if (block == ssa_second_def_block) {
400 DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
401 return ssa_second_def;
404 /* already processed this block? */
405 if (irn_visited(block)) {
406 ir_node *value = get_link(block);
407 DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
411 irg = get_irn_irg(block);
412 assert(block != get_irg_start_block(irg));
414 /* a Block with only 1 predecessor needs no Phi */
415 n_cfgpreds = get_Block_n_cfgpreds(block);
416 if (n_cfgpreds == 1) {
417 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
420 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
422 value = search_def_and_create_phis(pred_block, mode);
423 set_link(block, value);
424 mark_irn_visited(block);
429 /* create a new Phi */
430 NEW_ARR_A(ir_node*, in, n_cfgpreds);
431 for(i = 0; i < n_cfgpreds; ++i)
432 in[i] = new_Unknown(mode);
434 phi = new_r_Phi(block, n_cfgpreds, in, mode);
436 /* Important: always keep block phi list up to date. */
437 add_Block_phi(block, phi);
438 /* EVERY node is assumed to have a node_info linked. */
439 alloc_node_info(phi, NULL);
441 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
/* Link/mark BEFORE filling the phi inputs so recursive walks that re-enter
 * this block pick up the phi instead of recursing forever. */
443 set_link(block, phi);
444 mark_irn_visited(block);
446 /* set Phi predecessors */
447 for(i = 0; i < n_cfgpreds; ++i) {
449 ir_node *pred_block = get_Block_cfgpred_block(block, i);
450 assert(pred_block != NULL);
451 pred_val = search_def_and_create_phis(pred_block, mode);
452 assert(pred_val != NULL);
454 DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
455 set_irn_n(phi, i, pred_val);
461 * Given a set of values this function constructs SSA-form for the users of the
462 * first value (the users are determined through the out-edges of the value).
463 * Works without using the dominance tree.
465 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
466 ir_node *second_block, ir_node *second_val)
470 const ir_edge_t *edge;
471 const ir_edge_t *next;
473 assert(orig_block && orig_val && second_block && second_val &&
474 "no parameter of construct_ssa may be NULL");
476 /* no need to do anything */
477 if (orig_val == second_val)
480 irg = get_irn_irg(orig_val);
482 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
483 inc_irg_visited(irg);
485 mode = get_irn_mode(orig_val);
486 set_link(orig_block, orig_val);
487 mark_irn_visited(orig_block);
489 ssa_second_def_block = second_block;
490 ssa_second_def = second_val;
492 /* Only fix the users of the first, i.e. the original node */
493 foreach_out_edge_safe(orig_val, edge, next) {
494 ir_node *user = get_edge_src_irn(edge);
495 int j = get_edge_src_pos(edge);
496 ir_node *user_block = get_nodes_block(user);
503 DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user)));
/* Phi users look up the definition in the corresponding predecessor block,
 * all other users in their own block. */
506 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
507 newval = search_def_and_create_phis(pred_block, mode);
509 newval = search_def_and_create_phis(user_block, mode);
512 /* If we get a bad node the user keeps the original in. No second definition needed. */
513 if (newval != user && !is_Bad(newval))
514 set_irn_n(user, j, newval);
517 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
521 * Construct SSA for def and all of its copies.
/* Like construct_ssa, but seeds the walk with the whole linked list of copies of
 * def, and only rewires the single given user. */
523 static void construct_ssa_n(ir_node *def, ir_node *user)
528 const ir_edge_t *edge;
529 const ir_edge_t *next;
530 irg = get_irn_irg(def);
532 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
533 inc_irg_visited(irg);
535 mode = get_irn_mode(def);
537 for_each_copy(iter) {
538 set_link(get_nodes_block(iter), iter);
539 mark_irn_visited(get_nodes_block(iter));
541 DB((dbg, LEVEL_5, "ssa_n: Link def %ld to block %ld\n",
542 get_irn_node_nr(iter), get_irn_node_nr(get_nodes_block(iter))));
545 /* Need to search the outs, because we need the in-pos on the user node. */
546 foreach_out_edge_safe(def, edge, next) {
547 ir_node *edge_user = get_edge_src_irn(edge);
548 int edge_src = get_edge_src_pos(edge);
549 ir_node *user_block = get_nodes_block(user);
552 if (edge_user != user)
556 ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src);
557 newval = search_def_and_create_phis(pred_block, mode);
559 newval = search_def_and_create_phis(user_block, mode);
562 if (newval != user && !is_Bad(newval))
563 set_irn_n(user, edge_src, newval);
566 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
570 * Construct SSA for all definitions in arr.
/* For each collected definition, builds phis merging the original and its copy. */
572 void construct_ssa_foreach(ir_node **arr, int arr_n)
575 for(i = 0; i < arr_n; ++i) {
576 ir_node *cppred, *block, *cpblock, *pred;
579 cppred = get_copy(pred);
580 block = get_nodes_block(pred);
581 cpblock = get_nodes_block(cppred);
582 construct_ssa(block, pred, cpblock, cppred);
586 /* get the number of backedges without alien bes */
/* Counts backedges of loophead; with_alien non-zero also counts backedges whose
 * predecessor lies outside the loop. */
587 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
591 int arity = get_irn_arity(loophead);
592 for (i = 0; i < arity; ++i) {
593 ir_node *pred = get_irn_n(loophead, i);
594 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
601 * Rewires the heads after peeling.
/* After peeling, the original loop head keeps only the (duplicated) backedges
 * (arity 2*backedges_n), while the peeled head takes over all non-backedge
 * entries (arity headarity - backedges_n). Phi inputs are staged in
 * node_info->ins and committed at the end via set_irn_in. */
603 static void peel_fix_heads(void)
605 ir_node **loopheadnins, **peelheadnins;
606 ir_node *loophead = loop_cf_head;
607 ir_node *peelhead = get_copy(loophead);
609 int headarity = get_irn_arity(loophead);
616 int backedges_n = get_backedge_n(loophead, 0);
618 int lhead_arity = 2 * backedges_n;
619 int phead_arity = headarity - backedges_n;
622 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
623 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
625 for_each_phi(loophead, phi) {
626 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
628 for_each_phi(peelhead, phi) {
629 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
632 for (i = 0; i < headarity; i++)
634 ir_node *orgjmp = get_irn_n(loophead, i);
635 ir_node *copyjmp = get_copy(orgjmp);
638 * Rewire the head blocks ins and their phi ins.
639 * Requires phi list per block.
641 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
642 loopheadnins[lheadin_c] = orgjmp;
643 for_each_phi(loophead, phi) {
644 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
648 /* former bes of the peeled code origin now from the loophead */
649 loopheadnins[lheadin_c] = copyjmp;
651 /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
652 * Order is crucial! Predecessors outside of the loop are non existent.
653 * The copy (cloned with its ins!) has pred i,
654 * but phis pred i might not have a copy of itself.
656 for_each_phi(loophead, phi) {
657 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
/* Non-backedge entries move over to the peeled head unchanged. */
661 peelheadnins[pheadin_c] = orgjmp;
662 for_each_phi(peelhead, phi) {
663 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
669 assert(pheadin_c == ARR_LEN(peelheadnins) &&
670 lheadin_c == ARR_LEN(loopheadnins) &&
671 "the constructed head arities do not match the predefined arities");
673 /* assign the ins to the nodes */
674 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
675 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
677 for_each_phi(loophead, phi) {
678 ir_node **ins = get_node_info( phi )->ins;
679 set_irn_in(phi, lhead_arity, ins);
682 for_each_phi(peelhead, phi) {
683 ir_node **ins = get_node_info( phi )->ins;
684 set_irn_in(phi, phead_arity, ins);
689 * Create a raw copy (ins are still the old ones) of the given node.
690 * We rely on copies to be NOT visited.
/* exact_copy + fresh node_info; backedge flags are handled per-in below. */
692 static ir_node *rawcopy_node(ir_node *node)
698 cp = exact_copy(node);
700 arity = get_irn_arity(node);
702 for (i = 0; i < arity; ++i) {
703 if (is_backedge(node, i))
708 cpstate = new_node_info();
709 set_irn_link(cp, cpstate);
713 * exact_copy already sets Macroblock.
714 * Why do we need to do this anyway? */
715 set_Block_MacroBlock(cp, cp);
721 * This walker copies all walked nodes.
722 * If the walk_condition is true for a node, it is walked.
723 * All nodes node_info->copy attributes has to be NULL prior to every to every walk.
/* Recursive copy walker: on a revisit a temporary (ins-less) copy breaks cycles;
 * the final pass fills in the copied ins, block membership and phi lists.
 * set_loop, when non-NULL, is assigned to every copy. */
725 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
731 ir_graph *irg = current_ir_graph;
732 node_info *node_info = get_node_info(node);
735 * break condition and cycle resolver, creating temporary node copies
737 if (get_irn_visited(node) >= get_irg_visited(irg)) {
738 /* Here we rely on nodestate's copy being initialized with NULL */
739 DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
740 if (node_info->copy == NULL) {
741 cp = rawcopy_node(node);
742 DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
/* Mark before descending so cycles terminate at the temp-copy branch above. */
748 mark_irn_visited(node);
750 if (!is_Block(node)) {
751 ir_node *pred = get_nodes_block(node);
752 if (walk_condition(pred))
753 DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred)));
754 copy_walk(pred, walk_condition, set_loop);
757 arity = get_irn_arity(node);
759 NEW_ARR_A(ir_node *, cpin, arity);
761 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
762 ir_node *pred = get_irn_n(node, i);
764 if (walk_condition(pred)) {
765 DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred)));
766 copy_walk(pred, walk_condition, set_loop);
767 cpin[i] = get_copy(pred);
768 DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n",
769 get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
775 /* copy node / finalize temp node */
776 if (node_info->copy == NULL) {
777 /* No temporary copy existent */
778 cp = rawcopy_node(node);
779 DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
781 /* temporary copy is existent but without correct ins */
783 DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
786 if (!is_Block(node)) {
787 ir_node *cpblock = get_copy(get_nodes_block(node));
789 set_nodes_block(cp, cpblock );
790 /* fix the phi information in attr.phis */
792 add_Block_phi(cpblock, cp);
795 set_irn_loop(cp, set_loop);
796 set_irn_in(cp, ARR_LEN(cpin), cpin);
800 * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
/* Duplicates the whole loop once (copy_walk from every out-edge), rewires the
 * two heads via peel_fix_heads, and collects value out-edges for later SSA
 * reconstruction (needs a separate walk, hence the buffer). */
802 static void peel(out_edge *loop_outs)
805 ir_node **entry_buffer;
808 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
810 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
812 /* duplicate loop walk */
813 inc_irg_visited(current_ir_graph);
815 for(i = 0; i < ARR_LEN(loop_outs); i++) {
816 out_edge entry = loop_outs[i];
817 ir_node *node = entry.node;
818 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
820 if (is_Block(node)) {
821 copy_walk(pred, is_in_loop, NULL);
822 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
824 copy_walk(pred, is_in_loop, NULL);
825 /* leave out keepalives */
827 /* Node is user of a value defined inside the loop.
828 * We'll need a phi since we duplicated the loop. */
829 /* Cannot construct_ssa here, because it needs another walker. */
830 entry_buffer[entry_c++] = pred;
834 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
836 /* Rewires the 2 heads */
839 /* Generate phis for values from peeled code and original loop */
840 construct_ssa_foreach(entry_buffer, entry_c);
841 /*for(i = 0; i < entry_c; i++)
843 ir_node *cppred, *block, *cpblock, *pred;
845 pred = entry_buffer[i];
846 cppred = get_copy(pred);
847 block = get_nodes_block(pred);
848 cpblock = get_nodes_block(cppred);
849 construct_ssa(block, pred, cpblock, cppred);
854 * Populates head_entries with (node, pred_pos) tuple
855 * whereas the node's pred at pred_pos is in the head but not the node itself.
856 * Head and condition chain blocks must be marked.
858 static void get_head_outs(ir_node *node, void *env)
861 int arity = get_irn_arity(node);
864 DB((dbg, LEVEL_5, "get head entries %ld \n", get_irn_node_nr(node)));
866 for(i = 0; i < arity; ++i) {
867 /* node is not in the head, but the predecessor is.
868 * (head or loop chain nodes are marked) */
870 DB((dbg, LEVEL_5, "... "));
/* NOTE(review): accesses node->node_nr directly instead of get_irn_node_nr();
 * works but breaks the accessor convention used everywhere else in this file. */
871 DB((dbg, LEVEL_5, "node %ld marked %d (0) pred %d marked %d (1) \n",
872 node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
874 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
877 entry.pred_irn_n = i;
879 "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
880 get_irn_node_nr(node), i, get_irn_node_nr(node), get_irn_node_nr(get_irn_n(node, i))));
881 ARR_APP1(out_edge, cur_head_outs, entry);
887 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
888 * A block belongs to the chain if a condition branches out of the loop.
889 * Returns 1 if the given block belongs to the condition chain.
/* Recursion over block successors; chain membership is recorded via the block
 * mark, node/block counters feed the inversion cost heuristic. */
891 static unsigned find_condition_chains(ir_node *block) {
892 const ir_edge_t *edge;
896 DB((dbg, LEVEL_5, "condition_chains for block %ld\n", get_irn_node_nr(block)));
898 /* Collect all outs, including keeps.
899 * (TODO firm function for number of out edges?) */
900 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
904 /* We do not want to collect more nodes from condition chains, than the limit allows us to.
905 * Also, leave at least one block as body. */
906 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
907 || head_inversion_block_count + 1 == loop_info.blocks) {
908 set_Block_mark(block, 0);
913 /* First: check our successors, and add all succs that are outside of the loop to the list */
914 foreach_block_succ(block, edge) {
915 ir_node *src = get_edge_src_irn( edge );
916 int pos = get_edge_src_pos( edge );
918 if (!is_in_loop(src)) {
923 entry.pred_irn_n = pos;
924 ARR_APP1(out_edge, cond_chain_entries, entry);
925 mark_irn_visited(src);
930 /* this block is not part of the chain,
931 * because the chain would become too long or we have no successor outside of the loop */
933 set_Block_mark(block, 0);
936 set_Block_mark(block, 1);
937 ++head_inversion_block_count;
938 DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
939 head_inversion_node_count += nodes_n;
942 /* Second: walk all successors, and add them to the list if they are not part of the chain */
943 foreach_block_succ(block, edge) {
945 ir_node *src = get_edge_src_irn( edge );
946 int pos = get_edge_src_pos( edge );
948 /* already done cases */
949 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
953 mark_irn_visited(src);
954 DB((dbg, LEVEL_5, "condition chain walk %ld\n", get_irn_node_nr(src)));
955 inchain = find_condition_chains(src);
957 /* if successor is not part of chain we need to collect its outs */
961 entry.pred_irn_n = pos;
962 ARR_APP1(out_edge, cond_chain_entries, entry);
969 * Rewire the loop head and inverted head for loop inversion.
/* Analogous to peel_fix_heads, but the loop head keeps exactly its backedges
 * (lhead_arity = backedges_n) and the inverted head receives the remaining
 * entry edges (ihead_arity = headarity - backedges_n). */
971 static void inversion_fix_heads(void)
973 ir_node **loopheadnins, **invheadnins;
974 ir_node *loophead = loop_cf_head;
975 ir_node *invhead = get_copy(loophead);
977 int headarity = get_irn_arity(loophead);
984 int backedges_n = get_backedge_n(loophead, 0);
985 int lhead_arity = backedges_n;
986 int ihead_arity = headarity - backedges_n;
988 assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations.");
989 assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations.");
991 /* new in arrays for all phis in the head blocks */
992 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
993 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
995 for_each_phi(loophead, phi) {
996 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
998 for_each_phi(invhead, phi) {
999 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1002 for (i = 0; i < headarity; i++) {
1003 ir_node *pred = get_irn_n(loophead, i);
1006 * Rewire the head blocks ins and their phi ins.
1007 * Requires phi list per block.
1009 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1010 /* just copy these edges */
1011 loopheadnins[lheadin_c] = pred;
1012 for_each_phi(loophead, phi) {
1013 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
1017 invheadnins[iheadin_c] = pred;
1018 for_each_phi(invhead, phi) {
1019 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1025 /* assign the ins to the head blocks */
1026 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1027 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1029 /* assign the ins for the phis */
1030 for_each_phi(loophead, phi) {
1031 ir_node **ins = get_node_info(phi)->ins;
1032 set_irn_in(phi, lhead_arity, ins);
1035 for_each_phi(invhead, phi) {
1036 ir_node **ins = get_node_info(phi)->ins;
1037 set_irn_in(phi, ihead_arity, ins);
/* Duplicates the condition chain (marked blocks), rewires the heads and rebuilds
 * SSA for values that leave the chain, as well as for chain-internal phi inputs
 * collected in head_phi_assign. Afterwards loop_cf_head is the copied head. */
1041 static void inversion_walk(out_edge *head_entries)
1046 ir_node **entry_buffer;
1047 ir_node **head_phi_assign;
1049 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1051 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1053 /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
1054 for_each_phi(loop_cf_head , phi) {
1055 for(i=0; i<get_irn_arity(phi); ++i) {
1056 ir_node *def = get_irn_n(phi, i);
1057 if (is_nodesblock_marked(def)) {
1058 ARR_APP1(ir_node *, head_phi_assign, def);
1063 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1066 * duplicate condition chain
1068 inc_irg_visited(current_ir_graph);
1070 for(i = 0; i < ARR_LEN(head_entries); ++i) {
1071 out_edge entry = head_entries[i];
1072 ir_node *node = entry.node;
1073 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1075 if (is_Block(node)) {
1076 DB((dbg, LEVEL_5, "\nInit walk block %ld\n", get_irn_node_nr(pred)));
1077 copy_walk(pred, is_nodesblock_marked, cur_loop);
1078 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1080 DB((dbg, LEVEL_5, "\nInit walk node %ld\n", get_irn_node_nr(pred)));
1081 copy_walk(pred, is_nodesblock_marked, cur_loop);
1083 /* ignore keepalives */
1085 /* Node is user of a value assigned inside the loop.
1086 * We will need a phi since we duplicated the head. */
1087 entry_buffer[entry_c++] = pred;
1091 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1093 inversion_fix_heads();
1095 /* Generate phis for users of values assigned in the condition chain
1096 * and read in the loops body */
1097 construct_ssa_foreach(entry_buffer, entry_c);
1099 /* Generate phis for values that are assigned in the condition chain
1100 * but not read in the loops body. */
1101 construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
1103 loop_cf_head = get_copy(loop_cf_head);
/* Entry point: peel the current loop once, then invalidate the analyses
 * (doms, loopinfo, outs) that the transformation made stale. */
1107 void loop_peeling(void)
1109 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1110 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1112 peel(cur_loop_outs);
1117 set_irg_doms_inconsistent(current_ir_graph);
1118 set_irg_loopinfo_inconsistent(current_ir_graph);
1119 set_irg_outs_inconsistent(current_ir_graph);
1121 DEL_ARR_F(cur_loop_outs);
1124 /* Loop inversion */
/* Entry point: finds the condition chain starting at the loop head, bails out
 * for single-block loops or loops without an invertible chain (e.g. endless
 * loops), otherwise duplicates the chain via inversion_walk. */
1125 void loop_inversion(void)
1127 unsigned do_inversion = 1;
/* NOTE(review): limit is effectively disabled here; the heuristic that would
 * lower it is not visible in this chunk — confirm against full source. */
1129 inversion_head_node_limit = INT_MAX;
1131 /* Search for condition chains. */
1132 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1134 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1136 loop_info.blocks = get_loop_n_blocks(cur_loop);
1137 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1139 head_inversion_node_count = 0;
1140 head_inversion_block_count = 0;
1142 set_Block_mark(loop_cf_head, 1);
1143 mark_irn_visited(loop_cf_head);
1144 inc_irg_visited(current_ir_graph);
1146 find_condition_chains(loop_cf_head);
1148 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1149 if (loop_info.blocks < 2) {
1151 DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1154 /* We also catch endless loops here,
1155 * because they do not have a condition chain. */
1156 if (head_inversion_block_count < 1) {
1158 DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1162 cur_head_outs = NEW_ARR_F(out_edge, 0);
1164 /* Get all edges pointing into the head or condition chain (outs). */
1165 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1166 inversion_walk(cur_head_outs);
1168 DEL_ARR_F(cur_head_outs);
1170 set_irg_doms_inconsistent(current_ir_graph);
1171 set_irg_loopinfo_inconsistent(current_ir_graph);
1172 set_irg_outs_inconsistent(current_ir_graph);
1176 DEL_ARR_F(cond_chain_entries);
1177 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1181 * Returns last element of linked list of copies by
1182 * walking the linked list.
1184 ir_node *get_last_copy(ir_node *node)
1186 ir_node *copy, *cur;
1188 while ((copy = get_copy(cur))) {
1195 * Rewire floating copies of the current loop.
/* After loop_unrolling duplicated the body unroll_times-1 times, this chains
 * the copies top-down along the former backedges and points the topmost head's
 * backedges (and phi inputs) at the last copy in the chain. */
1197 void unrolling_fix_cf(void)
1199 ir_node *loophead = loop_cf_head;
1200 int headarity = get_irn_arity(loophead);
1201 ir_node *phi, *headnode;
1202 /*ir_node *high, *low;*/
1206 int backedges_n = get_backedge_n(loophead, 0);
1207 int unroll_arity = backedges_n;
1209 /* Create ins for all heads and their phis */
1210 headnode = get_copy(loophead);
1211 for_each_copy(headnode) {
1212 NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
1213 for_each_phi(headnode, phi) {
1214 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
1218 /* Append the copies to the existing loop. */
1219 for (i = 0; i < headarity; i++) {
1220 ir_node *upper_head = loophead;
1221 ir_node *lower_head = get_copy(loophead);
1223 ir_node *upper_pred = get_irn_n(loophead, i);
1224 ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
1229 * Build unrolled loop top down
1231 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1232 for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
1233 get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
1235 for_each_phi(upper_head, phi) {
1236 ir_node *phi_copy = get_copy(phi);
1237 get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
1241 last_pred = upper_pred;
1244 /* Fix the topmost loop heads backedges. */
1245 set_irn_n(loophead, i, last_pred);
1246 for_each_phi(loophead, phi) {
1247 ir_node *last_phi = get_last_copy(phi);
1248 ir_node *pred = get_irn_n(last_phi, i);
1249 set_irn_n(phi, i, pred);
/* Commit the staged ins of every copied head and its phis. */
1254 headnode = get_copy(loophead);
1255 for_each_copy(headnode) {
1256 set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
1257 for_each_phi(headnode, phi) {
1258 set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
/* Creates a phi in node's block merging the value node currently reads at
 * phi_pos, then makes node read the new phi at that position. */
1264 static ir_node *add_phi(ir_node *node, int phi_pos)
1269 mode = get_irn_mode(get_irn_n(node, phi_pos));
1270 ir_node *block = get_nodes_block(node);
1271 int n_cfgpreds = get_irn_arity(block);
1272 ir_node *pred = get_irn_n(node, phi_pos);
1275 /* create a new Phi */
1276 NEW_ARR_A(ir_node*, in, n_cfgpreds);
1277 for(i = 0; i < n_cfgpreds; ++i)
1278 in[i] = new_Unknown(mode); /*pred;*/
1280 phi = new_r_Phi(block, n_cfgpreds, in, mode);
1282 assert(phi && "phi null");
/* NOTE(review): this assertion looks inverted — a freshly built Phi should NOT
 * be Bad; expected `assert(!is_Bad(phi) ...)`. Verify against the full source. */
1283 assert(is_Bad(phi) && "phi bad");
1285 /* Important: always keep block phi list up to date. */
1286 add_Block_phi(block, phi);
1287 /* EVERY node is assumed to have a node_info linked. */
1288 alloc_node_info(phi, NULL);
1290 set_irn_n(node, phi_pos, phi);
1298 * Could be improved with variable range information.
/* Performs loop unrolling on cur_loop. The loop body is duplicated
 * (unroll_times - 1) copies via copy_walk; the copies are chained into the
 * control flow and every user outside the loop receives a Phi over all
 * copies through SSA construction.
 * NOTE(review): several lines (declarations of i/j/unroll_times, closing
 * braces, the "line up" call) are elided in this listing. */
1300 void loop_unrolling(void)
/* Collect all edges leaving the current loop: these are both the walker
 * entry points for copying and the nodes that need new Phis afterwards. */
1306 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1307 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1309 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1311 /* duplicate whole loop content */
1312 inc_irg_visited(current_ir_graph);
/* First pass: copy from the loop-side predecessor of each non-Block
 * out-edge, once per additional unroll iteration. */
1314 for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1315 out_edge entry = cur_loop_outs[i];
1316 ir_node *node = entry.node;
1317 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1319 if (!is_Block(node)) {
1321 for (j = 0; j < unroll_times-1; ++j) {
1322 copy_walk(pred, is_in_loop, cur_loop);
/* Continue from the copy so consecutive duplicates are chained. */
1324 pred = get_copy(pred);
/* Second pass: same for Block out-edges; additionally rewire the
 * exit block's cf predecessors to the freshly created copies. */
1329 for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1330 out_edge entry = cur_loop_outs[i];
1331 ir_node *node = entry.node;
1332 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1334 /* build linked list of copied nodes/blocks */
1335 if (is_Block(node)) {
1336 for (j = 0; j < unroll_times-1; ++j) {
1337 copy_walk(pred, is_in_loop, cur_loop);
1338 duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
1340 pred = get_copy(pred);
1345 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1347 /* Line up the floating copies. */
1350 /* Generate phis for all loop outs */
1351 for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1352 out_edge entry = cur_loop_outs[i];
1353 ir_node *node = entry.node;
1354 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
/* Skip blocks and the End node: only data/memory users need Phis. */
1356 if (!is_Block(node) && !is_End(node)) {
1357 DB((dbg, LEVEL_1, " construct_ssa_n def %ld node %ld pos %d\n",
1358 get_irn_node_nr(pred), get_irn_node_nr(node), entry.pred_irn_n));
1359 construct_ssa_n(pred, node);
1363 DEL_ARR_F(cur_loop_outs);
/* Unrolling invalidated dominance, loop tree and out-edge information. */
1365 set_irg_doms_inconsistent(current_ir_graph);
1366 set_irg_loopinfo_inconsistent(current_ir_graph);
1367 set_irg_outs_inconsistent(current_ir_graph);
1370 /* Initialization and analysis/optimization of a single loop. */
/* Resets the per-loop global state, collects information about the given
 * loop and dispatches the enabled transformations (inversion/unrolling).
 * NOTE(review): the transformation calls guarded by the ifs and the early
 * returns' bodies are elided in this listing. */
1371 static void init_analyze(ir_loop *loop)
1373 /* Init new for every loop */
/* Reset cached head information; it is recomputed by get_loop_info. */
1376 loop_cf_head = NULL;
1377 loop_cf_head_valid = 1;
1378 loop_inv_head = NULL;
1379 loop_peeled_head = NULL;
/* Reset collected statistics for this loop. */
1382 loop_info.calls = 0;
1383 loop_info.loads = 0;
1384 loop_info.blocks = 0;
1386 DB((dbg, LEVEL_2, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
/* Walk the graph to fill loop_info and find the loop head. */
1388 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1390 /* RETURN if there is no valid head */
1391 if (!loop_cf_head || !loop_cf_head_valid) {
1392 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
/* Dispatch the transformations selected by the global enable flags. */
1399 if (enable_inversion)
1401 if (enable_unrolling)
1405 /* RETURN if there is a call in the loop */
1406 if (loop_info.calls)
1410 DB((dbg, LEVEL_2, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
1413 /* Find most inner loops and send them to analyze_loop */
/* Recursively descends the loop tree; innermost loops (presumably those
 * without sons — the guarding condition is elided in this listing) are
 * remembered by appending one representative block to the global 'loops'
 * array, so they can be re-identified after the loop tree is rebuilt. */
1414 static void find_most_inner_loop(ir_loop *loop)
1416 /* descend into sons */
1417 int sons = get_loop_n_sons(loop);
1423 el_n = get_loop_n_elements(loop);
1425 for (i=0; i < el_n; ++i) {
1426 elem = get_loop_element(loop, i);
1427 /* We can only rely on the blocks,
1428 * as the loop attribute of the nodes seems not to be set. */
1429 if (is_ir_node(elem.kind) && is_Block(elem.node)) {
/* One block per loop suffices to find the loop again later. */
1430 ARR_APP1(ir_node *, loops, elem.node);
1431 DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
/* Recurse into all son loops. */
1437 for(s=0; s<sons; s++) {
1438 find_most_inner_loop(get_loop_son(loop, s));
1444 * Assure preconditions are met and go through all loops.
/* Driver: establishes out edges, loop tree, phi lists and node_info on
 * every node, collects all innermost loops, then optimizes them one by
 * one, re-establishing the (destroyed) analyses between loops. */
1446 void loop_optimization(ir_graph *irg)
1452 link_node_state_list = NULL;
1453 set_current_ir_graph(irg);
1457 assure_irg_outs(irg);
1459 /* NOTE: sets only the loop attribute of blocks, not nodes */
1460 /* NOTE: Kills links */
1461 assure_cf_loop(irg);
/* Phi lists are needed by for_each_phi; links are only needed while
 * collect_phiprojs runs. */
1463 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1464 collect_phiprojs(irg);
1465 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1467 /* allocate node_info for additional information on nodes */
1468 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1469 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1471 loop = get_irg_loop(irg);
1472 sons = get_loop_n_sons(loop);
/* Collect one representative block per innermost loop. */
1474 loops = NEW_ARR_F(ir_node *, 0);
1476 for (nr = 0; nr < sons; ++nr) {
1477 find_most_inner_loop(get_loop_son(loop, nr));
1480 /* TODO Keep backedges during optimization to avoid
1481 * this ugly allocation and deallocation.
1482 * (set_irn_in seems to destroy them)
1485 for (i = 0; i < ARR_LEN(loops); ++i) {
1488 loop = get_irn_loop(loops[i]);
1492 /* This part is useful for testing
1493 * or has to be used if the backedge information is destroyed.
1494 * Which is the case at the moment, because the backedge information gets lost
1495 * before inversion_fix_heads/unrolling_fix_cf, which results in bads.
1496 * NOTE!: Testsuite runs successfully nevertheless...
1500 * assure_cf_loop() creates a completely new loop tree.
1501 * Thus we cannot optimize a loop, assure_cf_loop() and continue with the next loop,
1502 * as the next loop must be searched because it is not distinguishable from the
1503 * already done loops.
1504 * The links of the loops are also not available anymore (to store a "loop done" flag).
1505 * Therefore we save a block per loop.
1506 * NOTE: We rely on the loop optimizations not to remove any block from the loop.
1507 * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
/* Per-loop optimization; analyses are rebuilt before each loop because
 * the previous transformation invalidated them. */
1509 for (i = 0; i < ARR_LEN(loops); ++i) {
1513 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1515 edges_assure(current_ir_graph);
1516 assure_irg_outs(current_ir_graph);
1518 /* NOTE: sets only the loop attribute of blocks */
1519 /* NOTE: Kills links */
1520 assure_cf_loop(current_ir_graph);
/* Fresh node_info on every node for this iteration. */
1522 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1523 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1525 /* Get loop from block */
1526 loop = get_irn_loop(loops[i]);
/* Final cleanup of the reserved resources. */
1535 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1536 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
/* Public entry point: runs only loop unrolling on irg.
 * NOTE(review): elided lines here presumably clear the peeling flag —
 * confirm against the full source. */
1539 void do_loop_unrolling(ir_graph *irg)
1541 enable_unrolling = 1;
1543 enable_inversion = 0;
1545 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %ld) <<<\n",
1546 get_irn_node_nr(get_irg_start(irg))));
1548 loop_optimization(irg);
1550 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %ld) <<<\n",
1551 get_irn_node_nr(get_irg_start(irg))));
/* Public entry point: runs only loop inversion on irg.
 * NOTE(review): elided lines here presumably clear the peeling flag —
 * confirm against the full source. */
1554 void do_loop_inversion(ir_graph *irg)
1556 enable_unrolling = 0;
1558 enable_inversion = 1;
1560 DB((dbg, LEVEL_2, " >>> inversion (Startnode %ld) <<<\n",
1561 get_irn_node_nr(get_irg_start(irg))));
1563 loop_optimization(irg);
1565 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %ld) <<<\n",
1566 get_irn_node_nr(get_irg_start(irg))));
/* Public entry point: runs only loop peeling on irg.
 * NOTE(review): the line enabling peeling is elided in this listing —
 * confirm against the full source. */
1569 void do_loop_peeling(ir_graph *irg)
1571 enable_unrolling = 0;
1573 enable_inversion = 0;
1575 DB((dbg, LEVEL_2, " >>> peeling (Startnode %ld) <<<\n",
1576 get_irn_node_nr(get_irg_start(irg))));
1578 loop_optimization(irg);
1580 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %ld) <<<\n",
1581 get_irn_node_nr(get_irg_start(irg))));
1585 ir_graph_pass_t *loop_inversion_pass(const char *name) {
1586 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
1589 ir_graph_pass_t *loop_unroll_pass(const char *name) {
1590 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
1593 ir_graph_pass_t *loop_peeling_pass(const char *name) {
1594 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
1597 void firm_init_loop_opt(void)
1599 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");