2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling, loop peeling
39 #include "array_t.h" /* automatic array */
40 #include "beutil.h" /* get_block */
41 #include "irloop_t.h" /* set_irn_loop*/
49 DEBUG_ONLY(static firm_dbg_module_t *dbg);
/* NOTE(review): this extract is a line-numbered, partial dump of a libFirm
 * loop-optimization pass; several original lines (braces, struct fields,
 * declarations) are missing between visible lines. Comments below document
 * only what the visible code shows. */
52 * Convenience macro for iterating over every phi node of the given block.
53 * Requires phi list per block.
55 #define for_each_phi(block, phi) \
56 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
/* The loop currently being processed; is_in_loop() tests against it. */
59 static ir_loop *cur_loop;
61 /* abortable walker function */
62 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
64 /* condition for walking a node during a copy_walk */
65 typedef unsigned walker_condition(ir_node *);
67 /* node and position of a predecessor */
/* NOTE(review): fields of out_edges are on omitted lines; usage elsewhere
 * (entry.node, entry.pred_irn_n) implies an ir_node* plus an int position. */
68 typedef struct out_edges {
73 /* access complex values through the nodes links */
74 typedef struct node_info {
77 ir_node *link; /* temporary links for ssa creation */
78 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
80 struct node_info *freelistnext; /* linked list to free all node_infos */
/* Global head of the free list of all allocated node_infos. */
83 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
85 static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
86 static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
88 static ir_node *loop_cf_head = NULL; /* Loop head node */
89 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
/* Heads produced by inversion / peeling (set elsewhere in the pass). */
94 static ir_node *loop_inv_head = NULL;
96 static ir_node *loop_peeled_head = NULL;
98 /* Loop analysis informations */
99 typedef struct loop_info_t {
100 unsigned blocks; /* number of blocks in the loop */
101 unsigned calls; /* number of calls */
102 unsigned loads; /* number of load nodes */
103 unsigned outs; /* outs without keepalives */
105 unsigned invariant_loads;
106 unsigned stores; /* number of store nodes */
107 unsigned opnodes_n; /* nodes that probably result in an instruction */
108 unsigned do_invariant_opt;
112 /* Information about the current loop */
113 static loop_info_t loop_info;
115 /* A walker may start visiting a condition chain with these nodes. */
116 static out_edge *cond_chain_entries;
118 /* Number of unrolling */
/* Counters/limits used by loop inversion to bound condition-chain size. */
121 static unsigned head_inversion_node_count;
122 static unsigned inversion_head_node_limit;
123 static unsigned head_inversion_block_count;
/* Feature toggles for the three transformations of this pass. */
125 static unsigned enable_peeling;
126 static unsigned enable_inversion;
127 static unsigned enable_unrolling;
131 * ============= AUXILIARY FUNCTIONS =====================================
136 * Creates object on the heap, and adds it to a linked list to free it later.
/* Allocates a zero-initialized node_info and pushes it onto the global
 * link_node_state_list so free_node_info() can release everything at once. */
138 static node_info *new_node_info(void)
140 node_info *l = XMALLOCZ(node_info);
141 l->freelistnext = link_node_state_list;
142 link_node_state_list = l;
/* Returns the node_info attached to n via its irn link (no NULL check —
 * every node is expected to have one, see alloc_node_info). */
148 static node_info *get_node_info(ir_node *n)
150 return ((node_info *)get_irn_link(n));
153 /* Allocates a node_info struct for the given node. For use with a walker. */
154 static void alloc_node_info(ir_node *node, void *env)
158 state = new_node_info();
159 set_irn_link(node, (void *)state);
/* Frees every node_info via the global free list, then resets the list head. */
162 static void free_node_info(void)
166 n = link_node_state_list;
168 node_info *next = n->freelistnext;
173 link_node_state_list = NULL;
177 * Use the linked list to reset the reused values of all node_infos
178 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
180 static void reset_node_infos(void)
183 next = link_node_state_list;
/* NOTE(review): iterating while next->freelistnext would dereference NULL on
 * an empty list — a guard may exist on omitted lines; verify in full source. */
184 while(next->freelistnext) {
185 node_info *cur = next;
186 next = cur->freelistnext;
193 /* Returns the nodes node_info link. */
194 static ir_node *get_link(ir_node *n)
196 return ((node_info *)get_irn_link(n))->link;
199 /* Sets the nodes node_info link. */
200 static void set_link(ir_node *n, ir_node *link)
202 ((node_info *)get_irn_link(n))->link = link;
205 /* Returns a nodes copy. */
206 static ir_node *get_copy(ir_node *n)
208 return ((node_info *)get_irn_link(n))->copy;
211 /* Sets a nodes copy. */
212 static void set_copy(ir_node *n, ir_node *copy)
214 ((node_info *)get_irn_link(n) )->copy = copy;
218 * Convenience macro for iterating over every copy in a linked list
221 #define for_each_copy(node) \
222 for ( ; (node) ; (node) = get_copy(node))
225 * Convenience macro for iterating over every copy in 2 linked lists
226 * of copies in parallel.
/* Advances both chains together; (high1)/(high2) trail one element behind
 * (low1)/(low2), giving access to the previous element of each chain. */
228 #define for_each_copy2(high1, low1, high2, low2) \
229 for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
230 (high2) = (low2), (low2) = get_copy(low2))
233 * Returns 0 if the node or block is not in cur_loop.
235 static unsigned is_in_loop(ir_node *node)
237 return (get_irn_loop(get_block(node)) == cur_loop);
240 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
241 static unsigned is_alien_edge(ir_node *n, int i)
243 return(!is_in_loop(get_irn_n(n, i)));
246 /* used for block walker */
/* Clears the block mark; applied to every block before condition-chain search. */
247 static void reset_block_mark(ir_node *node, void * env)
252 set_Block_mark(node, 0);
/* Nonzero iff the block containing node carries the condition-chain mark. */
255 static unsigned is_nodesblock_marked(ir_node* node)
257 return (get_Block_mark(get_block(node)));
260 /* Returns the number of blocks in a loop. */
/* Counts only direct loop elements that are Block nodes (nested loops'
 * blocks are loop_element entries of kind loop, not counted here). */
261 int get_loop_n_blocks(ir_loop *loop)
265 elements = get_loop_n_elements(loop);
267 for(e=0; e<elements; e++) {
268 loop_element elem = get_loop_element(loop, e);
269 if (is_ir_node(elem.kind) && is_Block(elem.node))
276 * Add newpred at position pos to node and also add the corresponding value to the phis.
277 * Requires block phi list.
/* Appends newpred as an extra control-flow predecessor of the block `node`
 * and extends every phi of the block by one input, duplicating each phi's
 * value at position `pos` for the new edge. */
279 static int duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
287 assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
289 DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n",
290 get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
292 block_arity = get_irn_arity(node);
/* Build a new in-array of size arity+1: old preds plus newpred at the end. */
294 NEW_ARR_A(ir_node*, ins, block_arity + 1);
296 for (i = 0; i < block_arity; ++i) {
297 ins[i] = get_irn_n(node, i);
299 ins[block_arity] = newpred;
301 set_irn_in(node, block_arity + 1, ins);
303 for_each_phi(node, phi) {
304 int phi_arity = get_irn_arity(phi);
305 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
/* NOTE(review): the phi in-array is sized block_arity + 1 but filled up to
 * phi_arity; this assumes phi_arity == block_arity (phi arity matches its
 * block before the extension) — TODO confirm in full source. */
307 NEW_ARR_A(ir_node *, ins, block_arity + 1);
308 for (i=0; i < phi_arity; ++i) {
309 DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
310 ins[i] = get_irn_n(phi, i);
/* The new edge reuses the phi's value from the duplicated position `pos`. */
312 ins[block_arity] = get_irn_n(phi, pos);
313 set_irn_in(phi, block_arity + 1, ins);
319 * Finds loop head and loop_info.
/* Walker callback: inspects each predecessor edge of `node` to detect the
 * loop head (a loop block with a control predecessor outside the loop).
 * If more than one candidate head is found, loop_cf_head_valid is cleared
 * and the loop is left untouched by the pass. */
321 static void get_loop_info(ir_node *node, void *env)
323 unsigned node_in_loop, pred_in_loop;
327 arity = get_irn_arity(node);
328 for (i = 0; i < arity; i++) {
329 ir_node *pred = get_irn_n(node, i);
331 pred_in_loop = is_in_loop(pred);
332 node_in_loop = is_in_loop(node);
334 /* collect some loop information */
340 /* Find the loops head/the blocks with cfpred outside of the loop */
341 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
342 ir_node *cfgpred = get_Block_cfgpred(node, i);
344 if (!is_in_loop(cfgpred)) {
345 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
346 /* another head? We do not touch this. */
347 if (loop_cf_head && loop_cf_head != node) {
348 loop_cf_head_valid = 0;
357 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
/* Walker callback: records every edge whose source is outside the loop but
 * whose target (pred) is inside, as an out_edge in cur_loop_outs. */
358 static void get_loop_outs(ir_node *node, void *env)
360 unsigned node_in_loop, pred_in_loop;
364 arity = get_irn_arity(node);
365 for (i = 0; i < arity; i++) {
366 ir_node *pred = get_irn_n(node, i);
368 pred_in_loop = is_in_loop(pred);
369 node_in_loop = is_in_loop(node);
371 if (pred_in_loop && !node_in_loop) {
374 entry.pred_irn_n = i;
375 ARR_APP1(out_edge, cur_loop_outs, entry);
/* Second definition used by construct_ssa: when the walk reaches
 * ssa_second_def_block, ssa_second_def is returned as the value. */
380 static ir_node *ssa_second_def;
381 static ir_node *ssa_second_def_block;
384 * Walks the graph bottom up, searching for definitions and creates phis.
/* Recursive SSA reconstruction: returns the reaching definition of the value
 * (of the given mode) at the end of `block`, creating phi nodes on the way.
 * Uses the per-block link (set_link/get_link) as the memoized definition and
 * the visited flag to avoid re-processing. */
386 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
394 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %ld\n", get_irn_node_nr(block)));
396 /* Prevents creation of phi that would be bad anyway.
397 * Dead and bad blocks. */
398 if (get_irn_arity(block) < 1 || is_Bad(block))
401 if (block == ssa_second_def_block) {
402 DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
403 return ssa_second_def;
406 /* already processed this block? */
407 if (irn_visited(block)) {
408 ir_node *value = get_link(block);
409 DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
413 irg = get_irn_irg(block);
414 assert(block != get_irg_start_block(irg));
416 /* a Block with only 1 predecessor needs no Phi */
417 n_cfgpreds = get_Block_n_cfgpreds(block);
418 if (n_cfgpreds == 1) {
419 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
422 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
424 value = search_def_and_create_phis(pred_block, mode);
425 set_link(block, value);
426 mark_irn_visited(block);
431 /* create a new Phi */
/* Phi is pre-filled with Unknowns, then linked/marked BEFORE recursing into
 * the predecessors — this breaks cycles through loop back edges. */
432 NEW_ARR_A(ir_node*, in, n_cfgpreds);
433 for(i = 0; i < n_cfgpreds; ++i)
434 in[i] = new_Unknown(mode);
436 phi = new_r_Phi(block, n_cfgpreds, in, mode);
438 /* Important: always keep block phi list up to date. */
439 add_Block_phi(block, phi);
440 /* EVERY node is assumed to have a node_info linked. */
441 alloc_node_info(phi, NULL);
443 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
445 set_link(block, phi);
446 mark_irn_visited(block);
448 /* set Phi predecessors */
449 for(i = 0; i < n_cfgpreds; ++i) {
451 ir_node *pred_block = get_Block_cfgpred_block(block, i);
452 assert(pred_block != NULL);
453 pred_val = search_def_and_create_phis(pred_block, mode);
454 assert(pred_val != NULL);
456 DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
457 set_irn_n(phi, i, pred_val);
463 * Given a set of values this function constructs SSA-form for the users of the
464 * first value (the users are determined through the out-edges of the value).
465 * Works without using the dominance tree.
/* Rewires every user of orig_val to the correct reaching definition, where
 * orig_val is defined in orig_block and second_val in second_block. Needs
 * the IRN_VISITED resource; phi users are resolved via the corresponding
 * predecessor block of the phi's block. */
467 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
468 ir_node *second_block, ir_node *second_val)
472 const ir_edge_t *edge;
473 const ir_edge_t *next;
475 assert(orig_block && orig_val && second_block && second_val &&
476 "no parameter of construct_ssa may be NULL");
478 /* no need to do anything */
479 if (orig_val == second_val)
482 irg = get_irn_irg(orig_val);
484 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
485 inc_irg_visited(irg);
487 mode = get_irn_mode(orig_val);
488 set_link(orig_block, orig_val);
489 mark_irn_visited(orig_block);
/* The second definition is delivered through these globals to
 * search_def_and_create_phis (see its ssa_second_def_block check). */
491 ssa_second_def_block = second_block;
492 ssa_second_def = second_val;
494 /* Only fix the users of the first, i.e. the original node */
495 foreach_out_edge_safe(orig_val, edge, next) {
496 ir_node *user = get_edge_src_irn(edge);
497 int j = get_edge_src_pos(edge);
498 ir_node *user_block = get_nodes_block(user);
505 DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user)));
/* NOTE(review): the branch condition is on omitted lines — presumably
 * the phi-user case takes the pred_block path; confirm in full source. */
508 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
509 newval = search_def_and_create_phis(pred_block, mode);
511 newval = search_def_and_create_phis(user_block, mode);
514 /* If we get a bad node the user keeps the original in. No second definition needed. */
515 if (newval != user && !is_Bad(newval))
516 set_irn_n(user, j, newval);
519 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
523 * Construct SSA for def and all of its copies.
/* Like construct_ssa, but every element of def's copy chain (for_each_copy)
 * acts as a definition in its own block; only the in-edges of the single
 * given `user` node are rewired. */
525 static void construct_ssa_n(ir_node *def, ir_node *user)
530 const ir_edge_t *edge;
531 const ir_edge_t *next;
532 irg = get_irn_irg(def);
534 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
535 inc_irg_visited(irg);
537 mode = get_irn_mode(def);
/* Register each copy as the reaching definition in its containing block. */
539 for_each_copy(iter) {
540 set_link(get_nodes_block(iter), iter);
541 mark_irn_visited(get_nodes_block(iter));
543 DB((dbg, LEVEL_5, "ssa_n: Link def %ld to block %ld\n",
544 get_irn_node_nr(iter), get_irn_node_nr(get_nodes_block(iter))));
547 /* Need to search the outs, because we need the in-pos on the user node. */
548 foreach_out_edge_safe(def, edge, next) {
549 ir_node *edge_user = get_edge_src_irn(edge);
550 int edge_src = get_edge_src_pos(edge);
551 ir_node *user_block = get_nodes_block(user);
/* Skip out-edges that lead to nodes other than the requested user. */
554 if (edge_user != user)
558 ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src);
559 newval = search_def_and_create_phis(pred_block, mode);
561 newval = search_def_and_create_phis(user_block, mode);
564 if (newval != user && !is_Bad(newval))
565 set_irn_n(user, edge_src, newval);
568 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
572 * Construct SSA for all definitions in arr.
/* For each node in arr (length arr_n) builds SSA between the node and its
 * copy, pairing their containing blocks. */
574 void construct_ssa_foreach(ir_node **arr, int arr_n)
577 for(i = 0; i < arr_n; ++i) {
578 ir_node *cppred, *block, *cpblock, *pred;
581 cppred = get_copy(pred);
582 block = get_nodes_block(pred);
583 cpblock = get_nodes_block(cppred);
584 construct_ssa(block, pred, cpblock, cppred);
588 /* get the number of backedges without alien bes */
/* Counts loophead's backedges; with_alien != 0 also counts backedges whose
 * predecessor lies outside the loop. */
589 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
593 int arity = get_irn_arity(loophead);
594 for (i = 0; i < arity; ++i) {
595 ir_node *pred = get_irn_n(loophead, i);
596 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
603 * Rewires the heads after peeling.
/* After loop peeling: the peeled head keeps all former entry edges, while
 * the original loop head is entered only via the (duplicated) backedges —
 * one original jmp plus its copy per in-loop backedge, hence 2*backedges_n. */
605 static void peel_fix_heads(void)
607 ir_node **loopheadnins, **peelheadnins;
608 ir_node *loophead = loop_cf_head;
609 ir_node *peelhead = get_copy(loophead);
611 int headarity = get_irn_arity(loophead);
618 int backedges_n = get_backedge_n(loophead, 0);
620 int lhead_arity = 2 * backedges_n;
621 int phead_arity = headarity - backedges_n;
/* New in-arrays for both heads; per-phi in-arrays live in their node_info. */
624 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
625 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
627 for_each_phi(loophead, phi) {
628 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
630 for_each_phi(peelhead, phi) {
631 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
634 for (i = 0; i < headarity; i++)
636 ir_node *orgjmp = get_irn_n(loophead, i);
637 ir_node *copyjmp = get_copy(orgjmp);
640 * Rewire the head blocks ins and their phi ins.
641 * Requires phi list per block.
643 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
/* In-loop backedge: the loop head keeps the original jmp ... */
644 loopheadnins[lheadin_c] = orgjmp;
645 for_each_phi(loophead, phi) {
646 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
650 /* former bes of the peeled code origin now from the loophead */
/* ... and additionally receives the copied jmp from the peeled iteration. */
651 loopheadnins[lheadin_c] = copyjmp;
653 /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
654 * Order is crucial! Predecessors outside of the loop are non existent.
655 * The copy (cloned with its ins!) has pred i,
656 * but phis pred i might not have a copy of itself.
658 for_each_phi(loophead, phi) {
659 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
/* Non-backedge (entry) edge: it now enters the peeled head instead. */
663 peelheadnins[pheadin_c] = orgjmp;
664 for_each_phi(peelhead, phi) {
665 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
671 assert(pheadin_c == ARR_LEN(peelheadnins) &&
672 lheadin_c == ARR_LEN(loopheadnins) &&
673 "the constructed head arities do not match the predefined arities");
675 /* assign the ins to the nodes */
676 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
677 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
679 for_each_phi(loophead, phi) {
680 ir_node **ins = get_node_info( phi )->ins;
681 set_irn_in(phi, lhead_arity, ins);
684 for_each_phi(peelhead, phi) {
685 ir_node **ins = get_node_info( phi )->ins;
686 set_irn_in(phi, phead_arity, ins);
691 * Create a raw copy (ins are still the old ones) of the given node.
692 * We rely on copies to be NOT visited.
/* exact_copy duplicates the node with its current ins; a fresh node_info is
 * attached to the copy so get_copy/set_copy work on it as well. Backedge
 * flags are handled in the loop below (body partly on omitted lines). */
694 static ir_node *rawcopy_node(ir_node *node)
700 cp = exact_copy(node);
702 arity = get_irn_arity(node);
704 for (i = 0; i < arity; ++i) {
705 if (is_backedge(node, i))
710 cpstate = new_node_info();
711 set_irn_link(cp, cpstate);
715 * exact_copy already sets Macroblock.
716 * Why do we need to do this anyway? */
717 set_Block_MacroBlock(cp, cp);
723 * This walker copies all walked nodes.
724 * If the walk_condition is true for a node, it is walked.
725 * All nodes node_info->copy attributes has to be NULL prior to every to every walk.
/* Recursively copies the subgraph of nodes satisfying walk_condition.
 * Cycles are broken by creating a temporary raw copy (wrong ins) on revisit,
 * whose ins are fixed when the recursion returns to the node. set_loop, if
 * non-NULL, becomes the loop attribute of each copy. */
727 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
733 ir_graph *irg = current_ir_graph;
734 node_info *node_info = get_node_info(node);
737 * break condition and cycle resolver, creating temporary node copies
739 if (get_irn_visited(node) >= get_irg_visited(irg)) {
740 /* Here we rely on nodestate's copy being initialized with NULL */
741 DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
742 if (node_info->copy == NULL) {
743 cp = rawcopy_node(node);
744 DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
750 mark_irn_visited(node);
752 if (!is_Block(node)) {
753 ir_node *pred = get_nodes_block(node);
/* NOTE(review): the if-body braces are likely on omitted lines; as shown,
 * only the DB() would be conditional and copy_walk unconditional — confirm
 * against the full source before editing. */
754 if (walk_condition(pred))
755 DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred)));
756 copy_walk(pred, walk_condition, set_loop);
759 arity = get_irn_arity(node);
761 NEW_ARR_A(ir_node *, cpin, arity);
/* Walk predecessors first so their copies exist before wiring cpin. */
763 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
764 ir_node *pred = get_irn_n(node, i);
766 if (walk_condition(pred)) {
767 DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred)));
768 copy_walk(pred, walk_condition, set_loop);
769 cpin[i] = get_copy(pred);
770 DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n",
771 get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
777 /* copy node / finalize temp node */
778 if (node_info->copy == NULL) {
779 /* No temporary copy existent */
780 cp = rawcopy_node(node);
781 DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
783 /* temporary copy is existent but without correct ins */
785 DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
788 if (!is_Block(node)) {
789 ir_node *cpblock = get_copy(get_nodes_block(node));
791 set_nodes_block(cp, cpblock );
792 /* fix the phi information in attr.phis */
794 add_Block_phi(cpblock, cp);
797 set_irn_loop(cp, set_loop);
798 set_irn_in(cp, ARR_LEN(cpin), cpin);
802 * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
/* Peels one iteration: copy_walks the loop once from every out-edge, extends
 * entry blocks by the copied predecessor (duplicate_preds), then fixes both
 * heads and constructs SSA for values used outside the loop. */
804 static void peel(out_edge *loop_outs)
807 ir_node **entry_buffer;
810 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
812 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
814 /* duplicate loop walk */
815 inc_irg_visited(current_ir_graph);
817 for(i = 0; i < ARR_LEN(loop_outs); i++) {
818 out_edge entry = loop_outs[i];
819 ir_node *node = entry.node;
820 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
822 if (is_Block(node)) {
823 copy_walk(pred, is_in_loop, NULL);
824 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
826 copy_walk(pred, is_in_loop, NULL);
827 /* leave out keepalives */
829 /* Node is user of a value defined inside the loop.
830 * We'll need a phi since we duplicated the loop. */
831 /* Cannot construct_ssa here, because it needs another walker. */
832 entry_buffer[entry_c++] = pred;
836 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
838 /* Rewires the 2 heads */
841 /* Generate phis for values from peeled code and original loop */
842 construct_ssa_foreach(entry_buffer, entry_c);
/* Dead code kept for reference: inlined version of construct_ssa_foreach. */
843 /*for(i = 0; i < entry_c; i++)
845 ir_node *cppred, *block, *cpblock, *pred;
847 pred = entry_buffer[i];
848 cppred = get_copy(pred);
849 block = get_nodes_block(pred);
850 cpblock = get_nodes_block(cppred);
851 construct_ssa(block, pred, cpblock, cppred);
856 * Populates head_entries with (node, pred_pos) tuple
857 * whereas the node's pred at pred_pos is in the head but not the node itself.
858 * Head and condition chain blocks must be marked.
/* Walker callback: collects into cur_head_outs every edge from an unmarked
 * node into a marked (condition-chain) block's node. */
860 static void get_head_outs(ir_node *node, void *env)
863 int arity = get_irn_arity(node);
866 DB((dbg, LEVEL_5, "get head entries %ld \n", get_irn_node_nr(node)));
868 for(i = 0; i < arity; ++i) {
869 /* node is not in the head, but the predecessor is.
870 * (head or loop chain nodes are marked) */
872 DB((dbg, LEVEL_5, "... "));
873 DB((dbg, LEVEL_5, "node %ld marked %d (0) pred %d marked %d (1) \n",
874 node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
876 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
879 entry.pred_irn_n = i;
881 "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
882 get_irn_node_nr(node), i, get_irn_node_nr(node), get_irn_node_nr(get_irn_n(node, i))));
883 ARR_APP1(out_edge, cur_head_outs, entry);
889 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
890 * A block belongs to the chain if a condition branches out of the loop.
891 * Returns 1 if the given block belongs to the condition chain.
/* Recursive DFS from the loop head. A block joins the chain (marked 1) if it
 * has a successor outside the loop and the accumulated node/block counts
 * stay below inversion_head_node_limit resp. loop_info.blocks - 1; out-of-
 * chain successors' entry edges are recorded in cond_chain_entries. */
893 static unsigned find_condition_chains(ir_node *block)
895 const ir_edge_t *edge;
899 DB((dbg, LEVEL_5, "condition_chains for block %ld\n", get_irn_node_nr(block)));
901 /* Collect all outs, including keeps.
902 * (TODO firm function for number of out edges?) */
/* nodes_n (counted on omitted lines) approximates the block's node count. */
903 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
907 /* We do not want to collect more nodes from condition chains, than the limit allows us to.
908 * Also, leave at least one block as body. */
909 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
910 || head_inversion_block_count + 1 == loop_info.blocks) {
911 set_Block_mark(block, 0);
916 /* First: check our successors, and add all succs that are outside of the loop to the list */
917 foreach_block_succ(block, edge) {
918 ir_node *src = get_edge_src_irn( edge );
919 int pos = get_edge_src_pos( edge );
921 if (!is_in_loop(src)) {
926 entry.pred_irn_n = pos;
927 ARR_APP1(out_edge, cond_chain_entries, entry);
928 mark_irn_visited(src);
933 /* this block is not part of the chain,
934 * because the chain would become too long or we have no successor outside of the loop */
936 set_Block_mark(block, 0);
/* Block accepted into the chain: mark it and account for its nodes. */
939 set_Block_mark(block, 1);
940 ++head_inversion_block_count;
941 DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
942 head_inversion_node_count += nodes_n;
945 /* Second: walk all successors, and add them to the list if they are not part of the chain */
946 foreach_block_succ(block, edge) {
948 ir_node *src = get_edge_src_irn( edge );
949 int pos = get_edge_src_pos( edge );
951 /* already done cases */
952 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
956 mark_irn_visited(src);
957 DB((dbg, LEVEL_5, "condition chain walk %ld\n", get_irn_node_nr(src)));
958 inchain = find_condition_chains(src);
960 /* if successor is not part of chain we need to collect its outs */
964 entry.pred_irn_n = pos;
965 ARR_APP1(out_edge, cond_chain_entries, entry);
972 * Rewire the loop head and inverted head for loop inversion.
/* After duplicating the condition chain: the loop head keeps only its
 * in-loop backedges (lhead_arity), the inverted head takes all remaining
 * edges (headarity - backedges_n). Phi ins are redistributed accordingly. */
974 static void inversion_fix_heads(void)
976 ir_node **loopheadnins, **invheadnins;
977 ir_node *loophead = loop_cf_head;
978 ir_node *invhead = get_copy(loophead);
980 int headarity = get_irn_arity(loophead);
987 int backedges_n = get_backedge_n(loophead, 0);
988 int lhead_arity = backedges_n;
989 int ihead_arity = headarity - backedges_n;
991 assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations.");
992 assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations.");
994 /* new in arrays for all phis in the head blocks */
995 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
996 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
998 for_each_phi(loophead, phi) {
999 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1001 for_each_phi(invhead, phi) {
1002 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1005 for (i = 0; i < headarity; i++) {
1006 ir_node *pred = get_irn_n(loophead, i);
1009 * Rewire the head blocks ins and their phi ins.
1010 * Requires phi list per block.
1012 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1013 /* just copy these edges */
1014 loopheadnins[lheadin_c] = pred;
1015 for_each_phi(loophead, phi) {
1016 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
/* Non-backedge: the edge (and phi value) moves to the inverted head. */
1020 invheadnins[iheadin_c] = pred;
1021 for_each_phi(invhead, phi) {
1022 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1028 /* assign the ins to the head blocks */
1029 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1030 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1032 /* assign the ins for the phis */
1033 for_each_phi(loophead, phi) {
1034 ir_node **ins = get_node_info(phi)->ins;
1035 set_irn_in(phi, lhead_arity, ins);
1038 for_each_phi(invhead, phi) {
1039 ir_node **ins = get_node_info(phi)->ins;
1040 set_irn_in(phi, ihead_arity, ins);
/* Performs the actual loop inversion: duplicates the condition chain
 * (blocks marked by find_condition_chains) from all head_entries, rewires
 * the two heads, and rebuilds SSA for values crossing the duplication. */
1044 static void inversion_walk(out_edge *head_entries)
1049 ir_node **entry_buffer;
1050 ir_node **head_phi_assign;
1052 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1054 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1056 /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
1057 for_each_phi(loop_cf_head , phi) {
1058 for(i=0; i<get_irn_arity(phi); ++i) {
1059 ir_node *def = get_irn_n(phi, i);
1060 if (is_nodesblock_marked(def)) {
1061 ARR_APP1(ir_node *, head_phi_assign, def);
1066 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1069 * duplicate condition chain
1071 inc_irg_visited(current_ir_graph);
1073 for(i = 0; i < ARR_LEN(head_entries); ++i) {
1074 out_edge entry = head_entries[i];
1075 ir_node *node = entry.node;
1076 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1078 if (is_Block(node)) {
1079 DB((dbg, LEVEL_5, "\nInit walk block %ld\n", get_irn_node_nr(pred)));
1080 copy_walk(pred, is_nodesblock_marked, cur_loop);
1081 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1083 DB((dbg, LEVEL_5, "\nInit walk node %ld\n", get_irn_node_nr(pred)));
1084 copy_walk(pred, is_nodesblock_marked, cur_loop);
1086 /* ignore keepalives */
1088 /* Node is user of a value assigned inside the loop.
1089 * We will need a phi since we duplicated the head. */
1090 entry_buffer[entry_c++] = pred;
1094 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1096 inversion_fix_heads();
1098 /* Generate phis for users of values assigned in the condition chain
1099 * and read in the loops body */
1100 construct_ssa_foreach(entry_buffer, entry_c);
1102 /* Generate phis for values that are assigned in the condition chain
1103 * but not read in the loops body. */
1104 construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
/* The duplicated chain head becomes the new loop head. */
1106 loop_cf_head = get_copy(loop_cf_head);
/* Entry point: peels the current loop once and invalidates the analysis
 * info (doms, loopinfo, outs) afterwards. */
1110 void loop_peeling(void)
1112 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1113 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1115 peel(cur_loop_outs);
1120 set_irg_doms_inconsistent(current_ir_graph);
1121 set_irg_loopinfo_inconsistent(current_ir_graph);
1122 set_irg_outs_inconsistent(current_ir_graph);
1124 DEL_ARR_F(cur_loop_outs);
1127 /* Loop inversion */
/* Entry point: finds the condition chain of the current loop, bails out on
 * single-block or endless loops, otherwise duplicates the chain via
 * inversion_walk and invalidates the affected analysis info. */
1128 void loop_inversion(void)
1130 unsigned do_inversion = 1;
/* NOTE(review): limit is effectively unbounded here. */
1132 inversion_head_node_limit = INT_MAX;
1134 /* Search for condition chains. */
1135 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1137 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1139 loop_info.blocks = get_loop_n_blocks(cur_loop);
1140 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1142 head_inversion_node_count = 0;
1143 head_inversion_block_count = 0;
1145 set_Block_mark(loop_cf_head, 1);
1146 mark_irn_visited(loop_cf_head);
1147 inc_irg_visited(current_ir_graph);
1149 find_condition_chains(loop_cf_head);
1151 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1152 if (loop_info.blocks < 2) {
1154 DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1157 /* We also catch endless loops here,
1158 * because they do not have a condition chain. */
1159 if (head_inversion_block_count < 1) {
1161 DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1165 cur_head_outs = NEW_ARR_F(out_edge, 0);
1167 /* Get all edges pointing into the head or condition chain (outs). */
1168 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1169 inversion_walk(cur_head_outs);
1171 DEL_ARR_F(cur_head_outs);
1173 set_irg_doms_inconsistent(current_ir_graph);
1174 set_irg_loopinfo_inconsistent(current_ir_graph);
1175 set_irg_outs_inconsistent(current_ir_graph);
1179 DEL_ARR_F(cond_chain_entries);
1180 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1184 * Returns last element of linked list of copies by
1185 * walking the linked list.
1187 ir_node *get_last_copy(ir_node *node)
1189 ir_node *copy, *cur;
1191 while ((copy = get_copy(cur))) {
1198 * Rewire floating copies of the current loop.
/* After unrolling, the copied loop bodies "float": this chains them top-down
 * along the backedges (original head -> copy1 -> copy2 -> ...), rewires each
 * copied head's ins/phi-ins via node_info->ins, and finally points the
 * original head's backedges at the last copy. */
1200 void unrolling_fix_cf(void)
1202 ir_node *loophead = loop_cf_head;
1203 int headarity = get_irn_arity(loophead);
1204 ir_node *phi, *headnode;
1205 /*ir_node *high, *low;*/
1209 int backedges_n = get_backedge_n(loophead, 0);
1210 int unroll_arity = backedges_n;
1212 /* Create ins for all heads and their phis */
1213 headnode = get_copy(loophead);
1214 for_each_copy(headnode) {
1215 NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
1216 for_each_phi(headnode, phi) {
1217 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
1221 /* Append the copies to the existing loop. */
1222 for (i = 0; i < headarity; i++) {
1223 ir_node *upper_head = loophead;
1224 ir_node *lower_head = get_copy(loophead);
1226 ir_node *upper_pred = get_irn_n(loophead, i);
1227 ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
1232 * Build unrolled loop top down
/* Walk the head chain and its backedge-pred chain in lockstep: each copied
 * head receives the previous iteration's backedge jmp as its in. */
1234 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1235 for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
1236 get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
1238 for_each_phi(upper_head, phi) {
1239 ir_node *phi_copy = get_copy(phi);
1240 get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
1244 last_pred = upper_pred;
1247 /* Fix the topmost loop heads backedges. */
1248 set_irn_n(loophead, i, last_pred);
1249 for_each_phi(loophead, phi) {
1250 ir_node *last_phi = get_last_copy(phi);
1251 ir_node *pred = get_irn_n(last_phi, i);
1252 set_irn_n(phi, i, pred);
/* Commit the collected ins to every copied head and its phis. */
1257 headnode = get_copy(loophead);
1258 for_each_copy(headnode) {
1259 set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
1260 for_each_phi(headnode, phi) {
1261 set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
/* Creates a new Phi in the block of `node` (pre-filled with Unknowns of the
 * mode of node's pred at phi_pos) and installs it as node's in at phi_pos.
 * Requires the block phi list; attaches a node_info to the new phi. */
1267 static ir_node *add_phi(ir_node *node, int phi_pos)
1272 mode = get_irn_mode(get_irn_n(node, phi_pos));
1273 ir_node *block = get_nodes_block(node);
1274 int n_cfgpreds = get_irn_arity(block);
1275 ir_node *pred = get_irn_n(node, phi_pos);
1278 /* create a new Phi */
1279 NEW_ARR_A(ir_node*, in, n_cfgpreds);
1280 for(i = 0; i < n_cfgpreds; ++i)
1281 in[i] = new_Unknown(mode); /*pred;*/
1283 phi = new_r_Phi(block, n_cfgpreds, in, mode);
1285 assert(phi && "phi null");
/* NOTE(review): this assert looks inverted — asserting the fresh phi IS Bad
 * would fire on every valid phi. Presumably !is_Bad(phi) was intended;
 * verify against the full source before changing. */
1286 assert(is_Bad(phi) && "phi bad");
1288 /* Important: always keep block phi list up to date. */
1289 add_Block_phi(block, phi);
1290 /* EVERY node is assumed to have a node_info linked. */
1291 alloc_node_info(phi, NULL);
1293 set_irn_n(node, phi_pos, phi);
1301 * Could be improved with variable range informations.
/*
 * Unrolls the current loop (cur_loop) by duplicating its content
 * unroll_times-1 times and reconstructing SSA form for values leaving
 * the loop.
 * NOTE(review): this excerpt elides several lines (declarations of
 * i, j, unroll_times and some braces); the comments below describe only
 * the statements actually visible here.
 */
1303 void loop_unrolling(void)
/* Collect every edge that leaves the current loop; all later walks start
 * from these out-edges. */
1309 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1310 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1312 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1314 /* duplicate whole loop content */
1315 inc_irg_visited(current_ir_graph);
/* First pass: copy the loop body reachable from non-Block out-edge
 * predecessors, once per additional unroll iteration. */
1317 for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1318 out_edge entry = cur_loop_outs[i];
1319 ir_node *node = entry.node;
1320 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1322 if (!is_Block(node)) {
1324 for (j = 0; j < unroll_times-1; ++j) {
1325 copy_walk(pred, is_in_loop, cur_loop);
/* Advance to the copy just made so the next iteration copies the copy. */
1327 pred = get_copy(pred);
/* Second pass: same copy walk starting from Block out-edges; additionally
 * chain each copied block to the original via duplicate_preds. */
1332 for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1333 out_edge entry = cur_loop_outs[i];
1334 ir_node *node = entry.node;
1335 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1337 /* build linked list of copied nodes/blocks */
1338 if (is_Block(node)) {
1339 for (j = 0; j < unroll_times-1; ++j) {
1340 copy_walk(pred, is_in_loop, cur_loop);
1341 duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
1343 pred = get_copy(pred);
1348 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1350 /* Line up the floating copies. */
1353 /* Generate phis for all loop outs */
1354 for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1355 out_edge entry = cur_loop_outs[i];
1356 ir_node *node = entry.node;
1357 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
/* Only data users outside the loop need SSA reconstruction; Blocks and the
 * End node are skipped. */
1359 if (!is_Block(node) && !is_End(node)) {
1360 DB((dbg, LEVEL_1, " construct_ssa_n def %ld node %ld pos %d\n",
1361 get_irn_node_nr(pred), get_irn_node_nr(node), entry.pred_irn_n));
1362 construct_ssa_n(pred, node);
1366 DEL_ARR_F(cur_loop_outs);
/* Unrolling changed the CFG: invalidate dominance, loop and out info. */
1368 set_irg_doms_inconsistent(current_ir_graph);
1369 set_irg_loopinfo_inconsistent(current_ir_graph);
1370 set_irg_outs_inconsistent(current_ir_graph);
1373 /* Initialization and */
/*
 * Resets the per-loop state, gathers information about the given loop and
 * dispatches to the enabled optimization (inversion/unrolling).
 * NOTE(review): several lines are elided in this excerpt (e.g. the bodies
 * of the if-statements and the early returns); comments describe only the
 * visible statements.
 */
1374 static void init_analyze(ir_loop *loop)
1376 /* Init new for every loop */
/* No head found yet; head search is considered valid until proven otherwise. */
1379 loop_cf_head = NULL;
1380 loop_cf_head_valid = 1;
1381 loop_inv_head = NULL;
1382 loop_peeled_head = NULL;
/* Reset the statistics collected by get_loop_info below. */
1385 loop_info.calls = 0;
1386 loop_info.loads = 0;
1387 loop_info.blocks = 0;
1389 DB((dbg, LEVEL_2, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
/* Walk the whole graph to find the loop head and fill loop_info. */
1391 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1393 /* RETURN if there is no valid head */
1394 if (!loop_cf_head || !loop_cf_head_valid) {
1395 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
/* Dispatch to the optimization selected by the global enable_* flags.
 * NOTE(review): the called function names are elided in this excerpt. */
1402 if (enable_inversion)
1404 if (enable_unrolling)
1408 /* RETURN if there is a call in the loop */
1409 if (loop_info.calls)
1413 DB((dbg, LEVEL_2, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
1416 /* Find most inner loops and send them to analyze_loop */
1417 static void find_most_inner_loop(ir_loop *loop)
1419 /* descend into sons */
1420 int sons = get_loop_n_sons(loop);
1426 el_n = get_loop_n_elements(loop);
1428 for (i=0; i < el_n; ++i) {
1429 elem = get_loop_element(loop, i);
1430 /* We can only rely on the blocks,
1431 * as the loop attribute of the nodes seems not to be set. */
1432 if (is_ir_node(elem.kind) && is_Block(elem.node)) {
1433 ARR_APP1(ir_node *, loops, elem.node);
1434 DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
1440 for(s=0; s<sons; s++) {
1441 find_most_inner_loop(get_loop_son(loop, s));
1447 * Assure preconditions are met and go through all loops.
/*
 * Driver: assures all preconditions (out edges, CF loop tree, phi lists,
 * node_info links), collects all most inner loops and runs the enabled
 * loop optimization on each of them.
 * NOTE(review): this excerpt elides declarations (loop, sons, nr, i) and
 * several statements inside the per-loop bodies; comments describe only
 * the visible code.
 */
1449 void loop_optimization(ir_graph *irg)
1455 link_node_state_list = NULL;
1456 set_current_ir_graph(irg);
1460 assure_irg_outs(irg);
1462 /* NOTE: sets only the loop attribute of blocks, not nodes */
1463 /* NOTE: Kills links */
1464 assure_cf_loop(irg);
/* collect_phiprojs needs the link resource; phi lists stay reserved for
 * the whole optimization (see for_each_phi). */
1466 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1467 collect_phiprojs(irg);
1468 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1470 /* allocate node_info for additional information on nodes */
1471 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1472 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
/* Collect one block per most inner loop into the global 'loops' array. */
1474 loop = get_irg_loop(irg);
1475 sons = get_loop_n_sons(loop);
1477 loops = NEW_ARR_F(ir_node *, 0);
1479 for (nr = 0; nr < sons; ++nr) {
1480 find_most_inner_loop(get_loop_son(loop, nr));
1483 /* TODO Keep backedges during optimization to avoid
1484 * this ugly allocation and deallocation.
1485 * (set_irn_in seems to destroy them)
1488 for (i = 0; i < ARR_LEN(loops); ++i) {
1491 loop = get_irn_loop(loops[i]);
1495 /* This part is useful for testing
1496 * or has to be used if the backedge information is destroyed.
1497 * Which is the case at the moment, because the backedge information gets lost
1498 * before inversion_fix_heads/unrolling_fix_cf, which results in bads.
1499 * NOTE!: Testsuite runs successfully nevertheless...
1503 * assure_cf_loop() creates a completely new loop tree.
1504 * Thus we cannot optimize a loop, assure_cf_loop() and continue with the next loop,
1505 * as the next loop must be searched because it is not distinguishable from the
1506 * already done loops.
1507 * The links of the loops are also not available anymore (to store a "loop done" flag).
1508 * Therefore we save a block per loop.
1509 * NOTE: We rely on the loop optimizations not to remove any block from the loop.
1510 * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
1512 for (i = 0; i < ARR_LEN(loops); ++i) {
1516 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
/* Re-establish edges/outs/loop info, which the optimization invalidated. */
1518 edges_assure(current_ir_graph);
1519 assure_irg_outs(current_ir_graph);
1521 /* NOTE: sets only the loop attribute of blocks */
1522 /* NOTE: Kills links */
1523 assure_cf_loop(current_ir_graph);
/* Fresh node_info for the rebuilt graph state. */
1525 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1526 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1528 /* Get loop from block */
1529 loop = get_irn_loop(loops[i]);
1538 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1539 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
/*
 * Public entry point: enables unrolling only, then runs the loop
 * optimization driver on irg.
 * NOTE(review): one statement between the two visible flag assignments is
 * elided in this excerpt (presumably disabling a third enable_* flag, e.g.
 * peeling) — confirm against the full file.
 */
1542 void do_loop_unrolling(ir_graph *irg)
1544 enable_unrolling = 1;
1546 enable_inversion = 0;
1548 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %ld) <<<\n",
1549 get_irn_node_nr(get_irg_start(irg))));
1551 loop_optimization(irg);
1553 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %ld) <<<\n",
1554 get_irn_node_nr(get_irg_start(irg))));
/*
 * Public entry point: enables inversion only, then runs the loop
 * optimization driver on irg.
 * NOTE(review): one statement between the two visible flag assignments is
 * elided in this excerpt (presumably a third enable_* flag) — confirm
 * against the full file.
 */
1557 void do_loop_inversion(ir_graph *irg)
1559 enable_unrolling = 0;
1561 enable_inversion = 1;
1563 DB((dbg, LEVEL_2, " >>> inversion (Startnode %ld) <<<\n",
1564 get_irn_node_nr(get_irg_start(irg))));
1566 loop_optimization(irg);
1568 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %ld) <<<\n",
1569 get_irn_node_nr(get_irg_start(irg))));
/*
 * Public entry point: disables unrolling and inversion, then runs the loop
 * optimization driver on irg.
 * NOTE(review): the statement that presumably enables peeling (between the
 * two visible assignments) is elided in this excerpt — confirm against the
 * full file.
 */
1572 void do_loop_peeling(ir_graph *irg)
1574 enable_unrolling = 0;
1576 enable_inversion = 0;
1578 DB((dbg, LEVEL_2, " >>> peeling (Startnode %ld) <<<\n",
1579 get_irn_node_nr(get_irg_start(irg))));
1581 loop_optimization(irg);
1583 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %ld) <<<\n",
1584 get_irn_node_nr(get_irg_start(irg))));
1588 ir_graph_pass_t *loop_inversion_pass(const char *name)
1590 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
1593 ir_graph_pass_t *loop_unroll_pass(const char *name)
1595 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
1598 ir_graph_pass_t *loop_peeling_pass(const char *name)
1600 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
1603 void firm_init_loop_opt(void)
1605 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");