2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling, loop peeling
29 #include "iroptimize.h"
46 DEBUG_ONLY(static firm_dbg_module_t *dbg);
49 * Convenience macro for iterating over every phi node of the given block.
50 * Requires phi list per block.
52 #define for_each_phi(block, phi) \
53 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
56 static ir_loop *cur_loop;
58 /* abortable walker function */
59 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
61 /* condition for walking a node during a copy_walk */
62 typedef unsigned walker_condition(ir_node *);
64 /* node and position of a predecessor */
65 typedef struct out_edges {
70 /* access complex values through the nodes links */
71 typedef struct node_info {
74 ir_node *link; /* temporary links for ssa creation */
75 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
77 struct node_info *freelistnext; /* linked list to free all node_infos */
80 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
82 static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
83 static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
85 static ir_node *loop_cf_head = NULL; /* Loop head node */
86 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
91 static ir_node *loop_inv_head = NULL;
93 static ir_node *loop_peeled_head = NULL;
95 /* Loop analysis informations */
96 typedef struct loop_info_t {
97 unsigned blocks; /* number of blocks in the loop */
98 unsigned calls; /* number of calls */
99 unsigned loads; /* number of load nodes */
100 unsigned outs; /* outs without keepalives */
102 unsigned invariant_loads;
103 unsigned stores; /* number of store nodes */
104 unsigned opnodes_n; /* nodes that probably result in an instruction */
105 unsigned do_invariant_opt;
109 /* Information about the current loop */
110 static loop_info_t loop_info;
112 /* A walker may start visiting a condition chain with these nodes. */
113 static out_edge *cond_chain_entries;
115 /* Number of unrolling */
118 static unsigned head_inversion_node_count;
119 static unsigned inversion_head_node_limit;
120 static unsigned head_inversion_block_count;
122 static unsigned enable_peeling;
123 static unsigned enable_inversion;
124 static unsigned enable_unrolling;
128 * ============= AUXILIARY FUNCTIONS =====================================
133 * Creates object on the heap, and adds it to a linked list to free it later.
135 static node_info *new_node_info(void)
137 node_info *l = XMALLOCZ(node_info);
138 l->freelistnext = link_node_state_list;
139 link_node_state_list = l;
145 static node_info *get_node_info(ir_node *n)
147 return ((node_info *)get_irn_link(n));
150 /* Allocates a node_info struct for the given node. For use with a walker. */
151 static void alloc_node_info(ir_node *node, void *env)
155 state = new_node_info();
156 set_irn_link(node, (void *)state);
159 static void free_node_info(void)
163 n = link_node_state_list;
165 node_info *next = n->freelistnext;
170 link_node_state_list = NULL;
174 * Use the linked list to reset the reused values of all node_infos
175 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
177 static void reset_node_infos(void)
180 next = link_node_state_list;
181 while (next->freelistnext) {
182 node_info *cur = next;
183 next = cur->freelistnext;
190 /* Returns the nodes node_info link. */
191 static ir_node *get_link(ir_node *n)
193 return ((node_info *)get_irn_link(n))->link;
196 /* Sets the nodes node_info link. */
197 static void set_link(ir_node *n, ir_node *link)
199 ((node_info *)get_irn_link(n))->link = link;
202 /* Returns a nodes copy. */
203 static ir_node *get_copy(ir_node *n)
205 return ((node_info *)get_irn_link(n))->copy;
208 /* Sets a nodes copy. */
209 static void set_copy(ir_node *n, ir_node *copy)
211 ((node_info *)get_irn_link(n) )->copy = copy;
215 * Convenience macro for iterating over every copy in a linked list
218 #define for_each_copy(node) \
219 for ( ; (node) ; (node) = get_copy(node))
222 * Convenience macro for iterating over every copy in 2 linked lists
223 * of copies in parallel.
225 #define for_each_copy2(high1, low1, high2, low2) \
226 for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
227 (high2) = (low2), (low2) = get_copy(low2))
230 * Returns 0 if the node or block is not in cur_loop.
232 static unsigned is_in_loop(ir_node *node)
234 return (get_irn_loop(get_block(node)) == cur_loop);
237 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
238 static unsigned is_alien_edge(ir_node *n, int i)
240 return(!is_in_loop(get_irn_n(n, i)));
243 /* used for block walker */
244 static void reset_block_mark(ir_node *node, void * env)
249 set_Block_mark(node, 0);
252 static unsigned is_nodesblock_marked(ir_node* node)
254 return (get_Block_mark(get_block(node)));
257 /* Returns the number of blocks in a loop. */
258 static int get_loop_n_blocks(ir_loop *loop)
262 elements = get_loop_n_elements(loop);
264 for (e=0; e<elements; e++) {
265 loop_element elem = get_loop_element(loop, e);
266 if (is_ir_node(elem.kind) && is_Block(elem.node))
273 * Add newpred at position pos to node and also add the corresponding value to the phis.
274 * Requires block phi list.
276 static int duplicate_preds(ir_node* block, unsigned pos, ir_node* newpred)
284 assert(is_Block(block) && "duplicate_preds may be called for blocks only");
286 DB((dbg, LEVEL_5, "duplicate_preds(node %N, pos %d, newpred %N)\n", block, pos, newpred));
288 block_arity = get_irn_arity(block);
290 NEW_ARR_A(ir_node*, ins, block_arity + 1);
292 for (i = 0; i < block_arity; ++i) {
293 ins[i] = get_irn_n(block, i);
295 ins[block_arity] = newpred;
297 set_irn_in(block, block_arity + 1, ins);
301 for_each_phi(block, phi) {
302 int phi_arity = get_irn_arity(phi);
303 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %N\n", phi));
305 NEW_ARR_A(ir_node *, ins, block_arity + 1);
306 for (i = 0; i < phi_arity; ++i) {
307 DB((dbg, LEVEL_5, "pos %N\n", get_irn_n(phi, i)));
308 ins[i] = get_irn_n(phi, i);
310 ins[block_arity] = get_copy(get_irn_n(phi, pos));
311 set_irn_in(phi, block_arity + 1, ins);
318 * Finds loop head and loop_info.
320 static void get_loop_info(ir_node *node, void *env)
322 unsigned node_in_loop, pred_in_loop;
326 arity = get_irn_arity(node);
327 for (i = 0; i < arity; i++) {
328 ir_node *pred = get_irn_n(node, i);
330 pred_in_loop = is_in_loop(pred);
331 node_in_loop = is_in_loop(node);
333 /* collect some loop information */
339 /* Find the loops head/the blocks with cfpred outside of the loop */
340 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
341 ir_node *cfgpred = get_Block_cfgpred(node, i);
343 if (!is_in_loop(cfgpred)) {
344 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
345 /* another head? We do not touch this. */
346 if (loop_cf_head && loop_cf_head != node) {
347 loop_cf_head_valid = 0;
356 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
357 static void get_loop_outs(ir_node *node, void *env)
359 unsigned node_in_loop, pred_in_loop;
363 arity = get_irn_arity(node);
364 for (i = 0; i < arity; ++i) {
365 ir_node *pred = get_irn_n(node, i);
367 pred_in_loop = is_in_loop(pred);
368 node_in_loop = is_in_loop(node);
370 if (pred_in_loop && !node_in_loop) {
373 entry.pred_irn_n = i;
374 ARR_APP1(out_edge, cur_loop_outs, entry);
379 static ir_node *ssa_second_def;
380 static ir_node *ssa_second_def_block;
383 * Walks the graph bottom up, searching for definitions and creates phis.
385 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
393 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
395 /* Prevents creation of phi that would be bad anyway.
396 * Dead and bad blocks. */
397 if (get_irn_arity(block) < 1 || is_Bad(block))
400 if (block == ssa_second_def_block) {
401 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
402 return ssa_second_def;
405 /* already processed this block? */
406 if (irn_visited(block)) {
407 ir_node *value = get_link(block);
408 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
412 irg = get_irn_irg(block);
413 assert(block != get_irg_start_block(irg));
415 /* a Block with only 1 predecessor needs no Phi */
416 n_cfgpreds = get_Block_n_cfgpreds(block);
417 if (n_cfgpreds == 1) {
418 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
421 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
423 value = search_def_and_create_phis(pred_block, mode);
424 set_link(block, value);
425 mark_irn_visited(block);
430 /* create a new Phi */
431 NEW_ARR_A(ir_node*, in, n_cfgpreds);
432 for (i = 0; i < n_cfgpreds; ++i)
433 in[i] = new_Unknown(mode);
435 phi = new_r_Phi(block, n_cfgpreds, in, mode);
437 /* Important: always keep block phi list up to date. */
438 add_Block_phi(block, phi);
439 /* EVERY node is assumed to have a node_info linked. */
440 alloc_node_info(phi, NULL);
442 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
444 set_link(block, phi);
445 mark_irn_visited(block);
447 /* set Phi predecessors */
448 for (i = 0; i < n_cfgpreds; ++i) {
450 ir_node *pred_block = get_Block_cfgpred_block(block, i);
451 assert(pred_block != NULL);
452 pred_val = search_def_and_create_phis(pred_block, mode);
453 assert(pred_val != NULL);
455 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
456 set_irn_n(phi, i, pred_val);
462 * Given a set of values this function constructs SSA-form for the users of the
463 * first value (the users are determined through the out-edges of the value).
464 * Works without using the dominance tree.
466 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
467 ir_node *second_block, ir_node *second_val)
471 const ir_edge_t *edge;
472 const ir_edge_t *next;
474 assert(orig_block && orig_val && second_block && second_val &&
475 "no parameter of construct_ssa may be NULL");
477 /* no need to do anything */
478 if (orig_val == second_val)
481 irg = get_irn_irg(orig_val);
483 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
484 inc_irg_visited(irg);
486 mode = get_irn_mode(orig_val);
487 set_link(orig_block, orig_val);
488 mark_irn_visited(orig_block);
490 ssa_second_def_block = second_block;
491 ssa_second_def = second_val;
493 /* Only fix the users of the first, i.e. the original node */
494 foreach_out_edge_safe(orig_val, edge, next) {
495 ir_node *user = get_edge_src_irn(edge);
496 int j = get_edge_src_pos(edge);
497 ir_node *user_block = get_nodes_block(user);
504 DB((dbg, LEVEL_5, "original user %N\n", user));
507 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
508 newval = search_def_and_create_phis(pred_block, mode);
510 newval = search_def_and_create_phis(user_block, mode);
513 /* If we get a bad node the user keeps the original in. No second definition needed. */
514 if (newval != user && !is_Bad(newval))
515 set_irn_n(user, j, newval);
518 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
522 * Construct SSA for def and all of its copies.
524 static void construct_ssa_n(ir_node *def, ir_node *user)
529 const ir_edge_t *edge;
530 const ir_edge_t *next;
531 irg = get_irn_irg(def);
533 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
534 inc_irg_visited(irg);
536 mode = get_irn_mode(def);
538 for_each_copy(iter) {
539 set_link(get_nodes_block(iter), iter);
540 mark_irn_visited(get_nodes_block(iter));
542 DB((dbg, LEVEL_5, "ssa_n: Link def %N to block %N\n",
543 iter, get_nodes_block(iter)));
546 /* Need to search the outs, because we need the in-pos on the user node. */
547 foreach_out_edge_safe(def, edge, next) {
548 ir_node *edge_user = get_edge_src_irn(edge);
549 int edge_src = get_edge_src_pos(edge);
550 ir_node *user_block = get_nodes_block(user);
553 if (edge_user != user)
557 ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src);
558 newval = search_def_and_create_phis(pred_block, mode);
560 newval = search_def_and_create_phis(user_block, mode);
563 if (newval != user && !is_Bad(newval))
564 set_irn_n(user, edge_src, newval);
567 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
571 * Construct SSA for all definitions in arr.
573 static void construct_ssa_foreach(ir_node **arr, int arr_n)
576 for (i = 0; i < arr_n ; ++i) {
577 ir_node *cppred, *block, *cpblock, *pred;
580 cppred = get_copy(pred);
581 block = get_nodes_block(pred);
582 cpblock = get_nodes_block(cppred);
583 construct_ssa(block, pred, cpblock, cppred);
587 /* get the number of backedges without alien bes */
588 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
592 int arity = get_irn_arity(loophead);
593 for (i = 0; i < arity; ++i) {
594 ir_node *pred = get_irn_n(loophead, i);
595 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
602 * Rewires the heads after peeling.
604 static void peel_fix_heads(void)
606 ir_node **loopheadnins, **peelheadnins;
607 ir_node *loophead = loop_cf_head;
608 ir_node *peelhead = get_copy(loophead);
610 int headarity = get_irn_arity(loophead);
617 int backedges_n = get_backedge_n(loophead, 0);
619 int lhead_arity = 2 * backedges_n;
620 int phead_arity = headarity - backedges_n;
623 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
624 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
626 for_each_phi(loophead, phi) {
627 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
629 for_each_phi(peelhead, phi) {
630 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
633 for (i = 0; i < headarity; i++)
635 ir_node *orgjmp = get_irn_n(loophead, i);
636 ir_node *copyjmp = get_copy(orgjmp);
639 * Rewire the head blocks ins and their phi ins.
640 * Requires phi list per block.
642 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
643 loopheadnins[lheadin_c] = orgjmp;
644 for_each_phi(loophead, phi) {
645 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
649 /* former bes of the peeled code origin now from the loophead */
650 loopheadnins[lheadin_c] = copyjmp;
652 /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
653 * Order is crucial! Predecessors outside of the loop are non existent.
654 * The copy (cloned with its ins!) has pred i,
655 * but phis pred i might not have a copy of itself.
657 for_each_phi(loophead, phi) {
658 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
662 peelheadnins[pheadin_c] = orgjmp;
663 for_each_phi(peelhead, phi) {
664 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
670 assert(pheadin_c == ARR_LEN(peelheadnins) &&
671 lheadin_c == ARR_LEN(loopheadnins) &&
672 "the constructed head arities do not match the predefined arities");
674 /* assign the ins to the nodes */
675 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
676 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
678 for_each_phi(loophead, phi) {
679 ir_node **ins = get_node_info( phi )->ins;
680 set_irn_in(phi, lhead_arity, ins);
683 for_each_phi(peelhead, phi) {
684 ir_node **ins = get_node_info( phi )->ins;
685 set_irn_in(phi, phead_arity, ins);
690 * Create a raw copy (ins are still the old ones) of the given node.
691 * We rely on copies to be NOT visited.
693 static ir_node *rawcopy_node(ir_node *node)
699 cp = exact_copy(node);
701 arity = get_irn_arity(node);
703 for (i = 0; i < arity; ++i) {
704 if (is_backedge(node, i))
709 cpstate = new_node_info();
710 set_irn_link(cp, cpstate);
715 * exact_copy already sets Macroblock.
716 * Why should we do this anyway? */
717 set_Block_MacroBlock(cp, cp);
724 * This walker copies all walked nodes.
725 * If the walk_condition is true for a node, it is walked.
726 * All nodes node_info->copy attributes have to be NULL prior to every walk.
728 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
734 ir_graph *irg = current_ir_graph;
735 node_info *node_info = get_node_info(node);
738 * break condition and cycle resolver, creating temporary node copies
740 if (get_irn_visited(node) >= get_irg_visited(irg)) {
741 /* Here we rely on nodestate's copy being initialized with NULL */
742 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
743 if (node_info->copy == NULL) {
744 cp = rawcopy_node(node);
745 DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
751 mark_irn_visited(node);
753 if (!is_Block(node)) {
754 ir_node *pred = get_nodes_block(node);
755 if (walk_condition(pred))
756 DB((dbg, LEVEL_5, "walk block %N\n", pred));
757 copy_walk(pred, walk_condition, set_loop);
760 arity = get_irn_arity(node);
762 NEW_ARR_A(ir_node *, cpin, arity);
764 for (i = 0; i < arity; ++i) {
765 ir_node *pred = get_irn_n(node, i);
767 if (walk_condition(pred)) {
768 DB((dbg, LEVEL_5, "walk node %N\n", pred));
769 copy_walk(pred, walk_condition, set_loop);
770 cpin[i] = get_copy(pred);
771 DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
772 node, get_copy(pred), pred));
778 /* copy node / finalize temp node */
779 if (node_info->copy == NULL) {
780 /* No temporary copy existent */
781 cp = rawcopy_node(node);
782 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
784 /* temporary copy is existent but without correct ins */
786 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
789 if (!is_Block(node)) {
790 ir_node *cpblock = get_copy(get_nodes_block(node));
792 set_nodes_block(cp, cpblock );
794 add_Block_phi(cpblock, cp);
797 set_irn_loop(cp, set_loop);
798 set_irn_in(cp, ARR_LEN(cpin), cpin);
802 * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
804 static void peel(out_edge *loop_outs)
807 ir_node **entry_buffer;
810 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
812 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
814 /* duplicate loop walk */
815 inc_irg_visited(current_ir_graph);
817 for (i = 0; i < ARR_LEN(loop_outs); i++) {
818 out_edge entry = loop_outs[i];
819 ir_node *node = entry.node;
820 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
822 if (is_Block(node)) {
823 copy_walk(pred, is_in_loop, NULL);
824 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
826 copy_walk(pred, is_in_loop, NULL);
827 /* leave out keepalives */
829 /* Node is user of a value defined inside the loop.
830 * We'll need a phi since we duplicated the loop. */
831 /* Cannot construct_ssa here, because it needs another walker. */
832 entry_buffer[entry_c] = pred;
838 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
840 /* Rewires the 2 heads */
843 /* Generate phis for values from peeled code and original loop */
844 construct_ssa_foreach(entry_buffer, entry_c);
845 /*for (i = 0; i < entry_c; i++)
847 ir_node *cppred, *block, *cpblock, *pred;
849 pred = entry_buffer[i];
850 cppred = get_copy(pred);
851 block = get_nodes_block(pred);
852 cpblock = get_nodes_block(cppred);
853 construct_ssa(block, pred, cpblock, cppred);
858 * Populates head_entries with (node, pred_pos) tuple
859 * whereas the node's pred at pred_pos is in the head but not the node itself.
860 * Head and condition chain blocks must be marked.
862 static void get_head_outs(ir_node *node, void *env)
865 int arity = get_irn_arity(node);
868 DB((dbg, LEVEL_5, "get head entries %N \n", node));
870 for (i = 0; i < arity; ++i) {
871 /* node is not in the head, but the predecessor is.
872 * (head or loop chain nodes are marked) */
874 DB((dbg, LEVEL_5, "... "));
875 DB((dbg, LEVEL_5, "node %N marked %d (0) pred %d marked %d (1) \n",
876 node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
878 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
881 entry.pred_irn_n = i;
883 "Found head chain entry %N @%d because !inloop %N and inloop %N\n",
884 node, i, node, get_irn_n(node, i)));
885 ARR_APP1(out_edge, cur_head_outs, entry);
891 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
892 * A block belongs to the chain if a condition branches out of the loop.
893 * Returns 1 if the given block belongs to the condition chain.
895 static unsigned find_condition_chains(ir_node *block)
897 const ir_edge_t *edge;
901 DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
903 /* Collect all outs, including keeps.
904 * (TODO firm function for number of out edges?) */
905 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
909 /* We do not want to collect more nodes from condition chains, than the limit allows us to.
910 * Also, leave at least one block as body. */
911 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
912 || head_inversion_block_count + 1 == loop_info.blocks) {
913 set_Block_mark(block, 0);
918 /* First: check our successors, and add all succs that are outside of the loop to the list */
919 foreach_block_succ(block, edge) {
920 ir_node *src = get_edge_src_irn( edge );
921 int pos = get_edge_src_pos( edge );
923 if (!is_in_loop(src)) {
928 entry.pred_irn_n = pos;
929 ARR_APP1(out_edge, cond_chain_entries, entry);
930 mark_irn_visited(src);
935 /* this block is not part of the chain,
936 * because the chain would become too long or we have no successor outside of the loop */
938 set_Block_mark(block, 0);
941 set_Block_mark(block, 1);
942 ++head_inversion_block_count;
943 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
944 head_inversion_node_count += nodes_n;
947 /* Second: walk all successors, and add them to the list if they are not part of the chain */
948 foreach_block_succ(block, edge) {
950 ir_node *src = get_edge_src_irn( edge );
951 int pos = get_edge_src_pos( edge );
953 /* already done cases */
954 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
958 mark_irn_visited(src);
959 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
960 inchain = find_condition_chains(src);
962 /* if successor is not part of chain we need to collect its outs */
966 entry.pred_irn_n = pos;
967 ARR_APP1(out_edge, cond_chain_entries, entry);
974 * Rewire the loop head and inverted head for loop inversion.
976 static void inversion_fix_heads(void)
978 ir_node **loopheadnins, **invheadnins;
979 ir_node *loophead = loop_cf_head;
980 ir_node *invhead = get_copy(loophead);
982 int headarity = get_irn_arity(loophead);
989 int backedges_n = get_backedge_n(loophead, 0);
990 int lhead_arity = backedges_n;
991 int ihead_arity = headarity - backedges_n;
993 assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations.");
994 assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations.");
996 /* new in arrays for all phis in the head blocks */
997 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
998 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
1000 for_each_phi(loophead, phi) {
1001 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1003 for_each_phi(invhead, phi) {
1004 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1007 for (i = 0; i < headarity; i++) {
1008 ir_node *pred = get_irn_n(loophead, i);
1011 * Rewire the head blocks ins and their phi ins.
1012 * Requires phi list per block.
1014 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1015 /* just copy these edges */
1016 loopheadnins[lheadin_c] = pred;
1017 for_each_phi(loophead, phi) {
1018 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
1022 invheadnins[iheadin_c] = pred;
1023 for_each_phi(invhead, phi) {
1024 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1030 /* assign the ins to the head blocks */
1031 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1032 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1034 /* assign the ins for the phis */
1035 for_each_phi(loophead, phi) {
1036 ir_node **ins = get_node_info(phi)->ins;
1037 set_irn_in(phi, lhead_arity, ins);
1040 for_each_phi(invhead, phi) {
1041 ir_node **ins = get_node_info(phi)->ins;
1042 set_irn_in(phi, ihead_arity, ins);
1046 static void inversion_walk(out_edge *head_entries)
1051 ir_node **entry_buffer;
1052 ir_node **head_phi_assign;
1054 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1056 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1058 /* Find assignments in the condition chain,
1059 * to construct_ssa for them after the loop inversion. */
1060 for_each_phi(loop_cf_head , phi) {
1061 int arity = get_irn_arity(phi);
1062 for (i = 0; i < arity; ++i) {
1063 ir_node *def = get_irn_n(phi, i);
1064 if (is_nodesblock_marked(def)) {
1065 ARR_APP1(ir_node *, head_phi_assign, def);
1070 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1073 * duplicate condition chain
1075 inc_irg_visited(current_ir_graph);
1077 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1078 out_edge entry = head_entries[i];
1079 ir_node *node = entry.node;
1080 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1082 if (is_Block(node)) {
1083 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1085 copy_walk(pred, is_nodesblock_marked, cur_loop);
1086 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1088 DB((dbg, LEVEL_5, "\nInit walk node %N\n", pred));
1090 copy_walk(pred, is_nodesblock_marked, cur_loop);
1092 /* ignore keepalives */
1093 if (!is_End(node)) {
1094 /* Node is user of a value assigned inside the loop.
1095 * We will need a phi since we duplicated the head. */
1096 entry_buffer[entry_c] = pred;
1102 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1104 inversion_fix_heads();
1106 /* Generate phis for users of values assigned in the condition chain
1107 * and read in the loops body */
1108 construct_ssa_foreach(entry_buffer, entry_c);
1110 /* Generate phis for values that are assigned in the condition chain
1111 * but not read in the loops body. */
1112 construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
1114 loop_cf_head = get_copy(loop_cf_head);
1118 static void loop_peeling(void)
1120 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1121 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1123 peel(cur_loop_outs);
1128 set_irg_doms_inconsistent(current_ir_graph);
1129 set_irg_loopinfo_inconsistent(current_ir_graph);
1130 set_irg_outs_inconsistent(current_ir_graph);
1132 DEL_ARR_F(cur_loop_outs);
1135 /* Loop inversion */
1136 static void loop_inversion(void)
1138 unsigned do_inversion = 1;
1140 inversion_head_node_limit = INT_MAX;
1142 /* Search for condition chains. */
1143 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1145 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1147 loop_info.blocks = get_loop_n_blocks(cur_loop);
1148 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1150 head_inversion_node_count = 0;
1151 head_inversion_block_count = 0;
1153 set_Block_mark(loop_cf_head, 1);
1154 mark_irn_visited(loop_cf_head);
1155 inc_irg_visited(current_ir_graph);
1157 find_condition_chains(loop_cf_head);
1159 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1160 if (loop_info.blocks < 2) {
1162 DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1165 /* We also catch endless loops here,
1166 * because they do not have a condition chain. */
1167 if (head_inversion_block_count < 1) {
1169 DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1173 cur_head_outs = NEW_ARR_F(out_edge, 0);
1175 /* Get all edges pointing into the head or condition chain (outs). */
1176 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1177 inversion_walk(cur_head_outs);
1179 DEL_ARR_F(cur_head_outs);
1181 set_irg_doms_inconsistent(current_ir_graph);
1182 set_irg_loopinfo_inconsistent(current_ir_graph);
1183 set_irg_outs_inconsistent(current_ir_graph);
1187 DEL_ARR_F(cond_chain_entries);
1188 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1192 * Returns last element of linked list of copies by
1193 * walking the linked list.
1195 static ir_node *get_last_copy(ir_node *node)
1197 ir_node *copy, *cur;
1199 while ((copy = get_copy(cur))) {
1206 * Rewire floating copies of the current loop.
1208 static void unrolling_fix_cf(void)
1210 ir_node *loophead = loop_cf_head;
1211 int headarity = get_irn_arity(loophead);
1212 ir_node *phi, *headnode;
1213 /*ir_node *high, *low;*/
1217 int backedges_n = get_backedge_n(loophead, 0);
1218 int unroll_arity = backedges_n;
1220 /* Create ins for all heads and their phis */
1221 headnode = get_copy(loophead);
1222 for_each_copy(headnode) {
1223 NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
1224 for_each_phi(headnode, phi) {
1225 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
1229 /* Append the copies to the existing loop. */
1230 for (i = 0; i < headarity; i++) {
1231 ir_node *upper_head = loophead;
1232 ir_node *lower_head = get_copy(loophead);
1234 ir_node *upper_pred = get_irn_n(loophead, i);
1235 ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
1240 * Build unrolled loop top down
1242 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1243 for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
1244 get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
1246 for_each_phi(upper_head, phi) {
1247 ir_node *phi_copy = get_copy(phi);
1248 get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
1252 last_pred = upper_pred;
1255 /* Fix the topmost loop heads backedges. */
1256 set_irn_n(loophead, i, last_pred);
1257 for_each_phi(loophead, phi) {
1258 ir_node *last_phi = get_last_copy(phi);
1259 ir_node *pred = get_irn_n(last_phi, i);
1260 set_irn_n(phi, i, pred);
1265 headnode = get_copy(loophead);
1266 for_each_copy(headnode) {
1267 set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
1268 for_each_phi(headnode, phi) {
1269 set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
1275 static ir_node *add_phi(ir_node *node, int phi_pos)
1280 mode = get_irn_mode(get_irn_n(node, phi_pos));
1281 ir_node *block = get_nodes_block(node);
1282 int n_cfgpreds = get_irn_arity(block);
1283 ir_node *pred = get_irn_n(node, phi_pos);
1286 /* create a new Phi */
1287 NEW_ARR_A(ir_node*, in, n_cfgpreds);
1288 for (i = 0; i < n_cfgpreds; ++i)
1289 in[i] = new_Unknown(mode); /*pred;*/
1291 phi = new_r_Phi(block, n_cfgpreds, in, mode);
1293 assert(phi && "phi null");
1294 assert(is_Bad(phi) && "phi bad");
1296 /* Important: always keep block phi list up to date. */
1297 add_Block_phi(block, phi);
1298 /* EVERY node is assumed to have a node_info linked. */
1299 alloc_node_info(phi, NULL);
1301 set_irn_n(node, phi_pos, phi);
1309 * Could be improved with variable range informations.
1311 static void loop_unrolling(void)
1317 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1318 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1320 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1322 /* duplicate whole loop content */
1323 inc_irg_visited(current_ir_graph);
1325 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1326 out_edge entry = cur_loop_outs[i];
1327 ir_node *node = entry.node;
1328 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1329 if (!is_Block(node)) {
1330 for (j = 0; j < unroll_times - 1; ++j) {
1331 copy_walk(pred, is_in_loop, cur_loop);
1332 pred = get_copy(pred);
1337 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1338 out_edge entry = cur_loop_outs[i];
1339 ir_node *node = entry.node;
1340 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1342 if (is_Block(node)) {
1343 for (j = 0; j < unroll_times - 1; ++j) {
1344 copy_walk(pred, is_in_loop, cur_loop);
1345 duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
1347 pred = get_copy(pred);
1352 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1354 /*dump_ir_graph(current_ir_graph, "-raw");*/
1358 /* Line up the floating copies. */
1361 /* Generate phis for all loop outs */
1362 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1363 out_edge entry = cur_loop_outs[i];
1364 ir_node *node = entry.node;
1365 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1367 if (!is_Block(node) && !is_End(node)) {
1368 DB((dbg, LEVEL_5, " construct_ssa_n def %N node %N pos %d\n",
1369 pred, node, entry.pred_irn_n));
1370 construct_ssa_n(pred, node);
1375 DEL_ARR_F(cur_loop_outs);
1377 set_irg_doms_inconsistent(current_ir_graph);
1378 set_irg_loopinfo_inconsistent(current_ir_graph);
1379 set_irg_outs_inconsistent(current_ir_graph);
1382 /* Initialization and */
1383 static void init_analyze(ir_loop *loop)
1385 /* Init new for every loop */
1388 loop_cf_head = NULL;
1389 loop_cf_head_valid = 1;
1390 loop_inv_head = NULL;
1391 loop_peeled_head = NULL;
1394 loop_info.calls = 0;
1395 loop_info.loads = 0;
1396 loop_info.blocks = 0;
1398 DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n", get_loop_node(loop, 0)));
1400 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1402 /* RETURN if there is no valid head */
1403 if (!loop_cf_head || !loop_cf_head_valid) {
1404 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
1411 if (enable_inversion)
1413 if (enable_unrolling)
1417 /* RETURN if there is a call in the loop */
1418 if (loop_info.calls)
1422 DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n", get_loop_node(loop, 0)));
1425 /* Find most inner loops and send them to analyze_loop */
1426 static void find_most_inner_loop(ir_loop *loop)
1428 /* descend into sons */
1429 int sons = get_loop_n_sons(loop);
1435 el_n = get_loop_n_elements(loop);
1437 for (i=0; i < el_n; ++i) {
1438 elem = get_loop_element(loop, i);
1439 /* We can only rely on the blocks,
1440 * as the loop attribute of the nodes seems not to be set. */
1441 if (is_ir_node(elem.kind) && is_Block(elem.node)) {
1442 ARR_APP1(ir_node *, loops, elem.node);
1443 DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
1449 for (s=0; s<sons; s++) {
1450 find_most_inner_loop(get_loop_son(loop, s));
1456 * Assure preconditions are met and go through all loops.
1458 void loop_optimization(ir_graph *irg)
1464 link_node_state_list = NULL;
1465 set_current_ir_graph(irg);
1469 assure_irg_outs(irg);
1471 /* NOTE: sets only the loop attribute of blocks, not nodes */
1472 /* NOTE: Kills links */
1473 assure_cf_loop(irg);
1475 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1476 collect_phiprojs(irg);
1477 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1479 /* allocate node_info for additional information on nodes */
1480 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1481 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1483 loop = get_irg_loop(irg);
1484 sons = get_loop_n_sons(loop);
1486 loops = NEW_ARR_F(ir_node *, 0);
1488 for (nr = 0; nr < sons; ++nr) {
1489 find_most_inner_loop(get_loop_son(loop, nr));
1492 /* TODO Keep backedges during optimization to avoid
1493 * this ugly allocation and deallocation.
1494 * (set_irn_in seems to destroy them)
1497 for (i = 0; i < ARR_LEN(loops); ++i) {
1500 loop = get_irn_loop(loops[i]);
1504 /* This part is useful for testing
1505 * or has to be used if the backedge information is destroyed.
1506 * Which is the case at the moment, because the backedge information gets lost
1507 * before inversion_fix_heads/unrolling_fix_cf, which results in bads.
1508 * NOTE!: Testsuite runs successfully nevertheless...
1512 * assure_cf_loop() creates a completely new loop tree.
1513 * Thus we cannot optimize a loop, assure_cf_loop() and continue with the next loop,
1514 * as the next loop must be searched because it is not distinguishable from the
1515 * already done loops.
1516 * The links of the loops are also not available anymore (to store a "loop done" flag).
1517 * Therefore we save a block per loop.
1518 * NOTE: We rely on the loop optimizations not to remove any block from the loop.
1519 * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
1521 for (i = 0; i < ARR_LEN(loops); ++i) {
1525 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1527 edges_assure(current_ir_graph);
1528 assure_irg_outs(current_ir_graph);
1530 /* NOTE: sets only the loop attribute of blocks */
1531 /* NOTE: Kills links */
1532 assure_cf_loop(current_ir_graph);
1534 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1535 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1537 /* Get loop from block */
1538 loop = get_irn_loop(loops[i]);
1547 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1548 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
1551 void do_loop_unrolling(ir_graph *irg)
1553 enable_unrolling = 1;
1555 enable_inversion = 0;
1557 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
1558 get_irg_start(irg)));
1560 loop_optimization(irg);
1562 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
1563 get_irg_start(irg)));
1566 void do_loop_inversion(ir_graph *irg)
1568 enable_unrolling = 0;
1570 enable_inversion = 1;
1572 DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
1573 get_irg_start(irg)));
1575 loop_optimization(irg);
1577 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
1578 get_irg_start(irg)));
1581 void do_loop_peeling(ir_graph *irg)
1583 enable_unrolling = 0;
1585 enable_inversion = 0;
1587 DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
1588 get_irg_start(irg)));
1590 loop_optimization(irg);
1592 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
1593 get_irg_start(irg)));
1597 ir_graph_pass_t *loop_inversion_pass(const char *name)
1599 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
1602 ir_graph_pass_t *loop_unroll_pass(const char *name)
1604 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
1607 ir_graph_pass_t *loop_peeling_pass(const char *name)
1609 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
1612 void firm_init_loop_opt(void)
1614 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");