2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling, loop peeling
39 #include "array_t.h" /* automatic array */
40 #include "beutil.h" /* get_block */
41 #include "irloop_t.h" /* set_irn_loop*/
49 DEBUG_ONLY(static firm_dbg_module_t *dbg);
52 * Convenience macro for iterating over every phi node of the given block.
53 * Requires phi list per block.
55 #define for_each_phi(block, phi) \
56 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
59 static ir_loop *cur_loop;
61 /* abortable walker function */
62 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
64 /* condition for walking a node during a copy_walk */
65 typedef unsigned walker_condition(ir_node *);
67 /* node and position of a predecessor */
68 typedef struct out_edges {
73 /* access complex values through the nodes links */
74 typedef struct node_info {
77 ir_node *link; /* temporary links for ssa creation */
78 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
80 struct node_info *freelistnext; /* linked list to free all node_infos */
83 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
85 static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
86 static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
88 static ir_node *loop_cf_head = NULL; /* Loop head node */
89 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
94 static ir_node *loop_inv_head = NULL;
96 static ir_node *loop_peeled_head = NULL;
98 /* Loop analysis informations */
99 typedef struct loop_info_t {
100 unsigned blocks; /* number of blocks in the loop */
101 unsigned calls; /* number of calls */
102 unsigned loads; /* number of load nodes */
103 unsigned outs; /* outs without keepalives */
105 unsigned invariant_loads;
106 unsigned stores; /* number of store nodes */
107 unsigned opnodes_n; /* nodes that probably result in an instruction */
108 unsigned do_invariant_opt;
112 /* Information about the current loop */
113 static loop_info_t loop_info;
115 /* A walker may start visiting a condition chain with these nodes. */
116 static out_edge *cond_chain_entries;
118 /* Number of unrolling */
121 static unsigned head_inversion_node_count;
122 static unsigned inversion_head_node_limit;
123 static unsigned head_inversion_block_count;
125 static unsigned enable_peeling;
126 static unsigned enable_inversion;
127 static unsigned enable_unrolling;
131 * ============= AUXILIARY FUNCTIONS =====================================
136 * Creates object on the heap, and adds it to a linked list to free it later.
138 static node_info *new_node_info(void)
140 node_info *l = XMALLOCZ(node_info);
141 l->freelistnext = link_node_state_list;
142 link_node_state_list = l;
148 static node_info *get_node_info(ir_node *n)
150 return ((node_info *)get_irn_link(n));
153 /* Allocates a node_info struct for the given node. For use with a walker. */
154 static void alloc_node_info(ir_node *node, void *env)
158 state = new_node_info();
159 set_irn_link(node, (void *)state);
162 static void free_node_info(void)
166 n = link_node_state_list;
168 node_info *next = n->freelistnext;
173 link_node_state_list = NULL;
177 * Use the linked list to reset the reused values of all node_infos
178 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
180 static void reset_node_infos(void)
183 next = link_node_state_list;
184 while (next->freelistnext) {
185 node_info *cur = next;
186 next = cur->freelistnext;
193 /* Returns the nodes node_info link. */
194 static ir_node *get_link(ir_node *n)
196 return ((node_info *)get_irn_link(n))->link;
199 /* Sets the nodes node_info link. */
200 static void set_link(ir_node *n, ir_node *link)
202 ((node_info *)get_irn_link(n))->link = link;
205 /* Returns a nodes copy. */
206 static ir_node *get_copy(ir_node *n)
208 return ((node_info *)get_irn_link(n))->copy;
211 /* Sets a nodes copy. */
212 static void set_copy(ir_node *n, ir_node *copy)
214 ((node_info *)get_irn_link(n) )->copy = copy;
218 * Convenience macro for iterating over every copy in a linked list
221 #define for_each_copy(node) \
222 for ( ; (node) ; (node) = get_copy(node))
225 * Convenience macro for iterating over every copy in 2 linked lists
226 * of copies in parallel.
228 #define for_each_copy2(high1, low1, high2, low2) \
229 for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
230 (high2) = (low2), (low2) = get_copy(low2))
233 * Returns 0 if the node or block is not in cur_loop.
235 static unsigned is_in_loop(ir_node *node)
237 return (get_irn_loop(get_block(node)) == cur_loop);
240 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
241 static unsigned is_alien_edge(ir_node *n, int i)
243 return(!is_in_loop(get_irn_n(n, i)));
246 /* used for block walker */
247 static void reset_block_mark(ir_node *node, void * env)
252 set_Block_mark(node, 0);
255 static unsigned is_nodesblock_marked(ir_node* node)
257 return (get_Block_mark(get_block(node)));
260 /* Returns the number of blocks in a loop. */
261 int get_loop_n_blocks(ir_loop *loop)
265 elements = get_loop_n_elements(loop);
267 for (e=0; e<elements; e++) {
268 loop_element elem = get_loop_element(loop, e);
269 if (is_ir_node(elem.kind) && is_Block(elem.node))
276 * Add newpred at position pos to node and also add the corresponding value to the phis.
277 * Requires block phi list.
279 static int duplicate_preds(ir_node* block, unsigned pos, ir_node* newpred)
287 assert(is_Block(block) && "duplicate_preds may be called for blocks only");
289 DB((dbg, LEVEL_1, "duplicate_preds(node %N, pos %d, newpred %N)\n", block, pos, newpred));
291 block_arity = get_irn_arity(block);
293 NEW_ARR_A(ir_node*, ins, block_arity + 1);
295 for (i = 0; i < block_arity; ++i) {
296 ins[i] = get_irn_n(block, i);
298 ins[block_arity] = newpred;
300 set_irn_in(block, block_arity + 1, ins);
303 block_arity = get_irn_arity(block);
305 NEW_ARR_A(ir_node*, ins, block_arity - 1);
307 for (i = 0; i < block_arity - 1; ++i) {
308 ins[i] = get_irn_n(block, i);
311 set_irn_in(block, block_arity - 1, ins);
315 for_each_phi(block, phi) {
316 int phi_arity = get_irn_arity(phi);
317 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %N\n", phi)));
319 NEW_ARR_A(ir_node *, ins, block_arity + 1);
320 for (i = 0; i < phi_arity; ++i) {
321 DB((dbg, LEVEL_5, "in %N\n", get_irn_n(phi, i))));
322 ins[i] = get_irn_n(phi, i);
324 ins[block_arity] = get_copy(get_irn_n(phi, pos));
325 set_irn_in(phi, block_arity + 1, ins);
332 * Finds loop head and loop_info.
334 static void get_loop_info(ir_node *node, void *env)
336 unsigned node_in_loop, pred_in_loop;
340 arity = get_irn_arity(node);
341 for (i = 0; i < arity; i++) {
342 ir_node *pred = get_irn_n(node, i);
344 pred_in_loop = is_in_loop(pred);
345 node_in_loop = is_in_loop(node);
347 /* collect some loop information */
353 /* Find the loops head/the blocks with cfpred outside of the loop */
354 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
355 ir_node *cfgpred = get_Block_cfgpred(node, i);
357 if (!is_in_loop(cfgpred)) {
358 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
359 /* another head? We do not touch this. */
360 if (loop_cf_head && loop_cf_head != node) {
361 loop_cf_head_valid = 0;
370 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
371 static void get_loop_outs(ir_node *node, void *env)
373 unsigned node_in_loop, pred_in_loop;
377 arity = get_irn_arity(node);
378 for (i = 0; i < arity; ++i) {
379 ir_node *pred = get_irn_n(node, i);
381 pred_in_loop = is_in_loop(pred);
382 node_in_loop = is_in_loop(node);
384 if (pred_in_loop && !node_in_loop) {
387 entry.pred_irn_n = i;
388 ARR_APP1(out_edge, cur_loop_outs, entry);
393 static ir_node *ssa_second_def;
394 static ir_node *ssa_second_def_block;
397 * Walks the graph bottom up, searching for definitions and creates phis.
399 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
407 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
409 /* Prevents creation of phi that would be bad anyway.
410 * Dead and bad blocks. */
411 if (get_irn_arity(block) < 1 || is_Bad(block))
414 if (block == ssa_second_def_block) {
415 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
416 return ssa_second_def;
419 /* already processed this block? */
420 if (irn_visited(block)) {
421 ir_node *value = get_link(block);
422 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
426 irg = get_irn_irg(block);
427 assert(block != get_irg_start_block(irg));
429 /* a Block with only 1 predecessor needs no Phi */
430 n_cfgpreds = get_Block_n_cfgpreds(block);
431 if (n_cfgpreds == 1) {
432 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
435 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
437 value = search_def_and_create_phis(pred_block, mode);
438 set_link(block, value);
439 mark_irn_visited(block);
444 /* create a new Phi */
445 NEW_ARR_A(ir_node*, in, n_cfgpreds);
446 for (i = 0; i < n_cfgpreds; ++i)
447 in[i] = new_Unknown(mode);
449 phi = new_r_Phi(block, n_cfgpreds, in, mode);
451 /* Important: always keep block phi list up to date. */
452 add_Block_phi(block, phi);
453 /* EVERY node is assumed to have a node_info linked. */
454 alloc_node_info(phi, NULL);
456 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
458 set_link(block, phi);
459 mark_irn_visited(block);
461 /* set Phi predecessors */
462 for (i = 0; i < n_cfgpreds; ++i) {
464 ir_node *pred_block = get_Block_cfgpred_block(block, i);
465 assert(pred_block != NULL);
466 pred_val = search_def_and_create_phis(pred_block, mode);
467 assert(pred_val != NULL);
469 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
470 set_irn_n(phi, i, pred_val);
476 * Given a set of values this function constructs SSA-form for the users of the
477 * first value (the users are determined through the out-edges of the value).
478 * Works without using the dominance tree.
480 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
481 ir_node *second_block, ir_node *second_val)
485 const ir_edge_t *edge;
486 const ir_edge_t *next;
488 assert(orig_block && orig_val && second_block && second_val &&
489 "no parameter of construct_ssa may be NULL");
491 /* no need to do anything */
492 if (orig_val == second_val)
495 irg = get_irn_irg(orig_val);
497 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
498 inc_irg_visited(irg);
500 mode = get_irn_mode(orig_val);
501 set_link(orig_block, orig_val);
502 mark_irn_visited(orig_block);
504 ssa_second_def_block = second_block;
505 ssa_second_def = second_val;
507 /* Only fix the users of the first, i.e. the original node */
508 foreach_out_edge_safe(orig_val, edge, next) {
509 ir_node *user = get_edge_src_irn(edge);
510 int j = get_edge_src_pos(edge);
511 ir_node *user_block = get_nodes_block(user);
518 DB((dbg, LEVEL_5, "original user %N\n", user));
521 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
522 newval = search_def_and_create_phis(pred_block, mode);
524 newval = search_def_and_create_phis(user_block, mode);
527 /* If we get a bad node the user keeps the original in. No second definition needed. */
528 if (newval != user && !is_Bad(newval))
529 set_irn_n(user, j, newval);
532 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
536 * Construct SSA for def and all of its copies.
538 static void construct_ssa_n(ir_node *def, ir_node *user)
543 const ir_edge_t *edge;
544 const ir_edge_t *next;
545 irg = get_irn_irg(def);
547 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
548 inc_irg_visited(irg);
550 mode = get_irn_mode(def);
552 for_each_copy(iter) {
553 set_link(get_nodes_block(iter), iter);
554 mark_irn_visited(get_nodes_block(iter));
556 DB((dbg, LEVEL_5, "ssa_n: Link def %N to block %N\n",
557 iter, get_nodes_block(iter)));
560 /* Need to search the outs, because we need the in-pos on the user node. */
561 foreach_out_edge_safe(def, edge, next) {
562 ir_node *edge_user = get_edge_src_irn(edge);
563 int edge_src = get_edge_src_pos(edge);
564 ir_node *user_block = get_nodes_block(user);
567 if (edge_user != user)
571 ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src);
572 newval = search_def_and_create_phis(pred_block, mode);
574 newval = search_def_and_create_phis(user_block, mode);
577 if (newval != user && !is_Bad(newval))
578 set_irn_n(user, edge_src, newval);
581 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
585 * Construct SSA for all definitions in arr.
587 void construct_ssa_foreach(ir_node **arr, int arr_n)
590 for (i = 0; i < arr_n ; ++i) {
591 ir_node *cppred, *block, *cpblock, *pred;
594 cppred = get_copy(pred);
595 block = get_nodes_block(pred);
596 cpblock = get_nodes_block(cppred);
597 construct_ssa(block, pred, cpblock, cppred);
601 /* get the number of backedges without alien bes */
602 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
606 int arity = get_irn_arity(loophead);
607 for (i = 0; i < arity; ++i) {
608 ir_node *pred = get_irn_n(loophead, i);
609 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
616 * Rewires the heads after peeling.
618 static void peel_fix_heads(void)
620 ir_node **loopheadnins, **peelheadnins;
621 ir_node *loophead = loop_cf_head;
622 ir_node *peelhead = get_copy(loophead);
624 int headarity = get_irn_arity(loophead);
631 int backedges_n = get_backedge_n(loophead, 0);
633 int lhead_arity = 2 * backedges_n;
634 int phead_arity = headarity - backedges_n;
637 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
638 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
640 for_each_phi(loophead, phi) {
641 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
643 for_each_phi(peelhead, phi) {
644 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
647 for (i = 0; i < headarity; i++)
649 ir_node *orgjmp = get_irn_n(loophead, i);
650 ir_node *copyjmp = get_copy(orgjmp);
653 * Rewire the head blocks ins and their phi ins.
654 * Requires phi list per block.
656 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
657 loopheadnins[lheadin_c] = orgjmp;
658 for_each_phi(loophead, phi) {
659 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
663 /* former bes of the peeled code origin now from the loophead */
664 loopheadnins[lheadin_c] = copyjmp;
666 /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
667 * Order is crucial! Predecessors outside of the loop are non existent.
668 * The copy (cloned with its ins!) has pred i,
669 * but phis pred i might not have a copy of itself.
671 for_each_phi(loophead, phi) {
672 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
676 peelheadnins[pheadin_c] = orgjmp;
677 for_each_phi(peelhead, phi) {
678 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
684 assert(pheadin_c == ARR_LEN(peelheadnins) &&
685 lheadin_c == ARR_LEN(loopheadnins) &&
686 "the constructed head arities do not match the predefined arities");
688 /* assign the ins to the nodes */
689 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
690 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
692 for_each_phi(loophead, phi) {
693 ir_node **ins = get_node_info( phi )->ins;
694 set_irn_in(phi, lhead_arity, ins);
697 for_each_phi(peelhead, phi) {
698 ir_node **ins = get_node_info( phi )->ins;
699 set_irn_in(phi, phead_arity, ins);
704 * Create a raw copy (ins are still the old ones) of the given node.
705 * We rely on copies to be NOT visited.
707 static ir_node *rawcopy_node(ir_node *node)
713 cp = exact_copy(node);
715 arity = get_irn_arity(node);
718 set_irn_in(cp, arity, get_irn_in(node));
720 for (i = 0; i < arity; ++i) {
721 if (is_backedge(node, i))
726 cpstate = new_node_info();
727 set_irn_link(cp, cpstate);
732 * exact_copy already sets Macroblock.
733 * Why should we do this anyway? */
734 set_Block_MacroBlock(cp, cp);
741 * This walker copies all walked nodes.
742 * If the walk_condition is true for a node, it is walked.
743 * All nodes node_info->copy attributes have to be NULL prior to every walk.
745 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
751 ir_graph *irg = current_ir_graph;
752 node_info *node_info = get_node_info(node);
755 * break condition and cycle resolver, creating temporary node copies
757 if (get_irn_visited(node) >= get_irg_visited(irg)) {
758 /* Here we rely on nodestate's copy being initialized with NULL */
759 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
760 if (node_info->copy == NULL) {
761 cp = rawcopy_node(node);
762 DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
768 mark_irn_visited(node);
770 if (!is_Block(node)) {
771 ir_node *pred = get_nodes_block(node);
772 if (walk_condition(pred))
773 DB((dbg, LEVEL_5, "walk block %N\n", pred));
774 copy_walk(pred, walk_condition, set_loop);
777 arity = get_irn_arity(node);
779 NEW_ARR_A(ir_node *, cpin, arity);
781 for (i = 0; i < arity; ++i) {
782 ir_node *pred = get_irn_n(node, i);
784 if (walk_condition(pred)) {
785 DB((dbg, LEVEL_5, "walk node %N\n", pred));
786 copy_walk(pred, walk_condition, set_loop);
787 cpin[i] = get_copy(pred);
788 DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
789 node, get_copy(pred), pred));
795 /* copy node / finalize temp node */
796 if (node_info->copy == NULL) {
797 /* No temporary copy existent */
798 cp = rawcopy_node(node);
799 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
801 /* temporary copy is existent but without correct ins */
803 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
806 if (!is_Block(node)) {
807 ir_node *cpblock = get_copy(get_nodes_block(node));
809 set_nodes_block(cp, cpblock );
811 add_Block_phi(cpblock, cp);
814 set_irn_loop(cp, set_loop);
815 set_irn_in(cp, ARR_LEN(cpin), cpin);
819 * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
821 static void peel(out_edge *loop_outs)
824 ir_node **entry_buffer;
827 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
829 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
831 /* duplicate loop walk */
832 inc_irg_visited(current_ir_graph);
834 for (i = 0; i < ARR_LEN(loop_outs); i++) {
835 out_edge entry = loop_outs[i];
836 ir_node *node = entry.node;
837 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
839 if (is_Block(node)) {
840 copy_walk(pred, is_in_loop, NULL);
841 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
843 copy_walk(pred, is_in_loop, NULL);
844 /* leave out keepalives */
846 /* Node is user of a value defined inside the loop.
847 * We'll need a phi since we duplicated the loop. */
848 /* Cannot construct_ssa here, because it needs another walker. */
849 entry_buffer[entry_c] = pred;
855 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
857 /* Rewires the 2 heads */
860 /* Generate phis for values from peeled code and original loop */
861 construct_ssa_foreach(entry_buffer, entry_c);
862 /*for (i = 0; i < entry_c; i++)
864 ir_node *cppred, *block, *cpblock, *pred;
866 pred = entry_buffer[i];
867 cppred = get_copy(pred);
868 block = get_nodes_block(pred);
869 cpblock = get_nodes_block(cppred);
870 construct_ssa(block, pred, cpblock, cppred);
875 * Populates head_entries with (node, pred_pos) tuple
876 * whereas the node's pred at pred_pos is in the head but not the node itself.
877 * Head and condition chain blocks must be marked.
879 static void get_head_outs(ir_node *node, void *env)
882 int arity = get_irn_arity(node);
885 DB((dbg, LEVEL_5, "get head entries %N \n", node));
887 for (i = 0; i < arity; ++i) {
888 /* node is not in the head, but the predecessor is.
889 * (head or loop chain nodes are marked) */
891 DB((dbg, LEVEL_5, "... "));
892 DB((dbg, LEVEL_5, "node %N marked %d (0) pred %d marked %d (1) \n",
893 node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
895 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
898 entry.pred_irn_n = i;
900 "Found head chain entry %N @%d because !inloop %N and inloop %N\n",
901 node, i, node, get_irn_n(node, i)));
902 ARR_APP1(out_edge, cur_head_outs, entry);
908 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
909 * A block belongs to the chain if a condition branches out of the loop.
910 * Returns 1 if the given block belongs to the condition chain.
912 static unsigned find_condition_chains(ir_node *block)
914 const ir_edge_t *edge;
918 DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
920 /* Collect all outs, including keeps.
921 * (TODO firm function for number of out edges?) */
922 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
926 /* We do not want to collect more nodes from condition chains, than the limit allows us to.
927 * Also, leave at least one block as body. */
928 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
929 || head_inversion_block_count + 1 == loop_info.blocks) {
930 set_Block_mark(block, 0);
935 /* First: check our successors, and add all succs that are outside of the loop to the list */
936 foreach_block_succ(block, edge) {
937 ir_node *src = get_edge_src_irn( edge );
938 int pos = get_edge_src_pos( edge );
940 if (!is_in_loop(src)) {
945 entry.pred_irn_n = pos;
946 ARR_APP1(out_edge, cond_chain_entries, entry);
947 mark_irn_visited(src);
952 /* this block is not part of the chain,
953 * because the chain would become too long or we have no successor outside of the loop */
955 set_Block_mark(block, 0);
958 set_Block_mark(block, 1);
959 ++head_inversion_block_count;
960 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
961 head_inversion_node_count += nodes_n;
964 /* Second: walk all successors, and add them to the list if they are not part of the chain */
965 foreach_block_succ(block, edge) {
967 ir_node *src = get_edge_src_irn( edge );
968 int pos = get_edge_src_pos( edge );
970 /* already done cases */
971 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
975 mark_irn_visited(src);
976 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
977 inchain = find_condition_chains(src);
979 /* if successor is not part of chain we need to collect its outs */
983 entry.pred_irn_n = pos;
984 ARR_APP1(out_edge, cond_chain_entries, entry);
991 * Rewire the loop head and inverted head for loop inversion.
993 static void inversion_fix_heads(void)
995 ir_node **loopheadnins, **invheadnins;
996 ir_node *loophead = loop_cf_head;
997 ir_node *invhead = get_copy(loophead);
999 int headarity = get_irn_arity(loophead);
1006 int backedges_n = get_backedge_n(loophead, 0);
1007 int lhead_arity = backedges_n;
1008 int ihead_arity = headarity - backedges_n;
1010 assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations.");
1011 assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations.");
1013 /* new in arrays for all phis in the head blocks */
1014 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
1015 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
1017 for_each_phi(loophead, phi) {
1018 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1020 for_each_phi(invhead, phi) {
1021 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1024 for (i = 0; i < headarity; i++) {
1025 ir_node *pred = get_irn_n(loophead, i);
1028 * Rewire the head blocks ins and their phi ins.
1029 * Requires phi list per block.
1031 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1032 /* just copy these edges */
1033 loopheadnins[lheadin_c] = pred;
1034 for_each_phi(loophead, phi) {
1035 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
1039 invheadnins[iheadin_c] = pred;
1040 for_each_phi(invhead, phi) {
1041 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1047 /* assign the ins to the head blocks */
1048 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1049 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1051 /* assign the ins for the phis */
1052 for_each_phi(loophead, phi) {
1053 ir_node **ins = get_node_info(phi)->ins;
1054 set_irn_in(phi, lhead_arity, ins);
1057 for_each_phi(invhead, phi) {
1058 ir_node **ins = get_node_info(phi)->ins;
1059 set_irn_in(phi, ihead_arity, ins);
1063 static void inversion_walk(out_edge *head_entries)
1068 ir_node **entry_buffer;
1069 ir_node **head_phi_assign;
1071 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1073 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1075 /* Find assignments in the condition chain,
1076 * to construct_ssa for them after the loop inversion. */
1077 for_each_phi(loop_cf_head , phi) {
1078 int arity = get_irn_arity(phi);
1079 for (i = 0; i < arity; ++i) {
1080 ir_node *def = get_irn_n(phi, i);
1081 if (is_nodesblock_marked(def)) {
1082 ARR_APP1(ir_node *, head_phi_assign, def);
1087 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1090 * duplicate condition chain
1092 inc_irg_visited(current_ir_graph);
1094 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1095 out_edge entry = head_entries[i];
1096 ir_node *node = entry.node;
1097 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1099 if (is_Block(node)) {
1100 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1102 copy_walk(pred, is_nodesblock_marked, cur_loop);
1103 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1105 DB((dbg, LEVEL_5, "\nInit walk node %N\n", pred));
1107 copy_walk(pred, is_nodesblock_marked, cur_loop);
1109 /* ignore keepalives */
1110 if (!is_End(node)) {
1111 /* Node is user of a value assigned inside the loop.
1112 * We will need a phi since we duplicated the head. */
1113 entry_buffer[entry_c] = pred;
1119 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1121 inversion_fix_heads();
1123 /* Generate phis for users of values assigned in the condition chain
1124 * and read in the loops body */
1125 construct_ssa_foreach(entry_buffer, entry_c);
1127 /* Generate phis for values that are assigned in the condition chain
1128 * but not read in the loops body. */
1129 construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
1131 loop_cf_head = get_copy(loop_cf_head);
1135 void loop_peeling(void)
1137 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1138 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1140 peel(cur_loop_outs);
1145 set_irg_doms_inconsistent(current_ir_graph);
1146 set_irg_loopinfo_inconsistent(current_ir_graph);
1147 set_irg_outs_inconsistent(current_ir_graph);
1149 DEL_ARR_F(cur_loop_outs);
1152 /* Loop inversion */
1153 void loop_inversion(void)
1155 unsigned do_inversion = 1;
1157 inversion_head_node_limit = INT_MAX;
1159 /* Search for condition chains. */
1160 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1162 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1164 loop_info.blocks = get_loop_n_blocks(cur_loop);
1165 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1167 head_inversion_node_count = 0;
1168 head_inversion_block_count = 0;
1170 set_Block_mark(loop_cf_head, 1);
1171 mark_irn_visited(loop_cf_head);
1172 inc_irg_visited(current_ir_graph);
1174 find_condition_chains(loop_cf_head);
1176 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1177 if (loop_info.blocks < 2) {
1179 DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1182 /* We also catch endless loops here,
1183 * because they do not have a condition chain. */
1184 if (head_inversion_block_count < 1) {
1186 DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1190 cur_head_outs = NEW_ARR_F(out_edge, 0);
1192 /* Get all edges pointing into the head or condition chain (outs). */
1193 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1194 inversion_walk(cur_head_outs);
1196 DEL_ARR_F(cur_head_outs);
1198 set_irg_doms_inconsistent(current_ir_graph);
1199 set_irg_loopinfo_inconsistent(current_ir_graph);
1200 set_irg_outs_inconsistent(current_ir_graph);
1204 DEL_ARR_F(cond_chain_entries);
1205 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1209 * Returns last element of linked list of copies by
1210 * walking the linked list.
1212 ir_node *get_last_copy(ir_node *node)
1214 ir_node *copy, *cur;
1216 while ((copy = get_copy(cur))) {
1223 * Rewire floating copies of the current loop.
1225 void unrolling_fix_cf(void)
1227 ir_node *loophead = loop_cf_head;
1228 int headarity = get_irn_arity(loophead);
1229 ir_node *phi, *headnode;
1230 /*ir_node *high, *low;*/
1234 int backedges_n = get_backedge_n(loophead, 0);
1235 int unroll_arity = backedges_n;
1237 /* Create ins for all heads and their phis */
1238 headnode = get_copy(loophead);
1239 for_each_copy(headnode) {
1240 NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
1241 for_each_phi(headnode, phi) {
1242 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
1246 /* Append the copies to the existing loop. */
1247 for (i = 0; i < headarity; i++) {
1248 ir_node *upper_head = loophead;
1249 ir_node *lower_head = get_copy(loophead);
1251 ir_node *upper_pred = get_irn_n(loophead, i);
1252 ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
1257 * Build unrolled loop top down
1259 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1260 for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
1261 get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
1263 for_each_phi(upper_head, phi) {
1264 ir_node *phi_copy = get_copy(phi);
1265 get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
1269 last_pred = upper_pred;
1272 /* Fix the topmost loop heads backedges. */
1273 set_irn_n(loophead, i, last_pred);
1274 for_each_phi(loophead, phi) {
1275 ir_node *last_phi = get_last_copy(phi);
1276 ir_node *pred = get_irn_n(last_phi, i);
1277 set_irn_n(phi, i, pred);
1282 headnode = get_copy(loophead);
1283 for_each_copy(headnode) {
1284 set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
1285 for_each_phi(headnode, phi) {
1286 set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
1292 static ir_node *add_phi(ir_node *node, int phi_pos)
1297 mode = get_irn_mode(get_irn_n(node, phi_pos));
1298 ir_node *block = get_nodes_block(node);
1299 int n_cfgpreds = get_irn_arity(block);
1300 ir_node *pred = get_irn_n(node, phi_pos);
1303 /* create a new Phi */
1304 NEW_ARR_A(ir_node*, in, n_cfgpreds);
1305 for (i = 0; i < n_cfgpreds; ++i)
1306 in[i] = new_Unknown(mode); /*pred;*/
1308 phi = new_r_Phi(block, n_cfgpreds, in, mode);
1310 assert(phi && "phi null");
1311 assert(is_Bad(phi) && "phi bad");
1313 /* Important: always keep block phi list up to date. */
1314 add_Block_phi(block, phi);
1315 /* EVERY node is assumed to have a node_info linked. */
1316 alloc_node_info(phi, NULL);
1318 set_irn_n(node, phi_pos, phi);
1326 * Could be improved with variable range informations.
1328 void loop_unrolling(void)
1334 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1335 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1337 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1339 /* duplicate whole loop content */
1340 inc_irg_visited(current_ir_graph);
1342 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1343 out_edge entry = cur_loop_outs[i];
1344 ir_node *node = entry.node;
1345 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1347 for (j = 0; j < unroll_times - 1; ++j) {
1348 copy_walk(pred, is_in_loop, cur_loop);
1349 if (is_Block(node)) {
1350 duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
1352 pred = get_copy(pred);
1356 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1358 /* Line up the floating copies. */
1361 /* Generate phis for all loop outs */
1362 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1363 out_edge entry = cur_loop_outs[i];
1364 ir_node *node = entry.node;
1365 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1367 if (!is_Block(node) && !is_End(node)) {
1368 DB((dbg, LEVEL_5, " construct_ssa_n def %N node %N pos %d\n",
1369 pred, node, entry.pred_irn_n));
1370 construct_ssa_n(pred, node);
1374 DEL_ARR_F(cur_loop_outs);
1376 set_irg_doms_inconsistent(current_ir_graph);
1377 set_irg_loopinfo_inconsistent(current_ir_graph);
1378 set_irg_outs_inconsistent(current_ir_graph);
1381 /* Initialization and */
1382 static void init_analyze(ir_loop *loop)
1384 /* Init new for every loop */
1387 loop_cf_head = NULL;
1388 loop_cf_head_valid = 1;
1389 loop_inv_head = NULL;
1390 loop_peeled_head = NULL;
1393 loop_info.calls = 0;
1394 loop_info.loads = 0;
1395 loop_info.blocks = 0;
1397 DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n", get_loop_node(loop, 0)));
1399 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1401 /* RETURN if there is no valid head */
1402 if (!loop_cf_head || !loop_cf_head_valid) {
1403 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
1410 if (enable_inversion)
1412 if (enable_unrolling)
1416 /* RETURN if there is a call in the loop */
1417 if (loop_info.calls)
1421 DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n", get_loop_node(loop, 0)));
1424 /* Find most inner loops and send them to analyze_loop */
1425 static void find_most_inner_loop(ir_loop *loop)
1427 /* descend into sons */
1428 int sons = get_loop_n_sons(loop);
1434 el_n = get_loop_n_elements(loop);
1436 for (i=0; i < el_n; ++i) {
1437 elem = get_loop_element(loop, i);
1438 /* We can only rely on the blocks,
1439 * as the loop attribute of the nodes seems not to be set. */
1440 if (is_ir_node(elem.kind) && is_Block(elem.node)) {
1441 ARR_APP1(ir_node *, loops, elem.node);
1442 DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
1448 for (s=0; s<sons; s++) {
1449 find_most_inner_loop(get_loop_son(loop, s));
1455 * Assure preconditions are met and go through all loops.
1457 void loop_optimization(ir_graph *irg)
1463 link_node_state_list = NULL;
1464 set_current_ir_graph(irg);
1468 assure_irg_outs(irg);
1470 /* NOTE: sets only the loop attribute of blocks, not nodes */
1471 /* NOTE: Kills links */
1472 assure_cf_loop(irg);
1474 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1475 collect_phiprojs(irg);
1476 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1478 /* allocate node_info for additional information on nodes */
1479 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1480 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1482 loop = get_irg_loop(irg);
1483 sons = get_loop_n_sons(loop);
1485 loops = NEW_ARR_F(ir_node *, 0);
1487 for (nr = 0; nr < sons; ++nr) {
1488 find_most_inner_loop(get_loop_son(loop, nr));
1491 /* TODO Keep backedges during optimization to avoid
1492 * this ugly allocation and deallocation.
1493 * (set_irn_in seems to destroy them)
1496 for (i = 0; i < ARR_LEN(loops); ++i) {
1499 loop = get_irn_loop(loops[i]);
1503 /* This part is useful for testing
1504 * or has to be used if the backedge information is destroyed.
1505 * Which is the case at the moment, because the backedge information gets lost
1506 * before inversion_fix_heads/unrolling_fix_cf, which results in bads.
1507 * NOTE!: Testsuite runs successfully nevertheless...
1511 * assure_cf_loop() creates a completely new loop tree.
1512 * Thus we cannot optimize a loop, assure_cf_loop() and continue with the next loop,
1513 * as the next loop must be searched because it is not distinguishable from the
1514 * already done loops.
1515 * The links of the loops are also not available anymore (to store a "loop done" flag).
1516 * Therefore we save a block per loop.
1517 * NOTE: We rely on the loop optimizations not to remove any block from the loop.
1518 * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
1520 for (i = 0; i < ARR_LEN(loops); ++i) {
1524 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1526 edges_assure(current_ir_graph);
1527 assure_irg_outs(current_ir_graph);
1529 /* NOTE: sets only the loop attribute of blocks */
1530 /* NOTE: Kills links */
1531 assure_cf_loop(current_ir_graph);
1533 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1534 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1536 /* Get loop from block */
1537 loop = get_irn_loop(loops[i]);
1546 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1547 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
1550 void do_loop_unrolling(ir_graph *irg)
1552 enable_unrolling = 1;
1554 enable_inversion = 0;
1556 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
1557 get_irg_start(irg)));
1559 loop_optimization(irg);
1561 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
1562 get_irg_start(irg)));
1565 void do_loop_inversion(ir_graph *irg)
1567 enable_unrolling = 0;
1569 enable_inversion = 1;
1571 DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
1572 get_irg_start(irg)));
1574 loop_optimization(irg);
1576 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
1577 get_irg_start(irg)));
1580 void do_loop_peeling(ir_graph *irg)
1582 enable_unrolling = 0;
1584 enable_inversion = 0;
1586 DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
1587 get_irg_start(irg)));
1589 loop_optimization(irg);
1591 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
1592 get_irg_start(irg)));
1596 ir_graph_pass_t *loop_inversion_pass(const char *name)
1598 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
1601 ir_graph_pass_t *loop_unroll_pass(const char *name)
1603 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
1606 ir_graph_pass_t *loop_peeling_pass(const char *name)
1608 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
1611 void firm_init_loop_opt(void)
1613 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");