2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
24 * Loop peeling, loop inversion and loop unrolling
25 * NOTE: Inversion creates abnormal looking loops because there is probably
26 * no head as single loop entry point. Therefore peeling
27 * will do nothing as it relies on the head as single loop entry point.
43 #include "array_t.h" /* automatic array */
44 #include "beutil.h" /* get_block */
45 #include "irloop_t.h" /*set_irn_loop*/
48 /*#include "irdump.h"*/
50 DEBUG_ONLY(static firm_dbg_module_t *dbg);
53 * Convenience macro for iterating over every phi node of the given block.
54 * Requires phi list per block.
56 #define for_each_phi(block, phi) \
57 for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
60 static ir_loop *cur_loop;
62 /* The loop walker should be possible to abort if nothing can be done anymore */
63 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
65 /* condition for breaking a copy_walk */
66 typedef unsigned walker_condition(ir_node *);
68 /* stores node and position of a predecessor */
69 typedef struct out_edges {
74 /* access complex values through the nodes links */
75 typedef struct node_info {
78 ir_node *link; /* temporary links for ssa creation */
79 ir_node **ins; /* ins for phi nodes, during rewiring of blocks */
81 struct node_info *freelistnext; /* linked list to free all node_infos */
84 static node_info *link_node_state_list; /* head of the linked list to free all node_infos */
86 static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */
87 static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */
89 static ir_node *loop_cf_head = NULL; /* Loop head node */
90 static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */
95 static ir_node *loop_inv_head = NULL;
97 static ir_node *loop_peeled_head = NULL;
99 /* Loop analysis informations */
100 typedef struct loop_info_t {
101 unsigned calls; /* number of calls */
102 unsigned loads; /* number of load nodes */
103 unsigned invariant_loads;
104 unsigned stores; /* number of store nodes */
105 unsigned blocks; /* number of blocks in the loop */
106 unsigned opnodes_n; /* nodes that probably result in an instruction */
107 /* unsigned opnodes_head; */
108 unsigned outs; /* outs without keepalives */
111 /* Information about the current loop */
112 static loop_info_t loop_info;
114 /* A walker may start visiting a condition chain with these nodes. */
115 static out_edge *cond_chain_entries;
117 static unsigned head_inversion_node_count;
118 static unsigned head_inversion_node_limit;
119 static unsigned head_inversion_block_count;
121 static unsigned enable_peeling;
122 static unsigned enable_inversion;
126 * ============= AUXILIARY FUNCTIONS =====================================
131 * Creates object on the heap, and adds it to a linked list to free it later.
133 static node_info *new_node_info(void) {
134 node_info *l = XMALLOCZ(node_info);
135 l->freelistnext = link_node_state_list;
136 link_node_state_list = l;
142 static node_info *get_node_info(ir_node *n)
144 return ((node_info *)get_irn_link(n));
147 /* Allocates a node_info struct for the given node. For use with a walker. */
148 static void alloc_node_info(ir_node *node, void *env)
152 state = new_node_info();
153 set_irn_link(node, (void *)state);
156 static void free_node_info(void)
160 n = link_node_state_list;
162 node_info *next = n->freelistnext;
167 link_node_state_list = NULL;
171 * Use the linked list to reset the reused values of all node_infos
172 * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
174 static void reset_node_infos(void)
177 next = link_node_state_list;
178 while(next->freelistnext) {
179 node_info *cur = next;
180 next = cur->freelistnext;
190 static ir_node *get_copy(ir_node *n)
192 return ((node_info *)get_irn_link(n))->copy;
195 /* Links the node to its copy */
196 static void set_copy(ir_node *n, ir_node *copy)
198 ((node_info *)get_irn_link(n) )->copy = copy;
201 /* Returns 0 if the node or block is not in cur_loop
202 * NOTE: get_irn_loop returns the ir_node loop attribute.
203 * But it seems only to be set correctly to blocks!
204 * Thus, without get_block this function does not work.
206 static unsigned is_in_loop(ir_node *node)
208 /*DB((dbg, LEVEL_5, "node %ld inloop %d\n", node->node_nr, get_irn_loop(get_block(node)) == cur_loop));*/
209 /*DB((dbg, LEVEL_5, "node %ld loop of node %d loop %d\n", node->node_nr, get_irn_loop(get_block(node)), cur_loop)); */
210 return (get_irn_loop(get_block(node)) == cur_loop);
213 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
214 static unsigned is_alien_edge(ir_node *n, int i)
216 return(!is_in_loop(get_irn_n(n, i)));
219 /* used for block walker */
220 static void reset_block_mark(ir_node *node, void * env)
224 DB((dbg, LEVEL_5, "UNMARK ..."));
225 DB((dbg, LEVEL_5, " UNMARK %ld\n", get_irn_node_nr(node)));
228 set_Block_mark(node, 0);
231 static unsigned is_nodesblock_marked(ir_node* node)
233 return (get_Block_mark(get_block(node)));
236 /* Returns the number of blocks in a loop. */
237 int get_loop_n_blocks(ir_loop *loop) {
240 elements = get_loop_n_elements(loop);
242 for(e=0; e<elements; e++) {
243 loop_element elem = get_loop_element(loop, e);
244 if (is_ir_node(elem.kind) && is_Block(elem.node)) {
245 /*DB((dbg, LEVEL_5, "%ld is block \n", elem.node->node_nr));*/
253 * Add newpred at position pos to node and also add the corresponding value to the phis.
254 * Requires block phi list.
256 static int duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
263 assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
265 DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n",
266 get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
268 block_arity = get_irn_arity(node);
270 NEW_ARR_A(ir_node*, ins, block_arity + 1);
271 for (i = 0; i < block_arity; ++i)
272 ins[i] = get_irn_n(node, i);
273 ins[block_arity] = newpred;
275 set_irn_in(node, block_arity + 1, ins);
277 for_each_phi(node, phi) {
278 int phi_arity = get_irn_arity(phi);
279 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
281 NEW_ARR_A(ir_node *, ins, block_arity + 1);
282 for (i=0; i < phi_arity; ++i) {
283 DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
284 ins[i] = get_irn_n(phi, i);
286 ins[block_arity] = get_irn_n(phi, pos);
287 set_irn_in(phi, block_arity + 1, ins);
289 /* return new position */
293 /* Finds loop head and loop_info */
294 static void get_loop_info(ir_node *node, void *env)
296 unsigned node_in_loop, pred_in_loop;
300 /*DB((dbg, LEVEL_5, "node %+F\n", node));*/
302 get_node_info(node)->done = 1;
304 arity = get_irn_arity(node);
305 for (i = 0; i < arity; i++) {
306 ir_node *pred = get_irn_n(node, i);
308 pred_in_loop = is_in_loop(pred);
309 node_in_loop = is_in_loop(node);
311 /* collect some loop information */
313 /*DB((dbg, LEVEL_1, "--------------------------------- inloop %+F\n", node));*/
320 if (!is_Block(node) && !is_Proj(node) && !is_Phi(node))
321 ++loop_info.opnodes_n;
324 /* Find the loops head/the blocks with cfpred outside of the loop */
325 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
326 ir_node *cfgpred = get_Block_cfgpred(node, i);
328 if (!is_in_loop(cfgpred)) {
329 DB((dbg, LEVEL_1, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
330 /* another head? We do not touch this. */
331 if (loop_cf_head && loop_cf_head != node) {
332 loop_cf_head_valid = 0;
341 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
342 static void get_loop_outs(ir_node *node, void *env)
344 unsigned node_in_loop, pred_in_loop;
348 arity = get_irn_arity(node);
349 for (i = 0; i < arity; i++) {
350 ir_node *pred = get_irn_n(node, i);
352 pred_in_loop = is_in_loop(pred);
353 node_in_loop = is_in_loop(node);
355 if (pred_in_loop && !node_in_loop) {
358 entry.pred_irn_n = i;
359 ARR_APP1(out_edge, cur_loop_outs, entry);
360 if (node != get_irg_end(current_ir_graph))
367 * Finds invariant loads and marks them as invariant.
368 * (has to be post walk)
370 static unsigned get_invariants(ir_node *node, void *env)
373 int arity = get_irn_arity(node);
376 /* RETURN, no preds to visit */
377 if (arity == 0) return 0;
380 ir_node *pred = get_Load_ptr(node);
381 if ( (get_Load_volatility(node) == volatility_non_volatile) &
385 || get_node_info(node)->invariant ) ) {
386 get_node_info(node)->invariant = 1;
387 ++loop_info.invariant_loads;
390 get_node_info(node)->invariant = 0;
395 /* find loop variant preds */
396 for(i = 0; i < arity; ++i) {
397 ir_node *pred = get_irn_n(node, i);
399 if ( is_in_loop(pred) /* outside loop is loop invariant */
400 && !is_Const(pred) /* constants */
401 && !is_SymConst(pred) /* SymConst */
402 && !get_node_info(node)->invariant ) { /* pred is marked as invariant */
408 get_node_info(node)->invariant = 1;
410 get_node_info(node)->invariant = 0;
416 static ir_node *ssa_second_def;
417 static ir_node *ssa_second_def_block;
420 * Walks the graph bottom up, searching for definitions and create phis.
421 * (Does not handle the special case where the second definition is in the block of the user
422 * of the original definition because it is not necessary here.)
424 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
432 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %ld\n", get_irn_node_nr(block)));
434 /* Prevents creation of phi that would be bad anyway.
435 * Dead and bad blocks. */
436 if (get_irn_arity(block) < 1 || is_Bad(block))
439 if (block == ssa_second_def_block) {
440 DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
441 return ssa_second_def;
444 /* already processed this block? */
445 if (irn_visited(block)) {
446 ir_node *value = get_node_info(block)->link;
447 DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
451 irg = get_irn_irg(block);
452 assert(block != get_irg_start_block(irg));
454 /* a Block with only 1 predecessor needs no Phi */
455 n_cfgpreds = get_Block_n_cfgpreds(block);
456 if (n_cfgpreds == 1) {
457 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
460 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
462 value = search_def_and_create_phis(pred_block, mode);
463 get_node_info(block)->link = value;
464 mark_irn_visited(block);
469 /* create a new Phi */
470 NEW_ARR_A(ir_node*, in, n_cfgpreds);
471 for(i = 0; i < n_cfgpreds; ++i)
472 in[i] = new_Unknown(mode);
474 phi = new_r_Phi(block, n_cfgpreds, in, mode);
476 /* Important: always keep block phi list up to date. */
477 add_Block_phi(block, phi);
478 /* EVERY node is assumed to have a node_info linked. */
479 alloc_node_info(phi, NULL);
481 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
483 get_node_info(block)->link = phi;
484 mark_irn_visited(block);
486 /* set Phi predecessors */
487 for(i = 0; i < n_cfgpreds; ++i) {
488 ir_node *pred_block = get_Block_cfgpred_block(block, i);
489 ir_node *pred_val = search_def_and_create_phis(pred_block, mode);
490 DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
491 set_irn_n(phi, i, pred_val);
497 * Given a set of values this function constructs SSA-form for the users of the
498 * first value (the users are determined through the out-edges of the value).
499 * Uses the irn_visited flags. Works without using the dominance tree.
501 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
502 ir_node *second_block, ir_node *second_val)
506 const ir_edge_t *edge;
507 const ir_edge_t *next;
509 assert(orig_block && orig_val && second_block && second_val &&
510 "no parameter of construct_ssa may be NULL");
512 /* no need to do anything */
513 if (orig_val == second_val)
516 irg = get_irn_irg(orig_val);
518 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
519 inc_irg_visited(irg);
521 mode = get_irn_mode(orig_val);
522 get_node_info(orig_block)->link = orig_val;
523 mark_irn_visited(orig_block);
525 ssa_second_def_block = second_block;
526 ssa_second_def = second_val;
528 /* Only fix the users of the first, i.e. the original node */
529 foreach_out_edge_safe(orig_val, edge, next) {
530 ir_node *user = get_edge_src_irn(edge);
531 int j = get_edge_src_pos(edge);
532 ir_node *user_block = get_nodes_block(user);
539 DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user)));
542 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
543 newval = search_def_and_create_phis(pred_block, mode);
545 newval = search_def_and_create_phis(user_block, mode);
548 /* If we get a bad node the user keeps the original in. No second definition needed. */
549 if (newval != user && !is_Bad(newval))
550 set_irn_n(user, j, newval);
553 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
556 /* get the number of backedges without alien bes */
557 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
561 int arity = get_irn_arity(loophead);
562 for (i = 0; i < arity; ++i) {
563 ir_node *pred = get_irn_n(loophead, i);
564 if (is_backedge(loophead, i) && ( with_alien || is_in_loop(pred)) )
571 * Sets the nodes backedges, according to its predecessors link.
573 static void fix_backedge_info(ir_node *node)
576 for (i = 0; i < get_irn_arity(node); ++i) {
577 ir_node *pred = get_irn_n(node, i);
578 if (get_node_info(pred)->link != NULL) {
579 set_backedge(node, i);
580 get_node_info(pred)->link = NULL;
581 DB((dbg, LEVEL_5, "fix backedge: node %ld pred %ld is backedge\n",
582 get_irn_node_nr(node), get_irn_node_nr(pred)));
584 set_not_backedge(node, i);
585 DB((dbg, LEVEL_5, "fix backedge: node %ld pred %ld is not backedge\n",
586 get_irn_node_nr(node), get_irn_node_nr(pred)));
593 * ============= PEELING =====================================
598 * Rewires the heads after peeling.
600 static void peel_fix_heads(void)
602 ir_node **loopheadnins, **peelheadnins;
603 ir_node *loophead = loop_cf_head;
604 ir_node *peelhead = get_copy(loophead);
606 int headarity = get_irn_arity(loophead);
613 int backedges_n = get_backedge_n(loophead, 0);
615 int lhead_arity = 2 * backedges_n;
616 int phead_arity = headarity - backedges_n;
619 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
620 NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
622 for_each_phi(loophead, phi) {
623 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
625 for_each_phi(peelhead, phi) {
626 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
629 for (i = 0; i < headarity; i++)
631 ir_node *orgjmp = get_irn_n(loophead, i);
632 ir_node *copyjmp = get_copy(orgjmp);
635 * Rewire the head blocks ins and their phi ins.
636 * Requires phi list per block.
638 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
639 loopheadnins[lheadin_c] = orgjmp;
640 /* marks out edge as backedge */
641 get_node_info(orgjmp)->link = orgjmp;
642 for_each_phi(loophead, phi) {
643 get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ;
647 loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */
648 /* marks out edge as normal edge */
649 get_node_info(copyjmp)->link = NULL;
650 /* get_irn_n( get_copy_of(phi), i) <!=> get_copy_of(get_irn_n( phi, i))
651 * Order is crucial! Predecessors outside of the loop are non existent.
652 * The copy (cloned with its ins!) has pred i,
653 * but phis pred i might not have a copy of itself.
655 for_each_phi(loophead, phi) {
656 get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ;
660 peelheadnins[pheadin_c] = orgjmp;
661 /* marks out edge as normal edge */
662 get_node_info(orgjmp)->link = NULL;
663 for_each_phi(peelhead, phi) {
664 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
670 assert(pheadin_c == ARR_LEN(peelheadnins) &&
671 lheadin_c == ARR_LEN(loopheadnins) &&
672 "the constructed head arities do not match the predefined arities");
675 * assign the ins to the nodes
677 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
678 set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
680 /* Fixes the backedge information according to the link.
681 * Following loop optimizations might depend on it. */
682 fix_backedge_info(loophead);
683 fix_backedge_info(peelhead);
685 for_each_phi(loophead, phi) {
686 ir_node **ins = get_node_info( phi )->ins;
687 set_irn_in(phi, lhead_arity, ins);
690 for_each_phi(peelhead, phi) {
691 ir_node **ins = get_node_info( phi )->ins;
692 set_irn_in(phi, phead_arity, ins);
697 * Create a raw copy (ins are still the old ones) of the given node.
699 static ir_node *rawcopy_node(ir_node *node)
704 cp = exact_copy(node);
706 cpstate = new_node_info();
707 set_irn_link(cp, cpstate);
708 mark_irn_visited(cp);
713 * This walker copies all walked nodes.
714 * If the walk_condition is true for a node, it is walked.
715 * All nodes node_info->copy attributes has to be NULL prior to every to every walk.
717 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
723 ir_graph *irg = current_ir_graph;
724 node_info *node_info = get_node_info(node);
727 * break condition and cycle resolver, creating temporary node copies
729 if (get_irn_visited(node) >= get_irg_visited(irg)) {
730 /* Here we rely on nodestate's copy being initialized with NULL */
731 DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
732 if (node_info->copy == NULL) {
733 /* if (!is_Const(node) && !is_SymConst(node)) { */
734 cp = rawcopy_node(node);
737 node_info->copy = cp;
739 DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
745 mark_irn_visited(node);
747 if (!is_Block(node)) {
748 ir_node *pred = get_nodes_block(node);
749 if (walk_condition(pred))
750 DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred)));
751 copy_walk(pred, walk_condition, set_loop);
754 arity = get_irn_arity(node);
756 NEW_ARR_A(ir_node *, cpin, arity);
758 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
759 ir_node *pred = get_irn_n(node, i);
761 if (walk_condition(pred)) {
762 DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred)));
763 copy_walk(pred, walk_condition, set_loop);
764 cpin[i] = get_copy(pred);
765 DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n",
766 get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
772 /* copy node / finalize temp node */
773 if (node_info->copy == NULL) {
774 /* No temporary copy existent */
776 /* Do not copy constants TODO right? */
777 /* if (!is_Const(node) && !is_SymConst(node)) { */
778 cp = rawcopy_node(node);
781 node_info->copy = cp;
783 DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
785 /* temporary copy is existent but without correct ins */
787 DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
790 if (!is_Block(node)) {
791 ir_node *cpblock = get_copy(get_nodes_block(node));
793 set_nodes_block(cp, cpblock );
794 /* fix the phi information in attr.phis */
796 add_Block_phi(cpblock, cp);
798 /* macroblock info has not been copied */
799 set_Block_MacroBlock(cp, cp);
802 set_irn_loop(cp, set_loop);
803 set_irn_in(cp, ARR_LEN(cpin), cpin);
806 /* Loop peeling, and fix the cf for the loop entry nodes, which have now more preds */
807 static void peel(out_edge *loop_outs)
810 ir_node **entry_buffer;
813 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
815 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
817 /* duplicate loop walk */
818 inc_irg_visited(current_ir_graph);
820 for(i = 0; i < ARR_LEN(loop_outs); i++) {
821 out_edge entry = loop_outs[i];
822 ir_node *node = entry.node;
823 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
825 if (is_Block(node)) {
826 copy_walk(pred, is_in_loop, NULL);
827 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
829 copy_walk(pred, is_in_loop, NULL);
830 if (!is_End(node)) /* leave out keepalives */
831 /* Node is user of a value defined inside the loop.
832 * We'll need a phi since we duplicated the loop. */
833 /* cannot construct_ssa here, because it needs another walker */
834 entry_buffer[entry_c++] = pred;
838 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
840 /* Rewires the 2 heads */
843 /* Generate phis for values from peeled code and original loop */
844 for(i = 0; i < entry_c; i++)
846 ir_node *cppred, *block, *cpblock, *pred;
848 /* It is not possible to use
849 * pred = get_irn_n(entry.node, entry.pred_irn_n);
850 * because we might have changed the nodes predecessors in construct_ssa
852 pred = entry_buffer[i];
853 cppred = get_copy(pred);
854 block = get_nodes_block(pred);
855 cpblock = get_nodes_block(cppred);
856 construct_ssa(block, pred, cpblock, cppred);
861 * Populates head_entries with (node, pred_pos) tuple
862 * whereas the node's pred at pred_pos is in the head but not the node itself.
863 * Head and condition chain blocks must be marked.
865 static void get_head_outs(ir_node *node, void *env)
868 int arity = get_irn_arity(node);
871 DB((dbg, LEVEL_5, "get head entries %ld \n", get_irn_node_nr(node)));
873 for(i = 0; i < arity; ++i) {
874 /* node is not in the head, but the predecessor is.
875 * (head or loop chain nodes are marked) */
878 DB((dbg, LEVEL_5, "... "));
879 DB((dbg, LEVEL_5, "node %ld marked %d (0) pred %d marked %d (1) \n",
880 node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
883 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
886 entry.pred_irn_n = i;
888 "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
889 get_irn_node_nr(node), i, get_irn_node_nr(node), get_irn_node_nr(get_irn_n(node, i))));
890 ARR_APP1(out_edge, cur_head_outs, entry);
896 * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
897 * A block belongs to the chain if a condition branches out of the loop.
898 * Returns if the given block belongs to the condition chain.
899 * TODO do not (or do?) invert, if all blocks have loop outs.
901 static unsigned find_condition_chains(ir_node *block) {
902 const ir_edge_t *edge;
906 DB((dbg, LEVEL_1, "condition_chains for block %ld\n", get_irn_node_nr(block)));
908 /* we need all outs, including keeps (TODO firm function for that??) */
909 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
913 /* We do not want to collect more nodes from condition chains, than the limit allows us to.
914 * Also, leave at least one block as body. */
915 if (head_inversion_node_count + nodes_n > head_inversion_node_limit
916 || head_inversion_block_count + 1 == loop_info.blocks) {
917 set_Block_mark(block, 0);
918 DB((dbg, LEVEL_1, "block %ld over limit or no blocks to invert\n",
919 get_irn_node_nr(block)));
923 /* First: check our successors, and add all succs that are outside of the loop to the list */
924 foreach_block_succ(block, edge) {
925 ir_node *src = get_edge_src_irn( edge );
926 int pos = get_edge_src_pos( edge );
928 if (!is_in_loop(src)) {
930 /*printf(" src %ld @ %d into block %ld \n", src->node_nr, pos, block->node_nr);*/
934 entry.pred_irn_n = pos;
935 ARR_APP1(out_edge, cond_chain_entries, entry);
936 mark_irn_visited(src);
940 /* this block is not part of the chain,
941 * because the chain would become too big or we have no succ outside of the loop */
943 set_Block_mark(block, 0);
946 set_Block_mark(block, 1);
947 ++head_inversion_block_count;
948 DB((dbg, LEVEL_1, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
949 head_inversion_node_count += nodes_n;
952 /* Second: walk all successors, and add them to the list if they are not part of the chain */
953 foreach_block_succ(block, edge) {
955 ir_node *src = get_edge_src_irn( edge );
956 int pos = get_edge_src_pos( edge );
958 /* already done cases */
959 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
963 mark_irn_visited(src);
964 DB((dbg, LEVEL_5, "condition chain walk %ld\n", get_irn_node_nr(src)));
965 inchain = find_condition_chains( src );
967 /* if successor is not part of chain we need to collect its outs */
971 entry.pred_irn_n = pos;
972 ARR_APP1(out_edge, cond_chain_entries, entry);
980 * Loop inversion, peeling, unrolling.
982 static void inversion_fix_heads(void)
984 ir_node **loopheadnins, **invheadnins;
985 ir_node *loophead = loop_cf_head;
986 ir_node *invhead = get_copy(loophead);
988 int headarity = get_irn_arity(loophead);
995 int backedges_n = get_backedge_n(loophead, 0);
996 int lhead_arity = backedges_n;
997 int ihead_arity = headarity - backedges_n;
999 /* new in arrays for all phis in the head blocks */
1000 NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
1001 NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
1003 for_each_phi(loophead, phi) {
1004 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1006 for_each_phi(invhead, phi) {
1007 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1010 for (i = 0; i < headarity; i++) {
1011 ir_node *pred = get_irn_n(loophead, i);
1014 * Rewire the head blocks ins and their phi ins.
1015 * Requires phi list per block.
1017 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1018 /* just copy these edges */
1019 loopheadnins[lheadin_c] = pred;
1020 get_node_info(pred)->link = NULL; /*(void *)is_backedge(loophead, i);*/
1021 for_each_phi(loophead, phi) {
1022 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
1026 invheadnins[iheadin_c] = pred;
1027 get_node_info(pred)->link = (void *)1;
1028 for_each_phi(invhead, phi) {
1029 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1035 /* assign the ins to the head blocks */
1036 set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1037 set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1039 /* FIX and set former be to normal edges */
1040 fix_backedge_info(loophead);
1041 fix_backedge_info(invhead);
1043 /* assign the ins for the phis */
1044 for_each_phi(loophead, phi) {
1045 ir_node **ins = get_node_info(phi)->ins;
1046 set_irn_in(phi, lhead_arity, ins);
1049 for_each_phi(invhead, phi) {
1050 ir_node **ins = get_node_info(phi)->ins;
1051 set_irn_in(phi, ihead_arity, ins);
1055 static void inversion_walk(out_edge *head_entries)
1060 ir_node **entry_buffer;
1061 ir_node **head_phi_assign;
1063 NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1065 head_phi_assign = NEW_ARR_F(ir_node *, 0);
1067 /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
1068 for_each_phi(loop_cf_head , phi) {
1069 for(i=0; i<get_irn_arity(phi); ++i) {
1070 ir_node *def = get_irn_n(phi, i);
1071 if (is_nodesblock_marked(def)) {
1072 ARR_APP1(ir_node *, head_phi_assign, def);
1077 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1080 * duplicate condition chain
1082 inc_irg_visited(current_ir_graph);
1084 for(i = 0; i < ARR_LEN(head_entries); ++i) {
1085 out_edge entry = head_entries[i];
1086 ir_node *node = entry.node;
1087 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1089 if (is_Block(node)) {
1090 DB((dbg, LEVEL_5, "\nINIT walk block %ld\n", get_irn_node_nr(pred)));
1091 copy_walk(pred, is_nodesblock_marked, cur_loop);
1092 duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1094 DB((dbg, LEVEL_5, "\nInit walk node %ld\n", get_irn_node_nr(pred)));
1095 copy_walk(pred, is_nodesblock_marked, cur_loop);
1097 /* ignore keepalives */
1099 /* Node is user of a value assigned inside the loop.
1100 * We will need a phi since we duplicated the head. */
1101 entry_buffer[entry_c++] = pred;
1105 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1107 inversion_fix_heads();
1109 /* Generate phis for users of values assigned in the condition chain and read in the loops body */
1110 for(i = 0; i < entry_c; i++) {
1111 ir_node *cppred, *block, *cpblock, *pred;
1113 /* It is not possible to use
1114 * pred = get_irn_n(entry.node, entry.pred_irn_n);
1115 * because we might have changed the nodes predecessors in construct_ssa
1117 pred = entry_buffer[i];
1118 cppred = get_copy(pred);
1119 block = get_nodes_block(pred);
1120 cpblock = get_nodes_block(cppred);
1122 "construct_ssa (loop out value) original %ld and clone %ld\n",
1123 get_irn_node_nr(pred), get_irn_node_nr(cppred)));
1124 construct_ssa(block, pred, cpblock, cppred);
1127 /* Generate phis for values that are assigned in the condition chain
1128 * but not read in the loops body.
1130 for(i = 0; i < ARR_LEN(head_phi_assign); ++i) {
1131 ir_node *def_block, *inhead_phi_def, *inv_def_block, *inv_inhead_phi_def;
1133 /* Note: construct_ssa only fixes the FIRST nodes usage. */
1134 inhead_phi_def = head_phi_assign[i];
1135 inv_inhead_phi_def = get_copy(inhead_phi_def);
1136 def_block = get_nodes_block(inhead_phi_def);
1137 inv_def_block = get_nodes_block(inv_inhead_phi_def);
1139 "construct_ssa (condition chain out values) original %ld and clone %ld\n",
1140 get_irn_node_nr(inv_inhead_phi_def), get_irn_node_nr(inhead_phi_def)));
1141 /*construct_ssa(inv_def_block, inv_inhead_phi_def, def_block, inhead_phi_def);*/
1142 construct_ssa(def_block, inhead_phi_def, inv_def_block, inv_inhead_phi_def);
1145 loop_cf_head = get_copy(loop_cf_head);
1150 void loop_peeling(void)
1152 cur_loop_outs = NEW_ARR_F(out_edge, 0);
1153 irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1155 DB((dbg, LEVEL_3, "is endless loop: %d (no outs but keepalives)\n", loop_info.outs == 0));
1157 (void) get_invariants;
1160 /*inc_irg_visited(current_ir_graph);*/
1161 loop_walker( loop_outs, NULL, get_invariants, NULL );
1163 /* This could be improved with knowledge about variable range. */
1164 if (loop_info.stores == 0 && loop_info.invariant_loads > 0)
1168 peel(cur_loop_outs);
1170 /* DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-peeled1"));*/
1175 set_irg_doms_inconsistent(current_ir_graph);
1177 set_irg_loopinfo_inconsistent(current_ir_graph);
1178 /* TODO Are they really inconsistent (ins set with firm functions)? */
1179 set_irg_outs_inconsistent(current_ir_graph);
1181 DEL_ARR_F(cur_loop_outs);
1184 /* Loop inversion */
1185 void loop_inversion(void)
1187 unsigned do_inversion = 1;
1189 head_inversion_node_limit = 13371337;
1192 /* unsigned max_loop_opnodes = 2000000; */
1193 /*unsigned max_opnodes = 1337;*/
1195 /* Loop complexity too high */
1196 if (loop_info.opnodes_n > max_opnodes)
1201 /* Search for condition chains. */
1202 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1204 /* Did not work as expected */
1205 /* irg_block_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL); */
1207 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1209 loop_info.blocks = get_loop_n_blocks(cur_loop);
1210 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1212 head_inversion_node_count = 0;
1213 head_inversion_block_count = 0;
1215 set_Block_mark(loop_cf_head, 1);
1216 mark_irn_visited(loop_cf_head);
1217 inc_irg_visited(current_ir_graph);
1219 find_condition_chains(loop_cf_head);
1221 /* TODO assume number of phis to be created. prevent inversion in case ...*/
1223 DB((dbg, LEVEL_1, "Loop contains %d blocks.\n", loop_info.blocks));
1224 if (loop_info.blocks < 2) {
1226 DB((dbg, LEVEL_1, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1229 /* We catch endless loops here too,
1230 * because they do not have a condition chain. */
1231 if (head_inversion_block_count < 1) {
1233 DB((dbg, LEVEL_1, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1237 cur_head_outs = NEW_ARR_F(out_edge, 0);
1239 /* get all edges pointing into the head or condition chain */
1240 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1241 inversion_walk(cur_head_outs);
1243 DEL_ARR_F(cur_head_outs);
1245 set_irg_doms_inconsistent(current_ir_graph);
1247 set_irg_loopinfo_inconsistent(current_ir_graph);
1248 /* TODO Are they really inconsistent (ins set with firm functions)? */
1249 set_irg_outs_inconsistent(current_ir_graph);
1253 DEL_ARR_F(cond_chain_entries);
1254 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1258 static void init_analyze(ir_loop *loop)
1260 /* Init new for every loop */
1263 loop_cf_head = NULL;
1264 loop_cf_head_valid = 1;
1265 loop_inv_head = NULL;
1266 loop_peeled_head = NULL;
1268 loop_info.calls = 0;
1269 loop_info.invariant_loads = 0;
1270 loop_info.loads = 0;
1271 loop_info.stores = 0;
1272 loop_info.opnodes_n = 0;
1273 loop_info.blocks = 0;
1276 DB((dbg, LEVEL_1, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
1278 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1280 /* RETURN if there is no valid head */
1281 if (!loop_cf_head || !loop_cf_head_valid) {
1282 DB((dbg, LEVEL_1, "\n**************************************************\n"));
1283 DB((dbg, LEVEL_1, "* No valid loop head. Nothing done. *\n"));
1284 DB((dbg, LEVEL_1, "**************************************************\n"));
1290 if (enable_inversion)
1293 /* TODO uncomment lat0r */
1295 /* RETURN if there is a call in the loop */
1296 if (loop_info.calls)
1300 DB((dbg, LEVEL_1, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
1303 /* Find most inner loops and send them to analyze_loop */
1304 static void find_most_inner_loop(ir_loop *loop)
1306 /* descend into sons */
1307 int sons = get_loop_n_sons(loop);
1309 /*DBG((dbg, LEVEL_5, "current loop head %ld is done %d\n",
1310 * get_loop_node(loop, 0)->node_nr, get_node_info(get_loop_node(loop, 0))->done );*/
1316 el_n = get_loop_n_elements(loop);
1318 for (i=0; i < el_n; ++i)
1320 elem = get_loop_element(loop, i);
1321 /* We can only rely on the blocks,
1322 * as the loop attribute of the nodes seems not to be set. */
1323 if (is_ir_node(elem.kind) && is_Block(elem.node)) {
1324 ARR_APP1(ir_node *, loops, elem.node);
1325 DB((dbg, LEVEL_1, "Found most inner loop (contains block %+F)\n", elem.node));
1329 /*init_analyze(loop);
1333 for(s=0; s<sons; s++) {
1334 find_most_inner_loop(get_loop_son(loop, s));
1343 void loop_optimization(ir_graph *irg)
1349 link_node_state_list = NULL;
1350 set_current_ir_graph(irg);
1354 assure_irg_outs(irg);
1355 /* NOTE: sets only the loop attribute of blocks, not nodes */
1356 /* NOTE: Kills links */
1357 assure_cf_loop(irg);
1359 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1360 collect_phiprojs(irg);
1361 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1363 /* allocate node_info for additional information on nodes */
1364 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1365 /* irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL); */
1367 loop = get_irg_loop(irg);
1368 sons = get_loop_n_sons(loop);
1371 * assure_cf_loop() creates a completely new loop tree every time.
1372 * Thus we cannot optimize the loop, assure_cf_loop and continue with the next loop,
1373 * as the next loop must be searched with a new search and is not distinguishable from the
1374 * already done loop.
1375 * The links of the loops are also not available anymore (to store a "loop done" flag).
1376 * Therefore we save a block per loop.
1377 * We rely on the loop optimizations not to remove any block from the loop.
1378 * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
1379 * TODO Try to not destroy these informations.
1381 loops = NEW_ARR_F(ir_node *, 0);
1383 for (nr = 0; nr < sons; ++nr) {
1384 find_most_inner_loop(get_loop_son(loop, nr));
1387 for (i = 0; i < ARR_LEN(loops); ++i) {
1390 /* not that efficient */
1392 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1395 edges_assure(current_ir_graph);
1396 assure_irg_outs(current_ir_graph);
1398 /* NOTE: sets only the loop attribute of blocks */
1399 /* NOTE: Kills links */
1400 /* TODO Try not to destroy backedge info. */
1401 assure_cf_loop(current_ir_graph);
1403 /*construct_backedges(current_ir_graph);*/
1405 /* not that efficient either... */
1406 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1407 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1409 loop = get_irn_loop(loops[i]);
1417 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1418 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
1421 void do_loop_inversion(ir_graph *irg)
1424 enable_inversion = 1;
1426 DB((dbg, LEVEL_1, " >>> inversion (Startnode %ld) <<<\n",
1427 get_irn_node_nr(get_irg_start(irg))));
1429 loop_optimization(irg);
1431 DB((dbg, LEVEL_1, " >>> inversion done (Startnode %ld) <<<\n",
1432 get_irn_node_nr(get_irg_start(irg))));
1435 void do_loop_peeling(ir_graph *irg)
1438 enable_inversion = 0;
1442 DB((dbg, LEVEL_1, " >>> peeling is disabled atm. <<<\n"));
1445 DB((dbg, LEVEL_1, " >>> peeling (Startnode %ld) <<<\n",
1446 get_irn_node_nr(get_irg_start(irg))));
1448 loop_optimization(irg);
1450 DB((dbg, LEVEL_1, " >>> peeling done (Startnode %ld) <<<\n",
1451 get_irn_node_nr(get_irg_start(irg))));
1455 void firm_init_loop_opt(void)
1457 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");