2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling
30 #include "iroptimize.h"
48 #include "irbackedge_t.h"
49 #include "irphase_t.h"
53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
56 * Convenience macro for iterating over every phi node of the given block.
57 * Requires phi list per block.
59 #define for_each_phi(block, phi) \
60 for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))
62 #define for_each_phi_safe(head, phi, next) \
63 for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
64 (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
66 /* currently processed loop */
67 static ir_loop *cur_loop;
69 /* Condition for performing copy_walk. */
70 typedef unsigned walker_condition(ir_node *);
72 /* Node and position of a predecessor */
73 typedef struct out_edge {
79 typedef struct inversion_node_info {
82 struct inversion_node_info *free_list_next; /* linked list to free all node_infos */
83 } inversion_node_info;
86 typedef struct node_info {
89 struct ir_node *free_list_next; /* linked list to free all node_infos */
92 /* Head of the list for freeing node_infos */
93 static ir_node *free_list;
95 /* A walker may start visiting the current loop with these nodes. */
96 static out_edge *cur_loop_outs;
98 /* Outs of the nodes head. */
99 static out_edge *cur_head_outs;
101 /* Information about the loop head */
102 static ir_node *loop_head = NULL;
103 static unsigned loop_head_valid = 1;
105 /* List of all inner loops, that are processed. */
106 static ir_loop **loops;
109 static ir_node *loop_inv_head = NULL;
111 static ir_node *loop_peeled_head = NULL;
113 /* Loop analysis informations */
114 typedef struct loop_info_t {
115 unsigned blocks; /* number of blocks in the loop */
116 unsigned calls; /* number of calls */
119 ir_node *new_loop_head;
122 /* Information about the current loop */
123 static loop_info_t loop_info;
125 /* Outs of the condition chain (loop inversion). */
126 static out_edge *cond_chain_entries;
128 /* Arity of the unrolled loop heads. */
129 static int unroll_arity;
131 static ir_phase *phase;
136 static unsigned head_inversion_node_count;
137 static unsigned inversion_head_node_limit;
138 static unsigned head_inversion_block_count;
140 static unsigned enable_peeling;
141 static unsigned enable_inversion;
142 static unsigned enable_unrolling;
145 /* Creates node_info. A linked list keeps all node_infos to free them again. */
146 static node_info *new_node_info(ir_node *node)
148 node_info *l = XMALLOCZ(node_info);
149 l->free_list_next = free_list;
155 /* Returns linked node_info for given node. */
156 static node_info *get_node_info(ir_node *n)
158 return ((node_info *)get_irn_link(n));
161 /* Allocates a node_info struct for the given node */
162 static void alloc_node_info(ir_node *node)
165 state = new_node_info(node);
166 set_irn_link(node, (void *)state);
169 /* Frees node info and copy array for given node. Returns next node of free_list. */
170 static ir_node *free_info(ir_node *node)
172 node_info *info = get_node_info(node);
173 ir_node *next = info->free_list_next;
174 /*DB((dbg, LEVEL_1, "FREE %N\n", node));*/
175 DEL_ARR_F(info->copy);
177 set_irn_link(node, NULL);
181 /* Free all created node_infos, including the copy arrays. */
182 static void free_node_info(void)
184 ir_node *node = free_list;
186 node = free_info(node);
191 /* Free node_infos of blocks only, including the copy arrays. */
192 static void free_node_info_blocks(void)
194 ir_node *node = free_list;
195 ir_node *new_freelist = NULL;
198 if (is_Block(node)) {
199 next = free_info(node);
201 next = get_node_info(node)->free_list_next;
202 get_node_info(node)->free_list_next = new_freelist;
207 free_list = new_freelist;
210 /* Reset nodes link. For use with a walker. */
211 static void reset_link(ir_node *node, void *env)
214 set_irn_link(node, NULL);
217 static void set_inversion_copy(ir_node *n, ir_node *cp)
219 phase_set_irn_data(phase, n, cp);
222 /* Returns a nodes copy. */
223 static ir_node *get_copy(ir_node *n, int index)
225 ir_node *cp = ((node_info *)get_irn_link(n))->copy[index];
229 /* Returns a nodes copy if it exists, or else the node. */
230 static ir_node *get_inversion_copy(ir_node *n)
232 ir_node *cp = (ir_node *)phase_get_irn_data(phase, n);
236 /* Returns a nodes copy, or node if there is no node info linked. */
237 static ir_node *get_copy_safe(ir_node *node, int index)
239 node_info *info = (node_info *)get_irn_link(node);
241 return get_copy(node, index);
246 /* Assigns the given array to a nodes node_info */
247 static void set_copy_arr(ir_node *n, ir_node **arr)
249 ((node_info *)get_irn_link(n))->copy = arr;
252 /* Assigns a copy with given index to a node.
253 * Prerequirements are existing node_info and copy array. */
254 static void set_copy(ir_node *n, int index, ir_node *copy)
256 ((node_info *)get_irn_link(n))->copy[index] = copy;
259 /* Returns 0 if the node or block is not in cur_loop. */
260 static unsigned is_in_loop(ir_node *node)
262 return (get_irn_loop(get_block(node)) == cur_loop);
265 /* Returns if the given be is an alien edge.
266 * This is the case when the pred is not in the loop. */
267 static unsigned is_alien_edge(ir_node *n, int i)
269 return(!is_in_loop(get_irn_n(n, i)));
272 static unsigned is_own_backedge(ir_node *n, int pos)
274 return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
277 /* Resets block mark for given node. For use with walker */
278 static void reset_block_mark(ir_node *node, void * env)
283 set_Block_mark(node, 0);
286 static unsigned is_nodes_block_marked(ir_node* node)
289 return get_Block_mark(node);
291 return get_Block_mark(get_block(node));
294 /* Returns the number of blocks in a loop. */
295 static int get_loop_n_blocks(ir_loop *loop)
299 elements = get_loop_n_elements(loop);
301 for (e=0; e<elements; e++) {
302 loop_element elem = get_loop_element(loop, e);
303 if (is_ir_node(elem.kind) && is_Block(elem.node))
310 static void extend_ins_by_copy(ir_node *block, int pos)
315 int arity = get_irn_arity(block);
316 int new_arity = arity + 1;
317 assert(is_Block(block));
319 NEW_ARR_A(ir_node *, ins, new_arity);
321 /* Extend block by copy of definition at pos */
322 for(i = 0; i < arity; ++i) {
323 ins[i] = get_irn_n(block, i);
325 ins[i] = get_inversion_copy(get_irn_n(block, pos));
327 DB((dbg, LEVEL_5, "Extend block %N by %N\n", block, ins[i]));
329 /* Fixes backedges. We rely on correct backedges. */
330 set_irn_in(block, new_arity, ins);
332 /* Extend block phis by copy of definition at pos */
333 for_each_phi(block, phi) {
335 for(i = 0; i < arity; ++i)
336 ins[i] = get_irn_n(phi, i);
337 pred = get_irn_n(phi, pos);
338 cp = get_inversion_copy(pred);
339 /* If the phis in is not in the condition chain (eg. a constant),
340 * there is no copy. */
346 DB((dbg, LEVEL_5, "Extend phi %N by %N\n", phi, ins[i]));
347 set_irn_in(phi, new_arity, ins);
352 * Extends all blocks which succeed the loop with the copied loops outs.
353 * Also extends the blocks phis. Requires block phi list.
354 * Returns an updated list of loop outs of the original loop outs.
356 static out_edge *extend_ins(out_edge *outs, int copies)
361 ir_node *phi, *next_phi, *new_phi, *head, *new_block;
362 int old_arity, new_arity;
366 inc_irg_visited(current_ir_graph);
368 new_outs = NEW_ARR_F(out_edge, 0);
371 for (i = 0; i < ARR_LEN(outs); ++i) {
372 out_edge edge = outs[i];
373 ir_node *node = edge.node;
374 DB((dbg, LEVEL_5, "outs %N %d\n", node, edge.pos));
378 for (i = 0; i < ARR_LEN(outs); ++i) {
379 out_edge edge = outs[i];
380 ir_node *node = edge.node;
381 int in_c, positions_n;
384 if (!is_Block(node) && !is_Bad(node)) {
389 ir_node *p = get_nodes_block(node);
390 for(c = 0; c < get_irn_arity(p); ++c) {
391 if (is_in_loop(get_irn_n(p, c))) {
397 ARR_APP1(out_edge, new_outs, edge);
398 DB((dbg, LEVEL_5, "new out %N\n", node));
403 ARR_APP1(out_edge, new_outs, edge);
404 DB((dbg, LEVEL_5, "new out %N\n", node));
408 /* CONTINUE if node already processed. */
409 if (irn_visited(node))
415 positions = NEW_ARR_F(int, 0);
418 /* Search for all occurences of the current node, and save the in positions. */
419 for (p = 0; p < ARR_LEN(outs); ++p) {
420 ir_node *cur = outs[p].node;
421 int d_pos = outs[p].pos;
424 DB((dbg, LEVEL_5, "Loop successor %N with pred @%d in loop\n",
426 ARR_APP1(int, positions, d_pos);
427 mark_irn_visited(cur);
432 old_arity = get_irn_arity(node);
433 new_arity = old_arity + (positions_n * copies);
435 ins = NEW_ARR_F(ir_node*, new_arity);
436 bes = NEW_ARR_F(int, new_arity);
438 /* extend block ins */
439 for (in_c = 0; in_c < old_arity; ++in_c) {
440 ins[in_c] = get_irn_n(node, in_c);
441 bes[in_c] = is_backedge(node, in_c);
443 for (p = 0; p < positions_n; ++p) {
444 for (c = 0; c < copies; ++c) {
445 ir_node *cp = get_copy_safe(get_irn_n(node, positions[p]), c);
446 DB((dbg, LEVEL_5, "%N (new_arity %d) @%d = %N\n",
447 node, new_arity, in_c, cp));
449 bes[in_c] = is_backedge(node, positions[p]);
454 assert(new_arity == in_c);
456 head = get_Block_phis(node);
457 new_block = new_r_Block(get_irn_irg(node), new_arity, ins);
458 set_irn_loop(new_block, get_irn_loop(node));
460 for (in_c = 0; in_c < new_arity; ++in_c) {
462 set_backedge(new_block, in_c);
468 set_Block_phis(node, NULL);
470 phi_ins = NEW_ARR_F(ir_node *, new_arity);
471 phi_bes = NEW_ARR_F(int, new_arity);
473 for_each_phi_safe(head, phi, next_phi) {
474 for (in_c = 0; in_c < old_arity; ++in_c) {
475 phi_ins[in_c] = get_irn_n(phi, in_c);
476 phi_bes[in_c] = is_backedge(phi, in_c);
479 for (p = 0; p < positions_n; ++p) {
480 for(c = 0; c < copies; ++c) {
482 pred = get_irn_n(phi, positions[p]);
483 DB((dbg, LEVEL_5, "Phi %N pred @%d is %N\n",
484 phi, positions[p], pred));
485 cp = get_copy_safe(pred, c);
487 DB((dbg, LEVEL_5, "Phi %N in %d is %N\n",
489 phi_bes[in_c] = is_backedge(phi, positions[p]);
494 new_phi = new_r_Phi(new_block, new_arity, phi_ins, get_irn_mode(phi));
496 for (in_c = 0; in_c < new_arity; ++in_c) {
498 set_backedge(new_phi, in_c);
500 exchange(phi, new_phi);
501 DB((dbg, LEVEL_5, "exch Phi %N %N\n",phi, new_phi));
503 add_Block_phi(new_block, new_phi);
509 exchange(node, new_block);
510 set_Block_MacroBlock(new_block, new_block);
511 DB((dbg, LEVEL_5, "exch block %N %N\n", node, new_block));
513 DEL_ARR_F(positions);
519 * Finds loop head and loop_info.
521 static void get_loop_info(ir_node *node, void *env)
523 unsigned node_in_loop, pred_in_loop;
527 arity = get_irn_arity(node);
528 for (i = 0; i < arity; i++) {
529 ir_node *pred = get_irn_n(node, i);
531 pred_in_loop = is_in_loop(pred);
532 node_in_loop = is_in_loop(node);
534 /* collect some loop information */
540 /* Find the loops head/the blocks with cfpred outside of the loop */
541 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
542 ir_node *cfgpred = get_Block_cfgpred(node, i);
544 if (!is_in_loop(cfgpred)) {
545 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
547 /* another head? We do not touch this. */
548 if (loop_head && loop_head != node) {
558 static void correct_phis(ir_node *node, void *env)
562 if (is_Phi(node) && get_irn_arity(node) == 1) {
565 in[0] = get_irn_n(node, 0);
567 exch = new_rd_Phi(get_irn_dbg_info(node),
568 get_nodes_block(node), 1, in,
571 exchange(node, exch);
572 /* TODO make sure visited is copied */
578 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
579 static void get_loop_outs(ir_node *node, void *env)
581 unsigned node_in_loop, pred_in_loop;
585 arity = get_irn_arity(node);
586 for (i = 0; i < arity; ++i) {
587 ir_node *pred = get_irn_n(node, i);
589 pred_in_loop = is_in_loop(pred);
590 node_in_loop = is_in_loop(node);
592 if (pred_in_loop && !node_in_loop) {
596 ARR_APP1(out_edge, cur_loop_outs, entry);
597 if (is_Block(node)) {
599 loop_info.cf_out = entry;
601 DB((dbg, LEVEL_5, "loop outs %N\n", node));
606 static ir_node *ssa_second_def;
607 static ir_node *ssa_second_def_block;
610 * Walks the graph bottom up, searching for definitions and creates phis.
612 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
620 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
622 /* Prevents creation of phi that would be bad anyway.
623 * Dead and bad blocks. */
624 if (get_irn_arity(block) < 1 || is_Bad(block)) {
625 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
629 if (block == ssa_second_def_block) {
630 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
631 return ssa_second_def;
634 /* already processed this block? */
635 if (irn_visited(block)) {
636 ir_node *value = get_irn_link(block);
637 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
641 irg = get_irn_irg(block);
642 assert(block != get_irg_start_block(irg));
644 /* a Block with only 1 predecessor needs no Phi */
645 n_cfgpreds = get_Block_n_cfgpreds(block);
646 if (n_cfgpreds == 1) {
647 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
650 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
652 value = search_def_and_create_phis(pred_block, mode);
653 set_irn_link(block, value);
654 mark_irn_visited(block);
659 /* create a new Phi */
660 NEW_ARR_A(ir_node*, in, n_cfgpreds);
661 for (i = 0; i < n_cfgpreds; ++i)
662 in[i] = new_Unknown(mode);
664 phi = new_r_Phi(block, n_cfgpreds, in, mode);
667 /* Important: always keep block phi list up to date. */
668 add_Block_phi(block, phi);
670 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
672 set_irn_link(block, phi);
673 mark_irn_visited(block);
675 /* set Phi predecessors */
676 for (i = 0; i < n_cfgpreds; ++i) {
678 ir_node *pred_block = get_Block_cfgpred_block(block, i);
679 assert(pred_block != NULL);
680 pred_val = search_def_and_create_phis(pred_block, mode);
681 assert(pred_val != NULL);
683 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
684 set_irn_n(phi, i, pred_val);
690 * Given a set of values this function constructs SSA-form for the users of the
691 * first value (the users are determined through the out-edges of the value).
692 * Works without using the dominance tree.
694 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
695 ir_node *second_block, ir_node *second_val)
699 const ir_edge_t *edge;
700 const ir_edge_t *next;
702 assert(orig_block && orig_val && second_block && second_val &&
703 "no parameter of construct_ssa may be NULL");
705 /* no need to do anything */
706 if (orig_val == second_val)
709 irg = get_irn_irg(orig_val);
711 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
712 inc_irg_visited(irg);
714 mode = get_irn_mode(orig_val);
715 set_irn_link(orig_block, orig_val);
716 mark_irn_visited(orig_block);
718 ssa_second_def_block = second_block;
719 ssa_second_def = second_val;
721 /* Only fix the users of the first, i.e. the original node */
722 foreach_out_edge_safe(orig_val, edge, next) {
723 ir_node *user = get_edge_src_irn(edge);
724 int j = get_edge_src_pos(edge);
725 ir_node *user_block = get_nodes_block(user);
732 DB((dbg, LEVEL_5, "original user %N\n", user));
735 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
736 newval = search_def_and_create_phis(pred_block, mode);
738 newval = search_def_and_create_phis(user_block, mode);
741 /* If we get a bad node the user keeps the original in. No second definition needed. */
742 if (newval != user && !is_Bad(newval))
743 set_irn_n(user, j, newval);
746 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
750 /* Construct ssa for def and all of its copies.
751 * NOTE: Overwrites links of blocks.
753 static ir_node *construct_ssa_n(ir_node *def, ir_node *user, int copies)
757 irg = get_irn_irg(def);
761 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
762 inc_irg_visited(current_ir_graph);
763 mode = get_irn_mode(def);
765 set_irn_link(get_nodes_block(def), def);
766 mark_irn_visited(get_nodes_block(def));
768 for (i = 0; i < copies; ++i) {
769 ir_node *cp = get_copy(def, i);
770 ir_node *block = get_nodes_block(cp);
771 set_irn_link(block, cp);
772 mark_irn_visited(block);
774 DB((dbg, LEVEL_5, "ssa mark cp %N of def %N usr %N\n", cp, def, user));
779 user_arity = get_irn_arity(user);
780 for (i = 0; i < user_arity; ++i) {
781 ir_node *user_block = get_nodes_block(user);
782 if (get_irn_n(user, i) != def)
785 DB((dbg, LEVEL_5, "ssa_n found edge of user %N\n", user));
788 ir_node *pred_block = get_Block_cfgpred_block(user_block, i);
789 newval = search_def_and_create_phis(pred_block, mode);
791 newval = search_def_and_create_phis(user_block, mode);
793 /*TODO bads should not happen! */
794 if (newval != user) {
795 DB((dbg, LEVEL_5, "=> ssa_n new in is %N\n", newval));
796 set_irn_n(user, i, newval);
797 /*if (is_Phi(newval) && get_nodes_block(newval)== user_block) {
798 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
803 /* BREAK, as we found and handled the definition */
807 if (is_Bad(newval)) {
808 DB((dbg, LEVEL_5, "=> ssa_n new in is BAD %N in graph %N\n", newval, current_ir_graph));
809 add_End_keepalive(get_irg_end(current_ir_graph), newval);
810 dump_ir_block_graph(current_ir_graph, "-bad");
817 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
821 /* Returns the number of backedges with or without alien bes. */
822 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
826 int arity = get_irn_arity(loophead);
827 for (i = 0; i < arity; ++i) {
828 ir_node *pred = get_irn_n(loophead, i);
829 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
836 * Returns a raw copy of the given node.
837 * Attributes are kept/set according to the needs of loop inversion.
839 static ir_node *copy_node_inversion(ir_node *node)
844 cp = exact_copy(node);
845 arity = get_irn_arity(node);
847 for (i = 0; i < arity; ++i) {
848 if (is_backedge(node, i))
852 set_inversion_copy(node, cp);
853 DB((dbg, LEVEL_5, "set copy of %N to cp %N \n", node, cp));
856 /* We may not keep the old macroblock. */
857 set_Block_MacroBlock(cp, cp);
858 set_Block_mark(cp, 0);
865 * This walker copies all walked nodes.
866 * If the walk_condition is true for a node, it is copied.
867 * All nodes node_info copy attributes have to be NULL prior to every walk.
869 static void copy_walk(ir_node *node, walker_condition *walk_condition,
876 ir_graph *irg = current_ir_graph;
879 * break condition and cycle resolver, creating temporary node copies
881 if (get_irn_visited(node) >= get_irg_visited(irg)) {
882 /* Here we rely on nodestate's copy being initialized with NULL */
883 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
884 if (get_inversion_copy(node) == NULL) {
885 cp = copy_node_inversion(node);
886 DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
892 mark_irn_visited(node);
894 if (!is_Block(node)) {
895 ir_node *pred = get_nodes_block(node);
896 if (walk_condition(pred))
897 DB((dbg, LEVEL_5, "walk block %N\n", pred));
898 copy_walk(pred, walk_condition, set_loop);
901 arity = get_irn_arity(node);
903 NEW_ARR_A(ir_node *, cpin, arity);
905 for (i = 0; i < arity; ++i) {
906 ir_node *pred = get_irn_n(node, i);
908 if (walk_condition(pred)) {
909 DB((dbg, LEVEL_5, "walk node %N\n", pred));
910 copy_walk(pred, walk_condition, set_loop);
911 cpin[i] = get_inversion_copy(pred);
912 DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
913 node, get_inversion_copy(pred), pred));
919 /* copy node / finalize temp node */
920 if (get_inversion_copy(node) == NULL) {
921 /* No temporary copy existent */
922 cp = copy_node_inversion(node);
923 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
925 /* temporary copy is existent but without correct ins */
926 cp = get_inversion_copy(node);
927 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
930 if (!is_Block(node)) {
931 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
933 set_nodes_block(cp, cpblock );
935 add_Block_phi(cpblock, cp);
938 set_irn_loop(cp, set_loop);
940 /* Keeps phi list of temporary node. */
941 set_irn_in(cp, ARR_LEN(cpin), cpin);
944 /* Creates a new node from a given one, but with custom ins. */
945 static ir_node *copy_node_changed(
946 ir_node *node, ir_node *block, int arity, ir_node **ins, ir_loop *loop)
949 DB((dbg, LEVEL_5, "New node from %N in %N block %N arity %d\n",
950 node, get_irn_irg(node), block, arity));
953 cp = new_rd_Phi(get_irn_dbg_info(node),
961 /* We want to keep phi nodes with arity 1, as it makes rewiring a lot easier. */
962 cp = new_ir_node(get_irn_dbg_info(node),
970 /* By any means, keep the blocks phi list up-to-date,
971 because regaining it would destroy the links. */
973 add_Block_phi(block, cp);
975 if (get_irn_op(node) == get_irn_op(cp))
976 copy_node_attr(get_irn_irg(node), node, cp);
978 /*new_backedge_info(cp); function removed */
980 set_Block_MacroBlock(cp, cp);
982 set_irn_loop(cp, loop);
984 /* Blocks are copied first, so that future phi nodes populate the phi list. */
986 set_Block_phis(cp, NULL);
988 DB((dbg, LEVEL_5, "New node %N in %N block %N arity %d\n",
989 cp, get_irn_irg(cp), block, arity));
993 /* To be exchangeable, the unknown needs to have proper graph set. */
994 static ir_node *new_node_unknown(ir_node *block, ir_mode *mode)
996 ir_node *new = new_ir_node(NULL, get_irn_irg(block), block, op_Unknown, mode, 0, NULL);
1001 * Walker, to copy alle nodes that meet the walk_condition.
1002 * Blocks are copied first, which is important for the creation of the phi lists.
1005 static ir_node *copy_walk_n(ir_node *node, walker_condition *walk_condition, int copy_index, int alloc_cp_arr)
1009 unsigned headblock = 0;
1010 ir_node *block, *cp, *new_cp;
1011 ir_node **ins = NULL;
1013 if (irn_visited(node)) {
1014 cp = get_copy(node, copy_index);
1016 DB((dbg, LEVEL_5, "Visited %N\n", node));
1019 DB((dbg, LEVEL_5, "Visited. Return copy %N\n", cp));
1023 if (!is_Block(node)) {
1024 ir_node *unknown = new_node_unknown(
1025 get_nodes_block(node),
1026 get_irn_mode(node));
1027 DB((dbg, LEVEL_5, " #### Create Unknown %N for %N\n", unknown, node));
1028 set_copy(node, copy_index, unknown);
1032 DB((dbg, LEVEL_5, "Walk %N\n", node));
1033 mark_irn_visited(node);
1035 /* Allocate array of copies */
1036 if (alloc_cp_arr > 0) {
1037 ir_node **arr = NEW_ARR_F(ir_node *, alloc_cp_arr);
1038 alloc_node_info(node);
1039 for (i = 0; i < alloc_cp_arr; ++i)
1041 set_copy_arr(node, arr);
1046 if (!is_Block(node)) {
1047 ir_node *orig_block = get_nodes_block(node);
1048 DB((dbg, LEVEL_5, "current node: %N about to walk block %N\n", node, orig_block));
1049 /* Check walk_condition for blocks not necessary */
1051 if (orig_block == loop_head && is_Phi(node))
1054 block = copy_walk_n(orig_block, walk_condition, copy_index, alloc_cp_arr);
1055 DB((dbg, LEVEL_5, "current node: %N block %N blocks copy %N\n",
1056 node, orig_block, block));
1058 if (node == loop_head)
1063 arity = get_irn_arity(node);
1066 /* Headblock and headblock-phi case */
1068 DB((dbg, LEVEL_5, "Headblock/Phi from headblock\n"));
1069 NEW_ARR_A(ir_node *, ins, unroll_arity);
1071 for (i = 0; i < arity; ++i) {
1073 ir_node *pred = get_irn_n(node, i);
1074 if (is_backedge(loop_head, i)) {
1075 if (walk_condition(pred)) {
1076 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1077 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1080 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1085 arity = unroll_arity;
1088 NEW_ARR_A(ir_node *, ins, arity);
1090 for (i = 0; i < arity; ++i) {
1092 ir_node *pred = get_irn_n(node, i);
1093 if (walk_condition(pred)) {
1094 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1095 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1098 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1104 /* NOW, after the walk, we may check if the blocks copy is already present. */
1105 cp = get_copy(node, copy_index);
1106 if (cp != NULL && !is_Unknown(cp)) {
1107 DB((dbg, LEVEL_5, "Must be Block %N copy is already present\n", node, cp));
1108 assert(is_Block(cp) &&
1109 "sanity: The copy should have been a block.");
1113 new_cp = copy_node_changed(node, block, arity, ins, cur_loop);
1114 set_copy(node, copy_index, new_cp);
1116 if (cp != NULL && is_Unknown(cp)) {
1117 DB((dbg, LEVEL_5, "Found unknown %N in %N now exchanged by %N in %N\n",
1118 cp, get_irn_irg(cp), new_cp, get_irn_irg(new_cp)));
1119 exchange(cp, new_cp);
1125 * Populates head_entries with (node, pred_pos) tuple
1126 * whereas the node's pred at pred_pos is in the head but not the node itself.
1127 * Head and condition chain blocks must be marked.
1129 static void get_head_outs(ir_node *node, void *env)
1132 int arity = get_irn_arity(node);
1135 DB((dbg, LEVEL_5, "get head entries %N \n", node));
1137 for (i = 0; i < arity; ++i) {
1138 /* node is not in the head, but the predecessor is.
1139 * (head or loop chain nodes are marked) */
1141 DB((dbg, LEVEL_5, "... "));
1142 DB((dbg, LEVEL_5, "node %N marked %d (0) pred %d marked %d (1) \n",
1143 node, is_nodes_block_marked(node), i,
1144 is_nodes_block_marked(get_irn_n(node, i))));
1146 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
1150 ARR_APP1(out_edge, cur_head_outs, entry);
1152 if (is_in_loop(node) && is_Block(node)) {
1153 loop_info.new_loop_head = node;
1154 DB((dbg, LEVEL_5, " SET NEW LOOPHEAD to %N\n", node));
1161 * Find condition chains, and add them to be inverted
1162 * A block belongs to the chain if a condition branches out of the loop.
1163 * Returns 1 if the given block belongs to the condition chain.
1165 static unsigned find_condition_chains(ir_node *block)
1167 const ir_edge_t *edge;
1171 DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
1173 /* Collect all outs, including keeps. */
1174 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
1178 /* Leave at least one block as body. */
1179 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
1180 || head_inversion_block_count + 1 == loop_info.blocks) {
1181 set_Block_mark(block, 0);
1186 /* First: check our successors,
1187 * and add all of them, which are outside of the loop, to the list */
1188 foreach_block_succ(block, edge) {
1189 ir_node *src = get_edge_src_irn( edge );
1190 int pos = get_edge_src_pos( edge );
1192 if (!is_in_loop(src)) {
1198 ARR_APP1(out_edge, cond_chain_entries, entry);
1199 mark_irn_visited(src);
1204 set_Block_mark(block, 0);
1207 set_Block_mark(block, 1);
1208 ++head_inversion_block_count;
1209 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
1210 head_inversion_node_count += nodes_n;
1213 /* Second: walk all successors,
1214 * and add them to the list if they are not part of the chain */
1215 foreach_block_succ(block, edge) {
1217 ir_node *src = get_edge_src_irn( edge );
1218 int pos = get_edge_src_pos( edge );
1221 if (!is_in_loop( src ) || irn_visited(src)) {
1225 mark_irn_visited(src);
1226 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
1227 inchain = find_condition_chains(src);
1229 /* if successor is not part of chain we need to collect its outs */
1234 ARR_APP1(out_edge, cond_chain_entries, entry);
1241 * Rewires the copied condition chain. Removes backedges.
1242 * as this condition chain is prior to the loop.
1243 * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1244 * (loop_head is already fixed, we cannot rely on it.)
1246 static void fix_copy_inversion(void)
1251 ir_node *phi, *next;
1252 ir_node *head_cp = get_inversion_copy(loop_head);
1253 int arity = get_irn_arity(head_cp);
1254 int backedges = get_backedge_n(head_cp, 0);
1255 int new_arity = arity - backedges;
1259 NEW_ARR_A(ir_node *, ins, new_arity);
1262 /* Remove block backedges */
1263 for(i = 0; i < arity; ++i) {
1264 if (!is_backedge(head_cp, i))
1265 ins[pos++] = get_irn_n(head_cp, i);
1268 new_head = new_Block(new_arity, ins);
1270 phis = NEW_ARR_F(ir_node *, 0);
1272 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1274 NEW_ARR_A(ir_node *, ins, new_arity);
1276 for(i = 0; i < arity; ++i) {
1277 if (!is_backedge(head_cp, i))
1278 ins[pos++] = get_irn_n(phi, i);
1280 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1281 new_head, new_arity, ins,
1283 ARR_APP1(ir_node *, phis, new_phi);
1287 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1288 exchange(phi, phis[pos++]);
1291 exchange(head_cp, new_head);
1297 static ir_node *fix_inner_cc_definitions(ir_node *phi, ir_node *pred)
1301 ir_node **head_phi_ins;
1302 ir_node *new_loop_head = loop_info.new_loop_head;
1303 int new_loop_head_arity = get_irn_arity(new_loop_head);
1304 DB((dbg, LEVEL_5, "NEW HEAD %N arity %d\n", new_loop_head, new_loop_head_arity));
1306 NEW_ARR_A(ir_node *, head_phi_ins, new_loop_head_arity);
1308 for(i = 0; i < new_loop_head_arity; ++i) {
1309 /* Rely on correct backedge info for the new loop head
1310 * is set by extend_ins_by_copy. */
1311 if(is_nodes_block_marked(get_irn_n(new_loop_head, i))) {
1312 head_phi_ins[i] = pred;
1314 head_phi_ins[i] = get_inversion_copy(pred);
1316 DB((dbg, LEVEL_5, "NEW HEAD %N in %N %N\n", new_loop_head, head_phi_ins[i], current_ir_graph));
1319 head_phi= new_r_Phi(new_loop_head,
1320 new_loop_head_arity,
1324 add_Block_phi(new_loop_head, head_phi);
1326 DB((dbg, LEVEL_5, "fix inner cc definitions: new head %N has new phi %N from cc phi %N @%N\n",
1327 new_loop_head, head_phi, phi, pred));
1333 /* Puts the original condition chain at the end of the loop, subsequently to the body.
1334 * Relies on block phi list and correct backedges.
1336 static void fix_head_inversion(void)
1340 ir_node *phi, *next;
1342 int arity = get_irn_arity(loop_head);
1343 int backedges = get_backedge_n(loop_head, 0);
1344 int new_arity = backedges;
1348 NEW_ARR_A(ir_node *, ins, new_arity);
1351 /* Keep only backedges */
1352 for(i = 0; i < arity; ++i) {
1353 if (is_own_backedge(loop_head, i))
1354 ins[pos++] = get_irn_n(loop_head, i);
1357 new_head = new_Block(new_arity, ins);
1359 phis = NEW_ARR_F(ir_node *, 0);
1361 for_each_phi(loop_head, phi) {
1364 NEW_ARR_A(ir_node *, ins, new_arity);
1367 for(i = 0; i < arity; ++i) {
1368 ir_node *pred = get_irn_n(phi, i);
1370 if (is_own_backedge(loop_head, i)) {
1371 DB((dbg, LEVEL_5, " !!! PHI not useful %N\n", phi));
1373 /* If definition is in the condition chain,
1374 * we need to create a phi in the new loop head */
1375 if (is_nodes_block_marked(pred))
1376 ins[pos++] = fix_inner_cc_definitions(phi, pred);
1381 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1382 new_head, new_arity, ins,
1385 ARR_APP1(ir_node *, phis, new_phi);
1387 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N\n", phi, new_phi));
1391 for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1392 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
1393 if (phis[pos] != phi)
1394 exchange(phi, phis[pos++]);
1399 DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1400 exchange(loop_head, new_head);
1403 /* Does the loop inversion. */
/* Performs the loop inversion proper, in five strictly ordered steps
 * (clone condition chain, extend successor ins, rebuild SSA for cloned
 * definitions, then fix original and copied heads).
 * @param head_entries  edges pointing into the condition chain,
 *                      collected by a prior graph walk. */
1404 static void inversion_walk(out_edge *head_entries)
1409 * The order of rewiring bottom-up is crucial.
1410 * Any change of the order leads to lost information that would be needed later.
1413 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1415 /* 1. clone condition chain */
1416 inc_irg_visited(current_ir_graph);
1418 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1419 out_edge entry = head_entries[i];
1420 ir_node *pred = get_irn_n(entry.node, entry.pos);
1422 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
/* copy_walk only descends into blocks flagged by the block mark,
 * i.e. the condition chain. */
1424 copy_walk(pred, is_nodes_block_marked, cur_loop);
1427 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1429 /* 2. Extends the head control flow successors ins
1430 * by the definitions of the copied head node. */
1431 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1432 out_edge head_out = head_entries[i];
1434 if (is_Block(head_out.node))
1435 extend_ins_by_copy(head_out.node, head_out.pos);
1438 /* 3. construct_ssa for users of definitions in the condition chain,
1439 * as there is now a second definition. */
1440 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1441 out_edge head_out = head_entries[i];
1442 /* Ignore keepalives */
1443 if (is_End(head_out.node))
1446 ir_node *is_in_cc = get_nodes_block(get_irn_n(head_out.node, head_out.pos));
1447 /* Construct ssa for definitions in the condition chain. */
1448 /* TODO Why do we need the check for is_in_cc? All definitions should be there... */
1449 if (!is_Block(head_out.node) && is_nodes_block_marked(is_in_cc)) {
1450 ir_node *pred, *cppred, *block, *cpblock;
1452 pred = get_irn_n(head_out.node, head_out.pos);
1453 cppred = get_inversion_copy(pred);
1454 block = get_nodes_block(pred);
1455 cpblock = get_nodes_block(cppred);
1456 construct_ssa(block, pred, cpblock, cppred);
1460 /* 4. Remove the ins which are no backedges from the original condition chain,
1461 * as the cc is now subsequently to the body. */
1462 fix_head_inversion();
1464 /* 5. Remove the backedges of the copied condition chain,
1465 * because it is going to be the new 'head' in advance to the loop */
1466 fix_copy_inversion();
1469 /* Analyse loop, if inversion is possible or reasonable. Then do the inversion. */
/* Analyses cur_loop and, if a condition chain of at least one block is
 * found in a loop of at least two blocks, performs the inversion via
 * inversion_walk(). Marks doms/loopinfo/outs inconsistent afterwards.
 * Uses block marks to flag the condition chain blocks. */
1470 static void loop_inversion(void)
1472 unsigned do_inversion = 1;
1474 inversion_head_node_limit = INT_MAX;
1475 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1477 /* Reset block marks.
1478 * We use block marks to flag blocks of the original condition chain. */
1479 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1481 loop_info.blocks = get_loop_n_blocks(cur_loop);
1482 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1484 head_inversion_node_count = 0;
1485 head_inversion_block_count = 0;
/* The head itself always belongs to the condition chain. */
1487 set_Block_mark(loop_head, 1);
1488 mark_irn_visited(loop_head);
1490 /* Use phase to keep copy of nodes from the condition chain. */
1491 phase = new_phase(current_ir_graph, phase_irn_init_default);
1493 inc_irg_visited(current_ir_graph);
1495 /* Search for condition chains. */
1496 find_condition_chains(loop_head);
1498 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
/* Bail out on trivial loops. */
1500 if (loop_info.blocks < 2) {
1503 "Loop contains %d (less than 2) blocks => No Inversion done.\n",
1507 /* We also catch endless loops here,
1508 * because they do not have a condition chain. */
1509 if (head_inversion_block_count < 1) {
1512 "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1513 head_inversion_block_count));
1517 cur_head_outs = NEW_ARR_F(out_edge, 0);
1519 /* Get all edges pointing into the condition chain. */
1520 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1522 /* Do the inversion */
1523 inversion_walk(cur_head_outs);
1525 DEL_ARR_F(cur_head_outs);
1526 /* Duplicated blocks changed doms */
1527 set_irg_doms_inconsistent(current_ir_graph);
1528 /* Loop content changed */
1529 set_irg_loopinfo_inconsistent(current_ir_graph);
1530 /* TODO are they? */
1531 set_irg_outs_inconsistent(current_ir_graph);
1536 DEL_ARR_F(cond_chain_entries);
1537 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1541 /* Rewire the floating copies of the current loop. */
/* Chains the floating loop copies top-down along every own backedge of
 * the loop head: copy c's backedge in is rewired to copy c-1 (or to the
 * original head for c == 0), head phis likewise; finally the original
 * head's backedges are redirected to the last copy.
 * @param copies  number of loop copies to line up (unroll factor - 1).
 * NOTE(review): else-branches and some declarations (phi, cp, pos, i, c)
 * are not visible in this listing. */
1543 static void place_copies(int copies)
1545 ir_node *loophead = loop_head;
1546 int headarity = get_irn_arity(loophead);
1551 /* Append the copies to the existing loop. */
1552 for (i = 0; i < headarity; ++i) {
1553 /* Build unrolled loop top down */
1554 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1555 for (c = 0; c < copies; ++c) {
1556 ir_node *upper_head;
1557 ir_node *lower_head = get_copy(loophead, c);
1558 ir_node *upper_pred;
/* First copy hangs below the original head ... */
1561 upper_head = loophead;
1562 upper_pred = get_irn_n(loophead, i);
/* ... every later copy below its predecessor copy. */
1564 upper_head = get_copy(loophead, c - 1);
1565 upper_pred = get_copy_safe(get_irn_n(loophead, i), c - 1);
1567 set_irn_n(lower_head, pos, upper_pred);
1568 for_each_phi(loophead, phi) {
1569 ir_node *lower_phi, *upper_phi, *upper_pred_phi;
1570 lower_phi = get_copy(phi, c);
1573 upper_pred_phi = get_irn_n(phi, i);
1575 upper_phi = get_copy(phi, c - 1);
1576 upper_pred_phi = get_copy_safe(get_irn_n(phi, i), c - 1);
1579 DB((dbg, LEVEL_5, "Rewire %N @%d to %N\n",
1580 lower_phi, pos, upper_pred_phi));
1582 /* Relying on phi nodes with 1 in to be present. */
1583 assert(is_Phi(lower_phi));
1585 /* Do not fix phis here. Chained phis will cause a defective copy array. */
/* A 1-in phi is replaced by a fresh phi so the copy map stays valid. */
1587 if (get_irn_arity(lower_phi) == 1) {
1588 ir_node *single_in[1];
1590 single_in[0] = upper_pred_phi;
1592 cp = new_rd_Phi(get_irn_dbg_info(lower_phi),
1593 get_nodes_block(lower_phi), 1, single_in,
1594 get_irn_mode(lower_phi));
1596 set_copy(phi, c, cp);
1597 exchange(lower_phi, cp);
1600 set_irn_n(lower_phi, pos, upper_pred_phi);
1605 /* Fix the topmost loop heads backedges. */
1606 set_irn_n(loophead, i, get_copy(get_irn_n(loophead, i), copies - 1));
1607 for_each_phi(loophead, phi) {
1608 ir_node *last_cp = get_copy_safe(get_irn_n(phi, i), copies - 1);
1609 set_irn_n(phi, i, last_cp);
1615 /* Copies the cur_loop */
/* Creates 'copies' floating copies of cur_loop by walking backwards from
 * every loop-out edge; node_infos are allocated along the way.
 * The first copy (c == 0) pre-allocates room for all 'copies' entries in
 * the per-node copy array (hence the `c ? 0 : copies` argument). */
1616 static void copy_loop(out_edge *cur_loop_outs, int copies)
1619 unroll_arity = get_backedge_n(loop_head, 0);
1621 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1624 for (c = 0; c < copies; ++c) {
/* Fresh visited epoch per copy so the walk revisits all loop nodes. */
1626 inc_irg_visited(current_ir_graph);
1628 DB((dbg, LEVEL_5, " ### Copy_loop copy n: %d ###\n", c));
1629 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1630 out_edge entry = cur_loop_outs[i];
1631 ir_node *pred = get_irn_n(entry.node, entry.pos);
1633 copy_walk_n(pred, is_in_loop, c, c ? 0 : copies);
1637 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
/* Stub: intended to build the Duff's-device dispatch for the unrolled
 * loop. Currently does nothing except silence the unused-parameter
 * warning. */
1641 static void create_duffs_device(int unroll_number)
1643 (void)unroll_number;
/* Returns non-zero iff the node's value is invariant with respect to the
 * current loop. Nodes outside the loop are trivially invariant; for nodes
 * inside, the own-backedge ins are inspected (gaps in this listing hide
 * the exact accept/reject returns — TODO confirm against full source). */
1648 static unsigned is_loop_invariant_val(ir_node *node)
1652 if (! is_in_loop(node))
1655 /* TODO all cases? and how to use the dom tree for that. */
1658 for (i = 0; i < get_irn_arity(node); ++i) {
1659 if (is_own_backedge(get_nodes_block(node), i) && get_irn_n(node, i) != node)
/* Checks whether 'node' looks like the iteration variable of the loop:
 * all its own-backedge ins must come from one and the same node (the
 * increment, presumably an Add — confirm in full source). Returns that
 * node, or (per the visible early-outs) rejects otherwise. */
1667 static ir_node *is_loop_iteration(ir_node *node)
1670 ir_node *found_add = NULL;
1672 if (!is_in_loop(node))
1678 /*TODO all cases? and how do i use the dom tree for that.*/
1681 for (i = 0; i < get_irn_arity(node); ++i) {
/* Only the backedge ins carry the per-iteration update. */
1683 if (! is_own_backedge(get_nodes_block(node), i))
1685 new_found = get_irn_n(node,i);
/* Two different updates => not a simple counting variable. */
1687 if (found_add && found_add != new_found)
1690 found_add = new_found;
/* Decides whether and how often cur_loop should be unrolled.
 * Requires exactly one loop exit, no calls, and a counting-loop shape:
 * a comparison of a loop-invariant end value against a phi that is
 * updated by an Add with a loop-invariant step.
 * Returns the unroll factor (0 = do not unroll).
 * NOTE(review): several returns and declarations fall into listing gaps. */
1697 static unsigned get_unroll_decision(out_edge *cur_loop_outs)
1700 ir_node *pred0, *pred1, *projx, *cond, *projres, *loop_condition;
1701 ir_node *iteration, *add, *iteration_phi, *end_val, *step, *add_in0, *add_in1;
1703 (void) cur_loop_outs;
1707 /* There is more than 1 condition for the loop exit. We cannot cope this with duffs device. */
1708 if (loop_info.cf_outs != 1)
1711 /*TODO test for impact on performance */
1712 if (loop_info.calls > 0)
/* Walk from the exit edge down to the comparison:
 * Proj X -> Cond -> Proj res -> Cmp/Mod. */
1716 projx = get_irn_n(loop_info.cf_out.node, loop_info.cf_out.pos);
1717 cond = get_irn_n(projx, 0);
1718 projres = get_irn_n(cond, 0);
1719 loop_condition = get_irn_n(projres, 0);
1721 DB((dbg, LEVEL_2, " loop condition %N\n", loop_condition));
1723 /* One in of the loop condition needs to be loop invariant.
1724 * The other in is assigned by an add.
1725 * The add uses a loop invariant value
1726 * and another value that depends on a loop invariant start value and the add itself.
1737 /* Find the loop invariant in */
1738 /* Assumption that condition is cmp or mod.
1740 pred0 = get_irn_n(loop_condition, 0);
1741 pred1 = get_irn_n(loop_condition, 1);
1743 if (is_loop_invariant_val(pred0)) {
1749 if (is_loop_invariant_val(pred1)) {
1750 if (end_val != NULL)
1751 /* RETURN. loop condition does not change during the loop. */
1758 DB((dbg, LEVEL_2, " end_val %N\n", end_val));
1760 /* Check for constraint of remaining in. */
1761 add = is_loop_iteration(iteration);
1765 DB((dbg, LEVEL_2, " add %N\n", add));
1767 add_in0 = get_irn_n(add, 0);
1768 add_in1 = get_irn_n(add, 1);
/* Whichever Add operand is invariant is the step; the other must be
 * (or lead to) the iteration phi. */
1770 if (is_loop_invariant_val(add_in0)) {
1772 iteration_phi = add_in1;
1775 if (is_loop_invariant_val(add_in1)) {
1779 iteration_phi = add_in0;
1782 DB((dbg, LEVEL_2, " step %N\n", step));
1784 if (! is_Phi(iteration_phi)) {
1785 if (is_Phi(iteration))
1786 iteration_phi = iteration;
1791 DB((dbg, LEVEL_2, " #### COUNTING LOOP step %N end %N phi %N\n", step, end_val, iteration_phi));
1796 mode = get_irn_mode(add);
1798 block = new_Block(0, NULL);
1799 /*set_irg_current_Block(current_ir_graph, block);*/
/* Build end_val mod step for the Duff's-device remainder (unfinished). */
1802 unroll_mod_const = new_Const(unroll_number);
1803 module = new_r_Mod(block, new_NoMem(), end_val, step, mode, op_pin_state_pinned);
/* Unrolls cur_loop if get_unroll_decision() yields a factor > 0:
 * copies the loop body, lines the copies up, extends the loop-out
 * successors, rebuilds SSA for all loop outs, and finally marks
 * doms/loopinfo/outs inconsistent. */
1815 static void unroll_loop(void)
1818 unsigned unroll_number;
1820 cur_loop_outs = NEW_ARR_F(out_edge, 0);
/* Collect all edges leaving the loop. */
1823 irg_walk_graph(current_ir_graph, get_loop_outs, NULL, NULL);
1825 unroll_number = get_unroll_decision(cur_loop_outs);
1827 /*dump_ir_block_graph(current_ir_graph, "-pre");*/
1829 if (unroll_number) {
1830 create_duffs_device(unroll_number);
1832 /* Copies the loop and allocs node_info */
1833 copy_loop(cur_loop_outs, unroll_number - 1);
1834 /*dump_ir_block_graph(current_ir_graph, "-cp");*/
1836 /* Line up the floating copies. */
1837 place_copies(unroll_number - 1);
1838 /*dump_ir_block_graph(current_ir_graph, "-ord");*/
1840 /* Extend loop successors */
1841 ssa_outs = extend_ins(cur_loop_outs, unroll_number - 1);
1842 /*dump_ir_block_graph(current_ir_graph, "-fixed")*/ ;
1844 /* Links are going to be overwritten */
1845 free_node_info_blocks();
1847 /* Generate phis for all loop outs */
1848 for (i = 0; i < ARR_LEN(ssa_outs); ++i) {
1849 out_edge entry = ssa_outs[i];
1850 ir_node *node = entry.node;
1851 ir_node *pred = get_irn_n(entry.node, entry.pos);
1853 DB((dbg, LEVEL_5, "Do? construct_ssa_n def %N node %N pos %d\n",
1854 pred, node, entry.pos));
/* End node (keepalive) users need no SSA reconstruction. */
1856 if (!is_End(node)) {
1857 DB((dbg, LEVEL_5, "construct_ssa_n def %N node %N pos %d\n",
1858 pred, node, entry.pos));
1860 construct_ssa_n(pred, node, unroll_number - 1);
1862 construct_ssa_n(pred, new_phi, unroll_number - 1);*/
1865 /*dump_ir_block_graph(current_ir_graph, "-ssa-done");*/
1870 irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);
1872 DEL_ARR_F(ssa_outs);
/* Unrolling changed dominance, loop tree and out edges. */
1874 set_irg_doms_inconsistent(current_ir_graph);
1875 set_irg_loopinfo_inconsistent(current_ir_graph);
1876 set_irg_outs_inconsistent(current_ir_graph);
1878 DEL_ARR_F(cur_loop_outs);
1881 /* Per-loop initialization and analysis; dispatches to inversion/unrolling. */
/* Resets the per-loop state, gathers loop information by a graph walk,
 * and runs the enabled optimizations (inversion and/or unrolling) on
 * the given loop. Returns early when no valid loop head is found. */
1882 static void init_analyze(ir_loop *loop)
1884 /* Init new for every loop */
1888 loop_head_valid = 1;
1889 loop_inv_head = NULL;
1890 loop_peeled_head = NULL;
1892 loop_info.calls = 0;
1893 loop_info.blocks = 0;
1894 loop_info.cf_outs = 0;
1896 DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n",
1897 get_loop_node(loop, 0)));
/* Fills loop_info / loop_head by walking the whole graph. */
1899 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1901 /* RETURN if there is no valid head */
1902 if (!loop_head || !loop_head_valid) {
1903 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
1907 DB((dbg, LEVEL_2, "Loophead: %N\n", loop_head));
1909 if (enable_inversion)
1912 if (enable_unrolling)
1915 DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n",
1916 get_loop_node(loop, 0)));
1919 /* Find most inner loops and send them to analyze_loop */
/* Recursively collects loops into the global 'loops' array and descends
 * into all sons. NOTE(review): a condition guarding the ARR_APP1 (e.g.
 * "append only if the loop has no sons") may be hidden by a listing gap
 * — confirm against the full source. */
1920 static void find_most_inner_loop(ir_loop *loop)
1922 /* descend into sons */
1923 int sons = get_loop_n_sons(loop);
1926 ARR_APP1(ir_loop *, loops, loop);
1929 for (s=0; s<sons; s++) {
1930 find_most_inner_loop(get_loop_son(loop, s));
1936 * Assure preconditions are met and go through all loops.
/* Entry point of the loop optimization: establishes the required
 * analyses (outs, CF loop tree, phi lists), collects the inner loops,
 * then runs init_analyze() on each of them.
 * @param irg  graph to optimize; becomes current_ir_graph. */
1938 void loop_optimization(ir_graph *irg)
1945 set_current_ir_graph(irg);
1948 assure_irg_outs(irg);
1950 /* NOTE: sets only the loop attribute of blocks, not nodes */
1951 /* NOTE: Kills links */
1952 assure_cf_loop(irg);
/* Phi lists are needed by for_each_phi; the link resource is only
 * needed transiently by collect_phiprojs here. */
1954 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
1955 collect_phiprojs(irg);
1956 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1958 loop = get_irg_loop(irg);
1959 sons = get_loop_n_sons(loop);
1961 loops = NEW_ARR_F(ir_loop *, 0);
1962 /* List all inner loops */
1963 for (nr = 0; nr < sons; ++nr) {
1964 find_most_inner_loop(get_loop_son(loop, nr));
1967 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1968 /* Set all links NULL */
1969 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
1971 for (i = 0; i < ARR_LEN(loops); ++i) {
1972 ir_loop *loop = loops[i];
1975 /*dump_ir_block_graph(current_ir_graph, "-part");*/
/* Re-collect phi lists: the previous loop's transformation may have
 * invalidated them. */
1976 collect_phiprojs(irg);
1977 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
1980 /*dump_ir_block_graph(current_ir_graph, "-full");*/
1983 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1984 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
/* Public driver: runs loop_optimization() with only unrolling enabled. */
1987 void do_loop_unrolling(ir_graph *irg)
1989 enable_unrolling = 1;
1991 enable_inversion = 0;
1993 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
1994 get_irg_start(irg)));
1996 loop_optimization(irg);
1998 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
1999 get_irg_start(irg)));
/* Public driver: runs loop_optimization() with only inversion enabled. */
2002 void do_loop_inversion(ir_graph *irg)
2004 enable_unrolling = 0;
2006 enable_inversion = 1;
2008 DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
2009 get_irg_start(irg)));
2011 loop_optimization(irg);
2013 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
2014 get_irg_start(irg)));
/* Public driver for peeling: both inversion and unrolling are disabled,
 * so loop_optimization() only performs the analysis walk here. */
2017 void do_loop_peeling(ir_graph *irg)
2019 enable_unrolling = 0;
2021 enable_inversion = 0;
2023 DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
2024 get_irg_start(irg)));
2026 loop_optimization(irg);
2028 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
2029 get_irg_start(irg)));
/* Creates a graph pass wrapping do_loop_inversion().
 * @param name  pass name, or NULL for the default "loop_inversion". */
2033 ir_graph_pass_t *loop_inversion_pass(const char *name)
2035 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
/* Creates a graph pass wrapping do_loop_unrolling().
 * @param name  pass name, or NULL for the default "loop_unroll". */
2038 ir_graph_pass_t *loop_unroll_pass(const char *name)
2040 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
/* Creates a graph pass wrapping do_loop_peeling().
 * @param name  pass name, or NULL for the default "loop_peeling". */
2043 ir_graph_pass_t *loop_peeling_pass(const char *name)
2045 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
/* Registers this module's debug channel ("firm.opt.loop"). */
2048 void firm_init_loop_opt(void)
2050 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");