2 * Copyright (C) 1995-2010 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling
30 #include "iroptimize.h"
48 #include "irbackedge_t.h"
49 #include "irphase_t.h"
/* Debug module handle for this pass; only present in debug builds. */
53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
56 * Convenience macro for iterating over every phi node of the given block.
57 * Requires phi list per block.
59 #define for_each_phi(block, phi) \
60 for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))
/* Like for_each_phi, but safe against removal/exchange of the current phi:
 * the successor is fetched before the body runs. */
62 #define for_each_phi_safe(head, phi, next) \
63 for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
64 (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
66 /* currently processed loop */
67 static ir_loop *cur_loop;
69 /* Condition for performing copy_walk. */
70 typedef unsigned walker_condition(ir_node *);
/* Predicate deciding whether edge (node, pos) stays live after inversion. */
72 typedef unsigned is_live_edge(ir_node *, int);
74 /* Node and position of a predecessor */
75 typedef struct out_edge {
81 /* Node info for inversion*/
82 typedef struct inversion_node_info {
85 struct inversion_node_info *free_list_next; /* linked list to free all node_infos */
86 } inversion_node_info;
88 /* Node info for unrolling. */
89 typedef struct unrolling_node_info {
92 struct ir_node *free_list_next; /* linked list to free all node_infos */
93 } unrolling_node_info;
95 /* Head of the list for freeing node_infos */
96 static ir_node *free_list;
98 /* A walker may start visiting the current loop with these nodes. */
99 static out_edge *cur_loop_outs;
101 /* Outs of the nodes head. */
102 static out_edge *cur_head_outs;
104 /* Information about the loop head */
105 static ir_node *loop_head = NULL;
/* Cleared when more than one candidate head is found; see get_loop_info(). */
106 static unsigned loop_head_valid = 1;
108 /* List of all inner loops, that are processed. */
109 static ir_loop **loops;
/* Head of the inverted condition chain (after loop inversion). */
112 static ir_node *loop_inv_head = NULL;
/* Head of the peeled-off iteration (after loop peeling). */
114 static ir_node *loop_peeled_head = NULL;
116 /* Loop analysis informations */
117 typedef struct loop_info_t {
118 unsigned blocks; /* number of blocks in the loop */
120 unsigned calls; /* number of calls */
123 ir_node **new_loop_heads;
127 ir_node *iteration_phi;
131 /* Information about the current loop */
132 static loop_info_t loop_info;
134 /* Outs of the condition chain (loop inversion). */
135 static ir_node **cc_blocks;
136 static out_edge *cond_chain_entries;
137 static out_edge *head_df_loop;
139 /* Arity of the unrolled loop heads. */
140 static int unroll_arity;
141 static int unroll_number;
143 /* Phase is used to keep copies of nodes. */
144 static ir_phase *phase;
146 /* Prototype of initialization for unrolling phase. */
147 void *init_unrolling_phase(ir_phase *ph, const ir_node *n, void *old);
/* Counters for limiting the size of the inverted condition chain. */
152 static unsigned head_inversion_node_count;
153 /*static unsigned inversion_head_node_limit;*/
154 static unsigned head_inversion_block_count;
156 /* As the preconditions are the same, these flags,
157 * set depending on which function called, are used to determine what to do. */
158 static unsigned enable_peeling;
159 static unsigned enable_inversion;
160 static unsigned enable_unrolling;
162 /* Reset nodes link. For use with a walker. */
163 static void reset_link(ir_node *node, void *env)
166 set_irn_link(node, NULL);
/* Stores cp as the nr-th unroll copy of n in the phase's per-node info.
 * Assumes the unrolling_node_info was allocated beforehand. */
169 static void set_unroll_copy(ir_node *n, int nr, ir_node *cp)
171 unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n);
172 info->copies[nr] = cp;
175 /* Returns a nodes copy if it exists, or else the node. */
176 static ir_node *get_unroll_copy(ir_node *n, int nr)
178 unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n);
179 ir_node *cp = info->copies[nr];
183 /* Getter and setter for inversion copy. */
/* For inversion only one copy exists, so the phase data is the copy itself. */
184 static void set_inversion_copy(ir_node *n, ir_node *cp)
186 phase_set_irn_data(phase, n, cp);
189 static ir_node *get_inversion_copy(ir_node *n)
191 ir_node *cp = (ir_node *)phase_get_irn_data(phase, n);
195 /* Returns 0 if the node or block is not in cur_loop. */
196 static unsigned is_in_loop(ir_node *node)
198 return (get_irn_loop(get_block(node)) == cur_loop);
/* Returns the pos-th copy of n, falling back gracefully if none exists
 * (body elided in this excerpt — see get_unroll_copy). */
201 static ir_node *get_copy_safe(ir_node *n, int pos)
208 /* Returns if the given be is an alien edge.
209 * This is the case when the pred is not in the loop. */
210 static unsigned is_alien_edge(ir_node *n, int i)
212 return(!is_in_loop(get_irn_n(n, i)));
/* A backedge of n at pos that also originates inside cur_loop,
 * i.e. the loop's own backedge as opposed to one of a nested/alien loop. */
215 static unsigned is_own_backedge(ir_node *n, int pos)
217 return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
220 /* Resets block mark for given node. For use with walker */
221 static void reset_block_mark(ir_node *node, void * env)
226 set_Block_mark(node, 0);
/* Mark of the node's block (or of the node itself if it is a block). */
229 static unsigned is_nodes_block_marked(ir_node* node)
232 return get_Block_mark(node);
234 return get_Block_mark(get_block(node));
237 /* Returns the number of blocks in a loop. */
238 static int get_loop_n_blocks(ir_loop *loop)
242 elements = get_loop_n_elements(loop);
244 for (e=0; e<elements; e++) {
245 loop_element elem = get_loop_element(loop, e);
246 if (is_ir_node(elem.kind) && is_Block(elem.node))
/* Appends new as an additional predecessor of n (arity grows by one).
 * Returns the old arity, which is the in-position of the appended node. */
253 static int extend_irn(ir_node *n, ir_node *new)
257 int arity = get_irn_arity(n);
258 int new_arity = arity + 1;
260 NEW_ARR_A(ir_node *, ins, new_arity);
262 for(i = 0; i < arity; ++i) {
263 ins[i] = get_irn_n(n, i);
267 set_irn_in(n, new_arity, ins);
268 /* arity equals the position of the new node */
/* Extends block (and all its phis) by the inversion copies of its
 * predecessors at pos. Keeps block and phi arities in sync. */
273 static void extend_ins_by_copy(ir_node *block, int pos)
278 assert(is_Block(block));
280 /* Extend block by copy of definition at pos */
281 pred = get_irn_n(block, pos);
282 new_in = get_inversion_copy(pred);
283 DB((dbg, LEVEL_5, "Extend block %N by %N cp of %N\n", block, new_in, pred));
284 extend_irn(block, new_in);
286 /* Extend block phis by copy of definition at pos */
287 for_each_phi(block, phi) {
290 pred = get_irn_n(phi, pos);
291 cp = get_inversion_copy(pred);
292 /* If the phis in is not in the condition chain (eg. a constant),
293 * there is no copy. */
/* NOTE(review): elided lines presumably select new_in = cp ? cp : pred
 * here — verify against the full source. */
299 DB((dbg, LEVEL_5, "Extend phi %N by %N cp of %N\n", phi, new_in, pred));
300 extend_irn(phi, new_in);
305 * Extends all blocks which succeed the loop with the copied loops outs.
306 * Also extends the blocks phis. Requires block phi list.
307 * Returns an updated list of loop outs of the original loop outs.
309 static out_edge *extend_ins(out_edge *outs, int copies)
314 ir_node *phi, *next_phi, *new_phi, *head, *new_block;
315 int old_arity, new_arity;
/* Visited flags guard against processing the same successor block twice. */
319 inc_irg_visited(current_ir_graph);
321 new_outs = NEW_ARR_F(out_edge, 0);
/* First pass: debug dump of all given loop-out edges. */
324 for (i = 0; i < ARR_LEN(outs); ++i) {
325 out_edge edge = outs[i];
326 ir_node *node = edge.node;
327 DB((dbg, LEVEL_5, "outs %N %d\n", node, edge.pos));
/* Second pass: rebuild each successor block with room for the copies. */
331 for (i = 0; i < ARR_LEN(outs); ++i) {
332 out_edge edge = outs[i];
333 ir_node *node = edge.node;
334 int in_c, positions_n;
/* Non-block outs (data users outside the loop): keep the edge as-is,
 * it will be valid for the rebuilt blocks too. */
337 if (!is_Block(node) && !is_Bad(node)) {
342 ir_node *p = get_nodes_block(node);
343 for(c = 0; c < get_irn_arity(p); ++c) {
344 if (is_in_loop(get_irn_n(p, c))) {
350 ARR_APP1(out_edge, new_outs, edge);
351 DB((dbg, LEVEL_5, "new out %N\n", node));
356 ARR_APP1(out_edge, new_outs, edge);
357 DB((dbg, LEVEL_5, "new out %N\n", node));
361 /* CONTINUE if node already processed. */
362 if (irn_visited(node))
368 positions = NEW_ARR_F(int, 0);
371 /* Search for all occurences of the current node, and save the in positions. */
372 for (p = 0; p < ARR_LEN(outs); ++p) {
373 ir_node *cur = outs[p].node;
374 int d_pos = outs[p].pos;
377 DB((dbg, LEVEL_5, "Loop successor %N with pred @%d in loop\n",
379 ARR_APP1(int, positions, d_pos);
380 mark_irn_visited(cur);
/* Every collected position gains one extra in per copy. */
385 old_arity = get_irn_arity(node);
386 new_arity = old_arity + (positions_n * copies);
388 ins = NEW_ARR_F(ir_node*, new_arity);
389 bes = NEW_ARR_F(int, new_arity);
391 /* extend block ins */
392 for (in_c = 0; in_c < old_arity; ++in_c) {
393 ins[in_c] = get_irn_n(node, in_c);
394 bes[in_c] = is_backedge(node, in_c);
396 for (p = 0; p < positions_n; ++p) {
397 for (c = 0; c < copies; ++c) {
398 ir_node *cp = get_copy_safe(get_irn_n(node, positions[p]), c);
399 DB((dbg, LEVEL_5, "%N (new_arity %d) @%d = %N\n",
400 node, new_arity, in_c, cp));
/* New ins inherit the backedge flag of the edge they duplicate. */
402 bes[in_c] = is_backedge(node, positions[p]);
407 assert(new_arity == in_c);
/* Save the phi list before rebuilding the block; a fresh block is created
 * because arity cannot be changed in place for Block+Phis consistently. */
409 head = get_Block_phis(node);
410 new_block = new_r_Block(get_irn_irg(node), new_arity, ins);
411 set_irn_loop(new_block, get_irn_loop(node));
413 for (in_c = 0; in_c < new_arity; ++in_c) {
415 set_backedge(new_block, in_c);
421 set_Block_phis(node, NULL);
423 phi_ins = NEW_ARR_F(ir_node *, new_arity);
424 phi_bes = NEW_ARR_F(int, new_arity);
/* Rebuild every phi of the old block with the widened arity. */
426 for_each_phi_safe(head, phi, next_phi) {
427 for (in_c = 0; in_c < old_arity; ++in_c) {
428 phi_ins[in_c] = get_irn_n(phi, in_c);
429 phi_bes[in_c] = is_backedge(phi, in_c);
432 for (p = 0; p < positions_n; ++p) {
433 for(c = 0; c < copies; ++c) {
435 pred = get_irn_n(phi, positions[p]);
436 DB((dbg, LEVEL_5, "Phi %N pred @%d is %N\n",
437 phi, positions[p], pred));
438 cp = get_copy_safe(pred, c);
440 DB((dbg, LEVEL_5, "Phi %N in %d is %N\n",
442 phi_bes[in_c] = is_backedge(phi, positions[p]);
447 new_phi = new_r_Phi(new_block, new_arity, phi_ins, get_irn_mode(phi));
449 for (in_c = 0; in_c < new_arity; ++in_c) {
451 set_backedge(new_phi, in_c);
453 DB((dbg, LEVEL_5, "extend ins exch phi %N %N\n", phi, new_phi));
454 exchange(phi, new_phi);
455 DB((dbg, LEVEL_5, "exch Phi %N %N\n",phi, new_phi));
457 add_Block_phi(new_block, new_phi);
463 DB((dbg, LEVEL_5, "extend ins exch block %N %N\n", node, new_block));
464 exchange(node, new_block);
465 set_Block_MacroBlock(new_block, new_block);
466 DB((dbg, LEVEL_5, "exch block %N %N\n", node, new_block));
468 DEL_ARR_F(positions);
474 * Finds loop head and some loop_info as calls or else if necessary.
/* Walker callback: inspects every edge of node to find the loop head (a
 * block inside the loop with a control predecessor outside) and to gather
 * statistics (calls etc.) into loop_info. */
476 static void get_loop_info(ir_node *node, void *env)
478 unsigned node_in_loop, pred_in_loop;
482 arity = get_irn_arity(node);
483 for (i = 0; i < arity; i++) {
484 ir_node *pred = get_irn_n(node, i);
486 pred_in_loop = is_in_loop(pred);
487 node_in_loop = is_in_loop(node);
489 /* collect some loop information */
498 /* Find the loops head/the blocks with cfpred outside of the loop */
499 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
500 ir_node *cfgpred = get_Block_cfgpred(node, i);
502 if (!is_in_loop(cfgpred)) {
503 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
505 /* another head? We do not touch this. */
506 if (loop_head && loop_head != node) {
/* Walker callback: replaces degenerate 1-in phis left over by the
 * transformation with fresh 1-in phis carrying correct debug info. */
518 static void correct_phis(ir_node *node, void *env)
522 if (is_Phi(node) && get_irn_arity(node) == 1) {
525 in[0] = get_irn_n(node, 0);
527 exch = new_rd_Phi(get_irn_dbg_info(node),
528 get_nodes_block(node), 1, in,
531 exchange(node, exch);
532 /* TODO make sure visited is copied */
539 /* Finds loophead and all use-def and cf edges. */
/* Walker callback: records every edge leaving the loop (pred inside,
 * node outside) into cur_loop_outs; block outs also go to loop_info.cf_out. */
540 static void get_loop_outs(ir_node *node, void *env)
542 unsigned node_in_loop, pred_in_loop;
546 arity = get_irn_arity(node);
547 for (i = 0; i < arity; ++i) {
548 ir_node *pred = get_irn_n(node, i);
550 pred_in_loop = is_in_loop(pred);
551 node_in_loop = is_in_loop(node);
553 if (pred_in_loop && !node_in_loop) {
557 ARR_APP1(out_edge, cur_loop_outs, entry);
558 if (is_Block(node)) {
560 loop_info.cf_out = entry;
562 DB((dbg, LEVEL_5, "loop outs %N\n", node));
/* State for the two-definition SSA reconstruction below: when the search
 * reaches ssa_second_def_block (and it is not the starting block), the
 * second definition is used instead of walking further. */
567 static ir_node *ssa_second_def;
568 static ir_node *ssa_second_def_block;
569 static unsigned found_sec_def;
572 * Walks the graph bottom up, searching for definitions and creates phis.
/* Classic SSA-reconstruction walk (Cytron-style without dominance info):
 * returns the reaching definition of the tracked value in block, creating
 * phis on the way. first!=0 marks the initial call so that the second
 * definition block is not short-circuited at the start. Uses block links
 * for memoization; visited flags must be managed by the caller. */
574 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int first)
582 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
584 /* Prevents creation of phi that would be bad anyway.
585 * Dead and bad blocks. */
586 if (get_irn_arity(block) < 1 || is_Bad(block)) {
587 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
591 if (block == ssa_second_def_block && !first) {
592 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
594 return ssa_second_def;
597 /* already processed this block? */
598 if (irn_visited(block)) {
599 ir_node *value = (ir_node *) get_irn_link(block);
600 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
604 irg = get_irn_irg(block);
605 assert(block != get_irg_start_block(irg));
607 /* a Block with only 1 predecessor needs no Phi */
608 n_cfgpreds = get_Block_n_cfgpreds(block);
609 if (n_cfgpreds == 1) {
610 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
613 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
615 value = search_def_and_create_phis(pred_block, mode, 0);
616 set_irn_link(block, value);
617 mark_irn_visited(block);
/* Multiple predecessors: create the phi with Unknown ins FIRST and link
 * it, so recursive calls through a cycle terminate on this phi. */
622 /* create a new Phi */
623 NEW_ARR_A(ir_node*, in, n_cfgpreds);
624 for (i = 0; i < n_cfgpreds; ++i)
625 in[i] = new_Unknown(mode);
627 phi = new_r_Phi(block, n_cfgpreds, in, mode);
628 /* Important: always keep block phi list up to date. */
629 add_Block_phi(block, phi);
630 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
631 set_irn_link(block, phi);
632 mark_irn_visited(block);
634 /* set Phi predecessors */
635 for (i = 0; i < n_cfgpreds; ++i) {
637 ir_node *pred_block = get_Block_cfgpred_block(block, i);
638 assert(pred_block != NULL);
639 pred_val = search_def_and_create_phis(pred_block, mode, 0);
641 assert(pred_val != NULL);
643 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
644 set_irn_n(phi, i, pred_val);
/* is_live_edge predicate for inversion: on the original head only its own
 * backedges stay live; on the head's copy exactly the opposite edges do. */
650 static unsigned live_cond(ir_node *node, int pos)
652 ir_node *cp = get_inversion_copy(loop_head);
653 if(node==loop_head) {
654 return is_own_backedge(loop_head, pos);
655 } else if (node == cp) {
656 return !is_own_backedge(cp, pos);
/* SSA reconstruction variant that only rewires the edges selected by the
 * given liveness predicate cond. Users of orig_val are collected into a
 * snapshot array first, because the edge set changes during rewiring. */
664 static void construct_ssa_special(ir_node *orig_block, ir_node *orig_val,
665 ir_node *second_block, ir_node *second_val, is_live_edge *cond)
669 const ir_edge_t *edge;
670 const ir_edge_t *next;
674 assert(orig_block && orig_val && second_block && second_val &&
675 "no parameter of construct_ssa may be NULL");
677 /* no need to do anything */
678 if (orig_val == second_val)
681 irg = get_irn_irg(orig_val);
683 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
684 inc_irg_visited(irg);
686 mode = get_irn_mode(orig_val);
687 set_irn_link(orig_block, orig_val);
688 mark_irn_visited(orig_block);
690 ssa_second_def_block = second_block;
691 ssa_second_def = second_val;
/* Snapshot the users; foreach_out_edge_safe alone is not enough because
 * the SSA walk may add/remove edges of orig_val. */
694 users = NEW_ARR_F(out_edge, 0);
696 foreach_out_edge_safe(orig_val, edge, next) {
697 ir_node *user = get_edge_src_irn(edge);
698 int j = get_edge_src_pos(edge);
706 ARR_APP1(out_edge, users, e);
709 /* Only fix the users of the first, i.e. the original node */
710 for (i = 0; i < ARR_LEN(users); ++i) {
711 out_edge e = users[i];
712 ir_node *user = e.node;
714 ir_node *user_block = get_nodes_block(user);
721 DB((dbg, LEVEL_5, "USER %N of original %N\n", user, orig_val));
/* Phi user: decide per in-edge via cond whether to keep the old in or
 * to search for the new reaching definition. */
724 int u_arity = get_irn_arity(user);
728 NEW_ARR_A(ir_node *, ins, u_arity);
/* NOTE(review): this inner loop reuses index i of the outer users loop;
 * an elided local declaration presumably shadows it — verify upstream. */
729 for(i = 0; i < u_arity; ++i) {
731 ir_node *pred_block = get_Block_cfgpred_block(user_block, i);
732 DB((dbg, LEVEL_5, "SEARCH %N?\n", pred_block));
733 if (! cond(user_block, i)) {
734 DB((dbg, LEVEL_5, "SEARCH %N NO\n", pred_block));
735 ins[i] = get_irn_n(user, i);
738 ins[i] = search_def_and_create_phis(pred_block, mode, 1);
/* NOTE(review): format string has two %N but only one argument — broken
 * debug output; the pred_block argument appears to be missing. */
739 DB((dbg, LEVEL_5, "SEARCH %N RETURNED %N\n", ins[i]));
742 set_irn_in(user, u_arity, ins);
/* Non-phi user: replace the single in j with the reaching definition. */
745 newval = search_def_and_create_phis(user_block, mode, 1);
746 if (newval != user && !is_Bad(newval))
747 set_irn_n(user, j, newval);
751 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
757 * Given a set of values this function constructs SSA-form for the users of the
758 * first value (the users are determined through the out-edges of the value).
759 * Works without using the dominance tree.
761 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
762 ir_node *second_block, ir_node *second_val)
766 const ir_edge_t *edge;
767 const ir_edge_t *next;
769 assert(orig_block && orig_val && second_block && second_val &&
770 "no parameter of construct_ssa may be NULL");
772 /* no need to do anything */
773 if (orig_val == second_val)
776 irg = get_irn_irg(orig_val);
778 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
779 inc_irg_visited(irg);
781 mode = get_irn_mode(orig_val);
/* Seed the walk: orig_block is known to hold orig_val. */
782 set_irn_link(orig_block, orig_val);
783 mark_irn_visited(orig_block);
785 ssa_second_def_block = second_block;
786 ssa_second_def = second_val;
788 /* Only fix the users of the first, i.e. the original node */
789 foreach_out_edge_safe(orig_val, edge, next) {
790 ir_node *user = get_edge_src_irn(edge);
791 int j = get_edge_src_pos(edge);
792 ir_node *user_block = get_nodes_block(user);
799 DB((dbg, LEVEL_5, "original user %N\n", user));
/* Phi users look up the definition in the predecessor block of in j,
 * plain users in their own block (elided branch selects between these). */
802 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
803 newval = search_def_and_create_phis(pred_block, mode, 1);
805 newval = search_def_and_create_phis(user_block, mode, 1);
807 if (newval != user && !is_Bad(newval))
808 set_irn_n(user, j, newval);
811 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
815 /* Construct ssa for def and all of its copies.
816 * NOTE: Overwrites links of blocks.
/* Unrolling variant: seeds the walk with def and each of its `copies`
 * unroll copies, then rewires every in-edge of user that pointed to def. */
818 static ir_node *construct_ssa_n(ir_node *def, ir_node *user, int copies)
823 ir_graph *irg = get_irn_irg(def);
825 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
826 inc_irg_visited(current_ir_graph);
827 mode = get_irn_mode(def);
829 set_irn_link(get_nodes_block(def), def);
830 mark_irn_visited(get_nodes_block(def));
/* Each copy's block is seeded as holding that copy. */
832 for (i = 0; i < copies; ++i) {
833 ir_node *cp = get_copy(def, i);
834 ir_node *block = get_nodes_block(cp);
835 set_irn_link(block, cp);
836 mark_irn_visited(block);
838 DB((dbg, LEVEL_5, "ssa mark cp %N of def %N usr %N\n", cp, def, user));
842 user_arity = get_irn_arity(user);
843 for (i = 0; i < user_arity; ++i) {
844 ir_node *user_block = get_nodes_block(user);
845 if (get_irn_n(user, i) != def)
848 DB((dbg, LEVEL_5, "ssa_n found edge of user %N\n", user));
/* NOTE(review): search_def_and_create_phis is declared with three
 * parameters above but called with two here — likely a version mismatch
 * in this excerpt; verify against the full source. */
851 ir_node *pred_block = get_Block_cfgpred_block(user_block, i);
852 newval = search_def_and_create_phis(pred_block, mode);
854 newval = search_def_and_create_phis(user_block, mode);
856 /*TODO bads should not happen! */
857 if (newval != user) {
858 DB((dbg, LEVEL_5, "=> ssa_n new in is %N\n", newval));
859 set_irn_n(user, i, newval);
860 /*if (is_Phi(newval) && get_nodes_block(newval)== user_block) {
861 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
866 /* BREAK, as we found and handled the definition */
/* Diagnostic fallback: a Bad here indicates a reconstruction failure;
 * keep it alive and dump the graph for inspection. */
870 if (is_Bad(newval)) {
871 DB((dbg, LEVEL_5, "=> ssa_n new in is BAD %N in graph %N\n", newval, current_ir_graph));
872 add_End_keepalive(get_irg_end(current_ir_graph), newval);
873 dump_ir_block_graph(current_ir_graph, "-bad");
880 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
885 /* Returns the number of backedges with or without alien bes. */
886 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
890 int arity = get_irn_arity(loophead);
891 for (i = 0; i < arity; ++i) {
892 ir_node *pred = get_irn_n(loophead, i);
893 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
900 * Returns a raw copy of the given node.
901 * Attributes are kept/set according to the needs of loop inversion.
903 static ir_node *copy_node_inversion(ir_node *node)
908 cp = exact_copy(node);
909 arity = get_irn_arity(node);
/* Elided body presumably transfers backedge flags to the copy. */
911 for (i = 0; i < arity; ++i) {
912 if (is_backedge(node, i))
/* Register the copy so later walks can find it via the phase. */
916 set_inversion_copy(node, cp);
917 DB((dbg, LEVEL_5, "set copy of %N to cp %N \n", node, cp));
920 /* We may not keep the old macroblock. */
921 set_Block_MacroBlock(cp, cp);
922 set_Block_mark(cp, 0);
929 * This walker copies all walked nodes.
930 * If the walk_condition is true for a node, it is copied.
931 * All nodes node_info copy attributes have to be NULL prior to every walk.
932 * TODO temporary nodes not necessary anymore.
/* Inversion copy walk: recursively copies the subgraph selected by
 * walk_condition, creating temporary copies to break cycles and fixing
 * their ins once all predecessors have been walked. */
934 static void copy_walk(ir_node *node, walker_condition *walk_condition,
941 ir_graph *irg = current_ir_graph;
944 * break condition and cycle resolver, creating temporary node copies
946 if (get_irn_visited(node) >= get_irg_visited(irg)) {
947 /* Here we rely on nodestate's copy being initialized with NULL */
948 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
949 if (get_inversion_copy(node) == NULL) {
950 cp = copy_node_inversion(node);
951 DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
957 mark_irn_visited(node);
/* Walk the node's block first so blocks are copied before their content. */
959 if (!is_Block(node)) {
960 ir_node *pred = get_nodes_block(node);
961 if (walk_condition(pred))
962 DB((dbg, LEVEL_5, "walk block %N\n", pred));
963 copy_walk(pred, walk_condition, set_loop);
966 arity = get_irn_arity(node);
968 NEW_ARR_A(ir_node *, cpin, arity);
970 for (i = 0; i < arity; ++i) {
971 ir_node *pred = get_irn_n(node, i);
973 if (walk_condition(pred)) {
974 DB((dbg, LEVEL_5, "walk node %N\n", pred));
975 copy_walk(pred, walk_condition, set_loop);
976 cpin[i] = get_inversion_copy(pred);
977 DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
978 node, get_inversion_copy(pred), pred));
984 /* copy node / finalize temp node */
985 if (get_inversion_copy(node) == NULL) {
986 /* No temporary copy existent */
987 cp = copy_node_inversion(node);
988 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
990 /* temporary copy is existent but without correct ins */
991 cp = get_inversion_copy(node);
992 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
/* Place the copy in the copied block and keep its phi list consistent. */
995 if (!is_Block(node)) {
996 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
998 set_nodes_block(cp, cpblock );
1000 add_Block_phi(cpblock, cp);
1003 /* Keeps phi list of temporary node. */
1004 set_irn_in(cp, ARR_LEN(cpin), cpin);
1007 /* Creates a new node from a given one, but with custom ins. */
/* Unrolling copy constructor: phis get new_rd_Phi (to preserve arity-1
 * phis for easy rewiring), everything else new_ir_node + attribute copy.
 * Loop membership, macroblock and (for blocks) an empty phi list are set. */
1008 static ir_node *copy_node_changed(
1009 ir_node *node, ir_node *block, int arity, ir_node **ins, ir_loop *loop)
1012 DB((dbg, LEVEL_5, "New node from %N in %N block %N arity %d\n",
1013 node, get_irn_irg(node), block, arity));
1016 cp = new_rd_Phi(get_irn_dbg_info(node),
1020 get_irn_mode(node));
1024 /* We want to keep phi nodes with arity 1, as it makes rewiring a lot easier. */
1025 cp = new_ir_node(get_irn_dbg_info(node),
1034 add_Block_phi(block, cp);
1036 if (get_irn_op(node) == get_irn_op(cp))
1037 copy_node_attr(get_irn_irg(node), node, cp);
1039 /*new_backedge_info(cp); function removed */
1041 set_Block_MacroBlock(cp, cp);
1043 set_irn_loop(cp, loop);
1045 /* Blocks are copied first, so that future phi nodes populate the phi list. */
1047 set_Block_phis(cp, NULL);
1049 DB((dbg, LEVEL_5, "New node %N in %N block %N arity %d\n",
1050 cp, get_irn_irg(cp), block, arity));
1054 /* To be exchangeable, the unknown needs to have proper graph set. */
1055 static ir_node *new_node_unknown(ir_node *block, ir_mode *mode)
1057 ir_node *new = new_ir_node(NULL, get_irn_irg(block), block, op_Unknown, mode, 0, NULL);
1062 * Walker, to copy alle nodes that meet the walk_condition.
1063 * Blocks are copied first, which is important for the creation of the phi lists.
/* Unrolling copy walk for copy number copy_index. Cycles are broken with
 * Unknown placeholders that are exchanged once the real copy exists.
 * Returns the copy of node (via elided return paths). */
1066 static ir_node *copy_walk_n(ir_node *node, walker_condition *walk_condition, int copy_index)
1070 unsigned headblock = 0;
1071 ir_node *block, *cp, *new_cp;
1072 ir_node **ins = NULL;
1074 if (irn_visited(node)) {
1075 cp = get_unroll_copy(node, copy_index);
1077 DB((dbg, LEVEL_5, "Visited %N\n", node));
1080 DB((dbg, LEVEL_5, "Visited. Return copy %N\n", cp));
/* Cycle breaker: leave a typed Unknown as placeholder for non-blocks. */
1084 if (!is_Block(node)) {
1085 ir_node *unknown = new_node_unknown(
1086 get_nodes_block(node),
1087 get_irn_mode(node));
1088 DB((dbg, LEVEL_5, " #### Create Unknown %N for %N\n", unknown, node));
1089 set_unroll_copy(node, copy_index, unknown);
1093 DB((dbg, LEVEL_5, "Walk %N\n", node));
1094 mark_irn_visited(node);
1096 /* Allocate array of copies */
1097 if (alloc_cp_arr > 0) {
1098 ir_node **arr = NEW_ARR_F(ir_node *, alloc_cp_arr);
1099 alloc_node_info(node);
1100 for (i = 0; i < alloc_cp_arr; ++i)
1102 set_copy_arr(node, arr);
/* Blocks are walked before their content nodes. */
1108 if (!is_Block(node)) {
1109 ir_node *orig_block = get_nodes_block(node);
1110 DB((dbg, LEVEL_5, "current node: %N about to walk block %N\n", node, orig_block));
1111 /* Check walk_condition for blocks not necessary */
1113 if (orig_block == loop_head && is_Phi(node))
1116 block = copy_walk_n(orig_block, walk_condition, copy_index);
1117 DB((dbg, LEVEL_5, "current node: %N block %N blocks copy %N\n",
1118 node, orig_block, block));
1120 if (node == loop_head)
1125 arity = get_irn_arity(node);
1128 /* Headblock and headblock-phi case */
/* For the head (and its phis) only the backedge ins are walked; the copy
 * gets arity unroll_arity. */
1130 DB((dbg, LEVEL_5, "Headblock/Phi from headblock\n"));
1131 NEW_ARR_A(ir_node *, ins, unroll_arity);
1133 for (i = 0; i < arity; ++i) {
1135 ir_node *pred = get_irn_n(node, i);
1136 if (is_backedge(loop_head, i)) {
1137 if (walk_condition(pred)) {
1138 ret = copy_walk_n(pred, walk_condition, copy_index);
1139 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1142 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1147 arity = unroll_arity;
/* Regular case: walk every in that satisfies the condition. */
1150 NEW_ARR_A(ir_node *, ins, arity);
1152 for (i = 0; i < arity; ++i) {
1154 ir_node *pred = get_irn_n(node, i);
1155 if (walk_condition(pred)) {
1156 ret = copy_walk_n(pred, walk_condition, copy_index);
1157 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1160 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1166 /* NOW, after the walk, we may check if the blocks copy is already present. */
1167 cp = get_unroll_copy(node, copy_index);
1168 if (cp != NULL && !is_Unknown(cp)) {
/* NOTE(review): format string has one %N but two arguments (node, cp) —
 * broken debug output; verify against the full source. */
1169 DB((dbg, LEVEL_5, "Must be Block %N copy is already present\n", node, cp));
1170 assert(is_Block(cp) &&
1171 "sanity: The copy should have been a block.");
1175 new_cp = copy_node_changed(node, block, arity, ins, cur_loop);
1176 set_unroll_copy(node, copy_index, new_cp);
/* Resolve a pending Unknown placeholder with the finished copy. */
1178 if (cp != NULL && is_Unknown(cp)) {
1179 DB((dbg, LEVEL_5, "Found unknown %N in %N now exchanged by %N in %N\n",
1180 cp, get_irn_irg(cp), new_cp, get_irn_irg(new_cp)));
1181 exchange(cp, new_cp);
/* Drops blocks from the condition chain whose predecessors are not all in
 * the chain (the chain must be entered only through the loop head);
 * clears their mark and updates head_inversion_block_count. */
1188 static void unmark_not_allowed_cc_blocks(void)
1190 int blocks = ARR_LEN(cc_blocks);
1193 for(i = 0; i < blocks; ++i) {
1194 ir_node *block = cc_blocks[i];
/* The loop head itself always stays in the chain. */
1195 if (block == loop_head)
1198 if (block != loop_head) {
1200 int arity = get_irn_arity(block);
1202 for(a = 0; a < arity; ++a) {
1203 if (! is_nodes_block_marked(get_irn_n(block, a))) {
1204 set_Block_mark(block, 0);
1205 --head_inversion_block_count;
1206 DB((dbg, LEVEL_5, "Removed %N from cc (blocks in cc %d)\n",
1207 block, head_inversion_block_count));
1216 * Populates head_entries with (node, pred_pos) tuple
1217 * whereas the node's pred at pred_pos is in the head but not the node itself.
1218 * Head and condition chain blocks must be marked.
1220 static void get_head_outs(ir_node *node, void *env)
1223 int arity = get_irn_arity(node);
/* Edges leaving the condition chain: unmarked node with marked pred. */
1226 for (i = 0; i < arity; ++i) {
1227 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
1231 entry.pred = get_irn_n(node, i);
1232 ARR_APP1(out_edge, cur_head_outs, entry);
1237 arity = get_irn_arity(loop_head);
/* Head phis whose own-backedge pred is inside the chain form a dataflow
 * loop entirely within the chain; record them for separate fixing. */
1239 if (is_Phi(node) && get_nodes_block(node) == loop_head) {
1240 for (i = 0; i < arity; ++i) {
1241 if (is_own_backedge(loop_head, i)) {
1242 if (is_nodes_block_marked(get_irn_n(node, i))) {
1246 entry.pred = get_irn_n(node, i);
1247 ARR_APP1(out_edge, head_df_loop, entry);
1248 DB((dbg, LEVEL_5, "Found incc assignment node %N @%d is pred %N, graph %N %N\n",
1249 node, i, entry.pred, current_ir_graph, get_irg_start_block(current_ir_graph)));
1256 if (is_Phi(node) && get_nodes_block(node) == loop_head)
1258 unsigned needs_ssa = 1;
1259 unsigned loop_invar = 1;
1260 for (i = 0; i < arity; ++i) {
1261 if (is_own_backedge(node, i)) {
1262 if (get_irn_n(node, i) != node)
1265 if (is_nodes_block_marked(get_irn_n(node, i)))
1269 /* Construct ssa for all uses of this phi, as there will be a copy of it. */
1270 if(needs_ssa && !loop_invar) {
1271 const ir_edge_t *edge;
1272 DB((dbg, LEVEL_5, "NEEDS SSA users of phi %N\n", node));
1273 foreach_out_edge(node, edge) {
1275 entry.node = get_edge_src_irn(edge);
1276 DB((dbg, LEVEL_5, "NEEDS SSA user %N\n", entry.node));
1277 entry.pos = get_edge_src_pos(edge);
1278 ARR_APP1(out_edge, cur_head_outs, entry);
1286 * Find condition chains, and add them to be inverted
1287 * A block belongs to the chain if a condition branches out of the loop.
1288 * Returns 1 if the given block belongs to the condition chain.
1290 static unsigned find_condition_chain(ir_node *block)
1292 const ir_edge_t *edge;
1294 unsigned has_be = 0;
1297 mark_irn_visited(block);
1299 DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
/* A block with a non-Jmp, non-Block out is not pure control flow and
 * cannot be part of the chain (elided handling). */
1302 foreach_out_edge(block, edge) {
1303 ir_node *src = get_edge_src_irn( edge );
1305 if (!is_Block(src) && !is_Jmp(src)) {
/* Check the control-flow successors for exits and backedges. */
1310 foreach_block_succ(block, edge) {
1311 ir_node *src = get_edge_src_irn( edge );
1312 int pos = get_edge_src_pos( edge );
1314 if (!is_in_loop(src))
1317 /* Inverting blocks with backedge outs leads to a cf edge
1318 * from the inverted head, into the inverted head (skipping the body).
1319 * As the body becomes the new loop head,
1320 * this would introduce another loop in the existing loop.
1321 * This loop inversion cannot cope with this complex case. */
1322 if (is_backedge(src, pos)) {
1328 /* We need all predecessors to already belong to the condition chain.
1339 if ((jmp_only == 1 || mark == 1) && has_be == 0) {
1340 set_Block_mark(block, 1);
1341 ++head_inversion_block_count;
1342 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
1343 /*head_inversion_node_count += nodes_n;*/
1344 ARR_APP1(ir_node *, cc_blocks, block);
1346 set_Block_mark(block, 0);
/* Recurse into unvisited in-loop successors. */
1349 foreach_block_succ(block, edge) {
1350 ir_node *src = get_edge_src_irn( edge );
1352 if (is_in_loop(src) && ! irn_visited(src))
1353 find_condition_chain(src);
1360 /* Second: walk all successors,
1361 * and add them to the list if they are not part of the chain */
1362 foreach_block_succ(block, edge) {
1364 ir_node *src = get_edge_src_irn( edge );
1365 int pos = get_edge_src_pos( edge );
1368 if (!is_in_loop( src ) || irn_visited(src)) {
1372 mark_irn_visited(src);
1373 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
/* NOTE(review): this recursive call uses find_condition_chains (plural)
 * while the call above and the definition use find_condition_chain —
 * one of the two is a transcription error; verify against upstream. */
1374 inchain = find_condition_chains(src);
1376 /* if successor is not part of chain we need to collect its outs */
1381 ARR_APP1(out_edge, cond_chain_entries, entry);
1389 * Rewires the copied condition chain. Removes backedges.
1390 * as this condition chain is prior to the loop.
1391 * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1392 * (loop_head is already fixed, we cannot rely on it.)
1394 static void fix_copy_inversion(void)
1399 ir_node *phi, *next;
1400 ir_node *head_cp = get_inversion_copy(loop_head);
1401 int arity = get_irn_arity(head_cp);
/* Only non-alien backedges are removed (second argument 0). */
1402 int backedges = get_backedge_n(head_cp, 0);
1403 int new_arity = arity - backedges;
1407 NEW_ARR_A(ir_node *, ins, new_arity);
1410 /* Remove block backedges */
1411 for(i = 0; i < arity; ++i) {
1412 if (!is_backedge(head_cp, i))
1413 ins[pos++] = get_irn_n(head_cp, i);
1416 new_head = new_Block(new_arity, ins);
1418 phis = NEW_ARR_F(ir_node *, 0);
/* Rebuild each phi of the copied head with the backedge ins dropped.
 * (An elided line presumably resets pos before each reuse.) */
1420 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1422 NEW_ARR_A(ir_node *, ins, new_arity);
1424 for(i = 0; i < arity; ++i) {
1425 if (!is_backedge(head_cp, i))
1426 ins[pos++] = get_irn_n(phi, i);
1428 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1429 new_head, new_arity, ins,
1431 ARR_APP1(ir_node *, phis, new_phi);
/* Exchange old phis with the rebuilt ones, then the block itself. */
1435 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1436 exchange(phi, phis[pos++]);
1439 exchange(head_cp, new_head);
1445 /* Takes a phi from the inverted loop head and its pred
1446 * which is assigned in the condition chain.
1448 * Returns new phi, created in the new loop head.
1449 * If there is an assignment in the condition chain
1450 * with a user also in the condition chain,
1451 * the dominance frontier is in the new loop head.
1452 * The dataflow loop is completely in the condition chain.
1456 * Phi_cp | | copied condition chain
1460 * Phi* | | new loop head with newly created phi.
1462 * Phi | | original, inverted condition chain
1467 static ir_node *fix_inner_cc_definitions(ir_node *phi, ir_node *pred)
1471 ir_node **head_phi_ins;
1472 ir_node *new_loop_head = loop_info.new_loop_head;
1473 int new_loop_head_arity = get_irn_arity(new_loop_head);
/* For every new loop head, seed the visited marks with the block of pred
 * and its inversion copy, then let search_def_and_create_phis place the
 * missing phis of the given mode. */
1474 for(h = 0; h < ARR_LEN(loop_info.new_loop_heads); ++h) {
1475 ir_node *head = loop_info.new_loop_heads[h];
1477 inc_irg_visited(current_ir_graph);
1479 mark_irn_visited(get_nodes_Block(pred));
1480 mark_irn_visited(get_inversion_copy(get_nodes_Block(pred)));
1482 search_def_and_create_phis(head, get_irn_mode(phi));
1489 DB((dbg, LEVEL_5, "NEW HEAD %N arity %d\n", new_loop_head, new_loop_head_arity));
1491 NEW_ARR_A(ir_node *, head_phi_ins, new_loop_head_arity);
/* Pick, per predecessor of the new loop head, either the original pred
 * (predecessor inside the condition chain) or its inversion copy. */
1493 for(i = 0; i < new_loop_head_arity; ++i) {
1494 /* Rely on correct backedge info for the new loop head
1495 * is set by extend_ins_by_copy. */
1497 DB((dbg, LEVEL_5, "CHECK mark of pred of %N @%d %N\n",
1498 new_loop_head, i, get_irn_n(new_loop_head, i)));
1500 if(is_nodes_block_marked(get_irn_n(new_loop_head, i))) {
1501 head_phi_ins[i] = pred;
1502 DB((dbg, LEVEL_5, "NEW HEAD %N pred %d %N is in cc\n", new_loop_head, i, pred));
1504 ir_node *cp = get_inversion_copy(pred);
1505 /* As pred is in the condition chain there has to be a copy. */
1507 head_phi_ins[i] = cp;
1509 DB((dbg, LEVEL_5, "NEW HEAD %N pred %d %N is in not in cc so get copy %N\n",
1510 new_loop_head, i, pred, head_phi_ins[i] ));
1512 DB((dbg, LEVEL_5, "NEW HEAD %N in %N %N\n",
1513 new_loop_head, head_phi_ins[i], current_ir_graph));
/* Create the merging phi in the new loop head and register it in the
 * block's phi list. NOTE(review): the return statement is not visible
 * here; presumably head_phi is returned — confirm against full source. */
1516 head_phi= new_r_Phi(new_loop_head,
1517 new_loop_head_arity,
1521 add_Block_phi(new_loop_head, head_phi);
1523 DB((dbg, LEVEL_5, "fix inner cc definitions: new head %N has new phi %N from cc phi %N @%N\n",
1524 new_loop_head, head_phi, phi, pred));
1532 /* Puts the original condition chain at the end of the loop,
1533 * subsequently to the body.
1534 * Relies on block phi list and correct backedges.
1536 static void fix_head_inversion(void)
1540 ir_node *phi, *next;
1542 int arity = get_irn_arity(loop_head);
1543 int backedges = get_backedge_n(loop_head, 0);
/* The new head keeps only the loop's own backedges as predecessors. */
1544 int new_arity = backedges;
1548 NEW_ARR_A(ir_node *, ins, new_arity);
1551 /* Keep only backedges */
1552 for(i = 0; i < arity; ++i) {
1553 if (is_own_backedge(loop_head, i))
1554 ins[pos++] = get_irn_n(loop_head, i);
1557 new_head = new_Block(new_arity, ins);
1559 phis = NEW_ARR_F(ir_node *, 0);
/* Rebuild every phi of the old head over the backedge-only in-set. */
1561 for_each_phi(loop_head, phi) {
1563 DB((dbg, LEVEL_5, "Fixing phi %N of loop head\n", phi));
1565 NEW_ARR_A(ir_node *, ins, new_arity);
1568 for (i = 0; i < arity; ++i) {
1569 ir_node *pred = get_irn_n(phi, i);
1571 if (is_own_backedge(loop_head, i)) {
1572 /* If assignment is in the condition chain,
1573 * we need to create a phi in the new loop head.
1574 * This can only happen for df, not cf. See find_condition_chains. */
1575 if (is_nodes_block_marked(pred)) {
1576 /* Cannot do this here. */
1577 ins[pos++] = pred; /*fix_inner_cc_definitions(phi, pred);*/
1585 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1586 new_head, new_arity, ins,
1589 ARR_APP1(ir_node *, phis, new_phi);
1591 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (arity %d)\n", phi, new_phi, pos ));
1595 for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1596 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
/* NOTE(review): pos is only incremented when an exchange happens. If
 * phis[pos] == phi ever holds, the index stops advancing with the phi
 * list and all subsequent phis are paired with the wrong replacement.
 * Compare fix_copy_inversion, which increments unconditionally — this
 * likely should be "exchange(...); ++pos;" outside the if. Confirm. */
1597 if (phis[pos] != phi)
1598 exchange(phi, phis[pos++]);
1603 DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1604 exchange(loop_head, new_head);
1607 /* Does the loop inversion. */
1608 static void inversion_walk(out_edge *head_entries)
/* Driver for the inversion: (1) clone the condition chain, (2) extend the
 * head's control-flow successors by the copied definitions, (3) rebuild
 * SSA for values that now have a second definition, then (4)/(5) repair
 * the original and the copied head. */
1613 * The order of rewiring bottom-up is crucial.
1614 * Any change of the order leads to lost information that would be needed later.
1617 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1619 /* 1. clone condition chain */
1620 inc_irg_visited(current_ir_graph);
1622 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1623 out_edge entry = head_entries[i];
1624 ir_node *pred = get_irn_n(entry.node, entry.pos);
1626 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1628 copy_walk(pred, is_nodes_block_marked, cur_loop);
1631 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1633 /* 2. Extends the head control flow successors ins
1634 * with the definitions of the copied head node. */
1635 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1636 out_edge head_out = head_entries[i];
1638 if (is_Block(head_out.node))
1639 extend_ins_by_copy(head_out.node, head_out.pos);
1642 /* 3. construct_ssa for users of definitions in the condition chain,
1643 * as there is now a second definition. */
1644 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1645 out_edge head_out = head_entries[i];
1647 /* Ignore keepalives */
1648 if (is_End(head_out.node))
1651 /*if (!is_Block(head_out.node))
1652 assert(is_nodes_block_marked(head_out.pred) && "damn");*/
1654 /* Construct ssa for assignments in the condition chain. */
1655 if (!is_Block(head_out.node)) {
1656 ir_node *pred, *cppred, *block, *cpblock;
1658 pred = head_out.pred;
1659 cppred = get_inversion_copy(pred);
1660 block = get_nodes_block(pred);
1661 cpblock = get_nodes_block(cppred);
1662 construct_ssa(block, pred, cpblock, cppred);
/* Same SSA reconstruction for the dataflow loops through the head that
 * were collected separately in head_df_loop. */
1666 for (i = 0; i < ARR_LEN(head_df_loop); ++i) {
1667 out_edge head_out = head_df_loop[i];
1669 /* Construct ssa for assignments in the condition chain. */
1670 ir_node *pred, *cppred, *block, *cpblock;
1672 pred = head_out.pred;
1673 cppred = get_inversion_copy(pred);
1674 assert(cppred && pred);
1675 block = get_nodes_block(pred);
1676 cpblock = get_nodes_block(cppred);
1677 construct_ssa(block, pred, cpblock, cppred);
1680 /* 4. Remove the ins which are no backedges from the original condition chain
1681 * as the cc is now subsequently to the body. */
1682 fix_head_inversion();
1684 /* 5. Remove the backedges of the copied condition chain,
1685 * because it is going to be the new 'head' in advance to the loop */
1686 fix_copy_inversion();
1689 /* Analyse loop, if inversion is possible or reasonable. Then do the inversion. */
1690 static void loop_inversion(void)
1692 unsigned do_inversion = 1;
1693 unsigned has_cc = 0;
1694 /*inversion_head_node_limit = INT_MAX;*/
1695 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1697 /* Reset block marks.
1698 * We use block marks to flag blocks of the original condition chain. */
1699 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1701 loop_info.blocks = get_loop_n_blocks(cur_loop);
1702 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1703 head_df_loop = NEW_ARR_F(out_edge, 0);
1705 head_inversion_node_count = 0;
1706 head_inversion_block_count = 0;
1708 /* Use phase to keep copy of nodes from the condition chain. */
1709 phase = new_phase(current_ir_graph, phase_irn_init_default);
1711 /* Search for condition chains. */
1712 cc_blocks = NEW_ARR_F(ir_node *, 0);
1713 inc_irg_visited(current_ir_graph);
1714 has_cc = find_condition_chain(loop_head);
1716 unmark_not_allowed_cc_blocks();
1717 DEL_ARR_F(cc_blocks);
1720 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
/* Bail-out checks: too-small loops and loops without an invertible
 * condition chain (includes endless loops) are left untouched. */
1722 if (loop_info.blocks < 2) {
1725 "Loop contains %d (less than 2) blocks => No Inversion done.\n",
1730 /* We also catch endless loops here,
1731 * because they do not have a condition chain. */
1732 if (head_inversion_block_count < 1) {
1735 "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1736 head_inversion_block_count));
1740 cur_head_outs = NEW_ARR_F(out_edge, 0);
1742 /* Get all edges pointing into the condition chain. */
1743 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1745 /* Do the inversion */
1746 inversion_walk(cur_head_outs);
1748 DEL_ARR_F(cur_head_outs);
1750 /* Duplicated blocks changed doms */
1751 set_irg_doms_inconsistent(current_ir_graph);
1752 /* Loop content changed */
1753 set_irg_loopinfo_inconsistent(current_ir_graph);
1754 /* TODO are they? Depends on set_irn_in and set_irn_n exchange and new_node. */
1755 set_irg_outs_inconsistent(current_ir_graph);
1760 DEL_ARR_F(cond_chain_entries);
1761 DEL_ARR_F(head_df_loop);
1763 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1767 * Rewire floating copies of the current loop.
1769 static void place_copies(int copies)
1771 ir_node *loophead = loop_head;
1772 int headarity = get_irn_arity(loophead);
1777 /* Append the copies to the existing loop. */
1778 for (i = 0; i < headarity; ++i) {
1779 /* Build unrolled loop top down */
/* Only the loop's own backedges are rewired; alien edges belong to
 * other loops sharing this head. */
1780 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1781 for (c = 0; c < copies; ++c) {
1782 ir_node *upper_head;
1783 ir_node *lower_head = get_unroll_copy(loophead, c);
1784 ir_node *upper_pred;
/* Copy 0 hangs below the original head; every further copy hangs
 * below the previous one. */
1787 upper_head = loophead;
1788 upper_pred = get_irn_n(loophead, i);
1790 upper_head = get_unroll_copy(loophead, c - 1);
1791 upper_pred = get_copy_safe(get_irn_n(loophead, i), c - 1);
/* NOTE(review): pos is used here but its assignment is not visible in
 * this excerpt; presumably it is the backedge position within the copy
 * — confirm against the full source. */
1793 set_irn_n(lower_head, pos, upper_pred);
1794 for_each_phi(loophead, phi) {
1795 ir_node *lower_phi, *upper_phi, *upper_pred_phi;
1796 lower_phi = get_unroll_copy(phi, c);
1799 upper_pred_phi = get_irn_n(phi, i);
1801 upper_phi = get_unroll_copy(phi, c - 1);
1802 upper_pred_phi = get_copy_safe(get_irn_n(phi, i), c - 1);
1805 DB((dbg, LEVEL_5, "Rewire %N @%d to %N\n",
1806 lower_phi, pos, upper_pred_phi));
1808 /* Relying on phi nodes with 1 in to be present. */
1809 assert(is_Phi(lower_phi));
1811 /* Do not fix phis here. Chained phis will cause a defective copy array. */
/* A 1-in phi is replaced by a fresh phi over the upper copy's value,
 * keeping the copy array consistent via set_copy. */
1813 if (get_irn_arity(lower_phi) == 1) {
1814 ir_node *single_in[1];
1816 single_in[0] = upper_pred_phi;
1818 cp = new_rd_Phi(get_irn_dbg_info(lower_phi),
1819 get_nodes_block(lower_phi), 1, single_in,
1820 get_irn_mode(lower_phi));
1822 set_copy(phi, c, cp);
1823 exchange(lower_phi, cp);
1826 set_irn_n(lower_phi, pos, upper_pred_phi);
1831 /* Fix the topmost loop heads backedges. */
1832 set_irn_n(loophead, i, get_unroll_copy(get_irn_n(loophead, i), copies - 1));
1833 for_each_phi(loophead, phi) {
1834 ir_node *last_cp = get_copy_safe(get_irn_n(phi, i), copies - 1);
1835 set_irn_n(phi, i, last_cp);
1841 /* Copies the cur_loop */
1842 static void copy_loop(out_edge *cur_loop_outs, int copies)
/* Creates `copies` floating duplicates of the loop body by walking
 * backwards from every loop-out edge; place_copies() wires them up. */
1845 unroll_arity = get_backedge_n(loop_head, 0);
1847 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
/* One fresh visited generation per copy, so each copy_walk_n pass
 * revisits the whole loop. */
1850 for (c = 0; c < copies; ++c) {
1852 inc_irg_visited(current_ir_graph);
1854 DB((dbg, LEVEL_5, " ### Copy_loop copy n: %d ###\n", c));
1855 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1856 out_edge entry = cur_loop_outs[i];
1857 ir_node *pred = get_irn_n(entry.node, entry.pos);
1859 copy_walk_n(pred, is_in_loop, c);
1863 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
/* Stub for building a Duff's-device dispatch for partial iterations;
 * currently unimplemented (parameters deliberately unused). */
1868 static void create_duffs_device(int unroll_number, ir_node *start, ir_node *iter, ir_node *end)
1870 (void)unroll_number;
1881 /* Returns 1 if given node is loop invariant. */
1882 static unsigned is_loop_invariant_val(ir_node *node)
/* A node defined outside the loop is trivially invariant. */
1886 if (! is_in_loop(node))
1889 /* If this is a phi of the loophead shared by more than 1 loop,
1890 * we need to check if all defs are not in the loop. */
/* Scan all preds reached over own backedges; a pred different from the
 * phi itself is a definition inside the loop (handled in elsewhere-
 * visible lines — the return statements are not part of this excerpt). */
1892 for (i = 0; i < get_irn_arity(node); ++i) {
1893 if (is_own_backedge(get_nodes_block(node), i) && get_irn_n(node, i) != node)
1901 /* Searches and returns add node, that is used to change the iteration value of the current loop.
1902 * Also finds start_val*/
1903 static unsigned get_start_and_add(ir_node *iteration_phi)
1906 ir_node *found_add = NULL;
1907 int arity = get_irn_arity(iteration_phi);
1909 for (i = 0; i < arity; ++i) {
1911 /* Find start_val which needs to be pred of the iteration_phi.
1912 * If start_val already known, sanity check. */
/* Non-backedge preds feed the initial value: it must be an int Const,
 * and every such pred must agree on the same start_val. */
1913 if (!is_backedge(get_nodes_block(loop_info.iteration_phi), i)) {
1914 ir_node *found_start_val = get_irn_n(loop_info.iteration_phi,i);
1916 /* We already found a start_val, then it has to be the same. */
1917 if (!is_Const(found_start_val) || !mode_is_int(get_irn_mode(found_start_val))
1918 || (loop_info.start_val && found_start_val != loop_info.start_val))
1921 loop_info.start_val = found_start_val;
1924 /* The phi has to be in the loop head.
1925 * Follow all own backedges. Every value supplied from these preds of the phi
1926 * needs to origin from the same add. */
1927 if (is_own_backedge(get_nodes_block(loop_info.iteration_phi), i)) {
1928 ir_node *new_found = get_irn_n(loop_info.iteration_phi,i);
/* All backedge values must come from one and the same Add node. */
1930 if (!is_Add(new_found) || (found_add && found_add != new_found))
1933 found_add = new_found;
1939 /* Returns 1 if one pred of node is a const value and the other is not.
1940 * const_pred and other are set analogously. */
1941 static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **other)
1943 ir_node *pred0 = get_irn_n(node, 0);
1944 ir_node *pred1 = get_irn_n(node, 1);
1949 if (is_Const(pred0) && mode_is_int(get_irn_mode(pred0))) {
1950 const_pred = &pred0;
1954 if (is_Const(pred1) && mode_is_int(get_irn_mode(pred1))) {
1955 if (const_pred != NULL)
1956 /* RETURN. We do not want both preds to be constant. */
1960 const_pred = &pred1;
1965 static void create_duffs_block(void)
1967 /* Create duffs device block */
/* BUGFIX: the assignment below was missing its terminating semicolon. */
1968 mode = get_irn_mode(loop_info.add);
/* Fresh block for the Duff's-device dispatch logic. */
1969 block = new_Block(0, NULL);
1970 /*set_irg_current_Block(current_ir_graph, block);*/
/* Constant used to compute iteration count modulo the unroll factor. */
1972 unroll_mod_const = new_Const(unroll_number);
1979 op_pin_state_pinned);
1988 /* Check if application of duffs device is possible.
1989 * Checks mainly if cur_loop is a simple counting loop. */
1990 static unsigned get_unroll_decision(void)
1992 ir_node *projx, *cond, *projres, *loop_condition, *iteration_path;
1996 /* There is more than 1 condition for the loop, */
1997 if (loop_info.cf_outs != 1)
2000 /*TODO test for impact on performance */
2001 if (loop_info.calls > 0)
2004 /* find value on which loop exit depends */
/* Walks Proj(X) -> Cond -> Proj(res) -> compare; assumes exactly this
 * node pattern for the single loop exit. */
2005 projx = get_irn_n(loop_info.cf_out.node, loop_info.cf_out.pos);
2006 cond = get_irn_n(projx, 0);
2007 projres = get_irn_n(cond, 0);
2008 loop_condition = get_irn_n(projres, 0);
2010 DB((dbg, LEVEL_2, " loop condition %N in graph %N\n", loop_condition, current_ir_graph));
2012 /* One in of the loop condition needs to be loop invariant. => end_val
2013 * The other in is assigned by an add. =>add
2014 * The add uses a loop invariant value => step
2015 * and a phi with a loop invariant start_val and the add node as ins.
2026 /* Find the end_val, which has to be loop invariant. */
2027 /* Assumption that condition is cmp or mod. TODO other? */
2028 success = get_const_pred(loop_condition, &loop_info.end_val, &iteration_path);
2032 DB((dbg, LEVEL_2, "End_val %N\n", loop_info.end_val));
2034 /* We may find the add or the phi first. */
2035 if (is_Add(iteration_path)) {
2036 loop_info.add = iteration_path;
2037 /* Preds of the add should be step and the iteration_phi */
2038 success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi);
2042 if (! is_Phi(loop_info.iteration_phi))
2044 } else if (is_Phi(iteration_path)) {
2045 loop_info.iteration_phi = iteration_path;
2051 /* Find start_val and add node.
2052 * Does necessary sanity check of add, if it is already set. */
2053 success = get_start_and_add(loop_info.iteration_phi);
2057 /*space = get_tarval(loop_info.end_val) - get_tarval(loop_info.start_val);
2058 iterations = space / get_tarval(loop_info.step);*/
2063 /* Initialize unrolling phase. TODO alloc array here? */
2064 void *init_unrolling_phase(ir_phase *ph, const ir_node *n, void *old)
2066 unrolling_node_info *info = XMALLOCZ(unrolling_node_info);
2067 ir_node **arr = NEW_ARR_F(ir_node *, unroll_number - 1);
2069 memset(info->copies, 0, (unroll_number - 1) * sizeof(ir_node *));
2072 phase_set_irn_data(ph, n, info);
/* Unrolls cur_loop: decides the unroll factor, copies the loop body
 * (factor - 1) times, wires the copies up, and repairs SSA at loop exits. */
2080 static void unroll_loop(void)
2084 cur_loop_outs = NEW_ARR_F(out_edge, 0);
/* Collect all edges leaving the loop. */
2087 irg_walk_graph(current_ir_graph, get_loop_outs, NULL, NULL);
2089 unroll_number = get_unroll_decision();
2092 /*dump_ir_block_graph(current_ir_graph, "-pre");*/
/* unroll_number == 0 means unrolling was rejected. */
2094 if (unroll_number) {
2095 /* Use phase to keep copy of nodes from the condition chain. */
2096 phase = new_phase(current_ir_graph, phase_irn_init_default);
2098 /* create a new loop head and copy the loop as required. */
2099 /*create_duffs_device(unroll_number);*/
2101 /* Copies the loop */
2102 copy_loop(cur_loop_outs, unroll_number - 1);
2103 /*dump_ir_block_graph(current_ir_graph, "-cp");*/
2105 /* Line up the floating copies. */
2106 place_copies(unroll_number - 1);
2107 /*dump_ir_block_graph(current_ir_graph, "-ord");*/
2109 /* Extend loop successors */
2110 ssa_outs = extend_ins(cur_loop_outs, unroll_number - 1);
2111 /*dump_ir_block_graph(current_ir_graph, "-fixed")*/ ;
2113 /* Generate phis for all loop outs */
2114 for (i = 0; i < ARR_LEN(ssa_outs); ++i) {
2115 out_edge entry = ssa_outs[i];
2116 ir_node *node = entry.node;
2117 ir_node *pred = get_irn_n(entry.node, entry.pos);
2119 DB((dbg, LEVEL_5, "Do? construct_ssa_n def %N node %N pos %d\n",
2120 pred, node, entry.pos));
/* End (keepalive) users need no SSA reconstruction. */
2122 if (!is_End(node)) {
2123 DB((dbg, LEVEL_5, "construct_ssa_n def %N node %N pos %d\n",
2124 pred, node, entry.pos));
2127 construct_ssa_n(pred, node, unroll_number - 1);*/
2130 construct_ssa_n(pred, new_phi, unroll_number - 1);*/
2133 /*dump_ir_block_graph(current_ir_graph, "-ssa-done");*/
2135 /*irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);*/
2137 DEL_ARR_F(ssa_outs);
/* Duplication invalidated dominance, loop info and out edges. */
2139 set_irg_doms_inconsistent(current_ir_graph);
2140 set_irg_loopinfo_inconsistent(current_ir_graph);
2141 set_irg_outs_inconsistent(current_ir_graph);
2143 DEL_ARR_F(cur_loop_outs);
2146 /* Initialization and analysis of a single loop; runs the enabled transformations. */
2147 static void init_analyze(ir_loop *loop)
2149 /* Init new for every loop */
2153 loop_head_valid = 1;
2154 loop_inv_head = NULL;
2155 loop_peeled_head = NULL;
2157 /* Reset loop info */
2158 memset(&loop_info, 0, sizeof(loop_info_t));
2161 DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n",
2162 get_loop_node(loop, 0)));
/* Gather per-loop statistics and find the loop head. */
2165 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
2168 /* RETURN if there is no valid head */
2169 if (!loop_head || !loop_head_valid) {
2170 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
2173 DB((dbg, LEVEL_2, "Loophead: %N\n", loop_head));
/* Dispatch to whichever transformation the entry point enabled. */
2177 if (enable_inversion)
2180 if (enable_unrolling)
2184 DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n",
2185 get_loop_node(loop, 0)));
2188 /* Find most inner loops and send them to analyze_loop */
2189 static void find_most_inner_loop(ir_loop *loop)
2191 /* descend into sons */
2192 int sons = get_loop_n_sons(loop);
/* NOTE(review): as shown, every visited loop is appended before the
 * recursion, which conflicts with the function name (only innermost
 * loops should be collected); presumably a sons == 0 guard selects the
 * leaf case — confirm against the full source. */
2195 ARR_APP1(ir_loop *, loops, loop);
2198 for (s=0; s<sons; s++) {
2199 find_most_inner_loop(get_loop_son(loop, s));
2205 * Assure preconditions are met and go through all loops.
2207 void loop_optimization(ir_graph *irg)
2214 set_current_ir_graph(irg);
/* Required analyses: out edges and control-flow loop tree. */
2217 assure_irg_outs(irg);
2219 /* NOTE: sets only the loop attribute of blocks, not nodes */
2220 /* NOTE: Kills links */
2221 assure_cf_loop(irg);
2223 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
2224 collect_phiprojs(irg);
2225 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2227 loop = get_irg_loop(irg);
2228 sons = get_loop_n_sons(loop);
2230 loops = NEW_ARR_F(ir_loop *, 0);
2231 /* List all inner loops */
2232 for (nr = 0; nr < sons; ++nr) {
2233 find_most_inner_loop(get_loop_son(loop, nr));
2236 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2237 /* Set all links NULL */
2238 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
/* Process each collected loop; phi/proj lists and link state are
 * rebuilt per loop because the transformations invalidate them. */
2240 for (i = 0; i < ARR_LEN(loops); ++i) {
2241 ir_loop *loop = loops[i];
2244 /*dump_ir_block_graph(current_ir_graph, "-part");*/
2245 collect_phiprojs(irg);
2248 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
/* Loop tree must be recomputed after each transformation. */
2252 assure_cf_loop(irg);
2254 dump_ir_block_graph(current_ir_graph, "-DONE");*/
2257 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2258 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
/* Entry point: run loop unrolling (and only unrolling) on irg. */
2261 void do_loop_unrolling(ir_graph *irg)
2263 enable_unrolling = 1;
2265 enable_inversion = 0;
2267 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
2268 get_irg_start(irg)));
2270 loop_optimization(irg);
2272 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
2273 get_irg_start(irg)));
/* Entry point: run loop inversion (and only inversion) on irg. */
2276 void do_loop_inversion(ir_graph *irg)
2278 enable_unrolling = 0;
2280 enable_inversion = 1;
2282 DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
2283 get_irg_start(irg)));
2285 loop_optimization(irg);
2287 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
2288 get_irg_start(irg)));
/* Entry point: run loop peeling on irg. Note that neither unrolling nor
 * inversion is enabled here, so loop_optimization performs analysis only. */
2291 void do_loop_peeling(ir_graph *irg)
2293 enable_unrolling = 0;
2295 enable_inversion = 0;
2297 DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
2298 get_irg_start(irg)));
2300 loop_optimization(irg);
2302 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
2303 get_irg_start(irg)));
/* Creates a graph pass wrapping do_loop_inversion. */
2307 ir_graph_pass_t *loop_inversion_pass(const char *name)
2309 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
/* Creates a graph pass wrapping do_loop_unrolling. */
2312 ir_graph_pass_t *loop_unroll_pass(const char *name)
2314 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
/* Creates a graph pass wrapping do_loop_peeling. */
2317 ir_graph_pass_t *loop_peeling_pass(const char *name)
2319 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
2322 void firm_init_loop_opt(void)
2324 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");