2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling
30 #include "iroptimize.h"
48 #include "irbackedge_t.h"
49 #include "irphase_t.h"
53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
56 * Convenience macro for iterating over every phi node of the given block.
57 * Requires phi list per block.
59 #define for_each_phi(block, phi) \
60 for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))
62 #define for_each_phi_safe(head, phi, next) \
63 for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
64 (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
66 /* currently processed loop */
67 static ir_loop *cur_loop;
69 /* Condition for performing copy_walk. */
70 typedef unsigned walker_condition(ir_node *);
72 /* Node and position of a predecessor */
73 typedef struct out_edge {
80 typedef struct inversion_node_info {
83 struct inversion_node_info *free_list_next; /* linked list to free all node_infos */
84 } inversion_node_info;
87 typedef struct node_info {
90 struct ir_node *free_list_next; /* linked list to free all node_infos */
93 /* Head of the list for freeing node_infos */
94 static ir_node *free_list;
96 /* A walker may start visiting the current loop with these nodes. */
97 static out_edge *cur_loop_outs;
99 /* Outs of the nodes head. */
100 static out_edge *cur_head_outs;
102 /* Information about the loop head */
103 static ir_node *loop_head = NULL;
104 static unsigned loop_head_valid = 1;
106 /* List of all inner loops, that are processed. */
107 static ir_loop **loops;
110 static ir_node *loop_inv_head = NULL;
112 static ir_node *loop_peeled_head = NULL;
114 /* Loop analysis informations */
115 typedef struct loop_info_t {
116 unsigned blocks; /* number of blocks in the loop */
117 unsigned calls; /* number of calls */
120 ir_node *new_loop_head;
123 /* Information about the current loop */
124 static loop_info_t loop_info;
126 /* Outs of the condition chain (loop inversion). */
127 static out_edge *cond_chain_entries;
129 /* Arity of the unrolled loop heads. */
130 static int unroll_arity;
132 static ir_phase *phase;
137 static unsigned head_inversion_node_count;
138 static unsigned inversion_head_node_limit;
139 static unsigned head_inversion_block_count;
141 /* As the preconditions are the same, these flags,
142 * set depending on which function called, are used to determine what to do. */
143 static unsigned enable_peeling;
144 static unsigned enable_inversion;
145 static unsigned enable_unrolling;
148 /* Creates node_info. A linked list keeps all node_infos to free them again. */
149 static node_info *new_node_info(ir_node *node)
151 node_info *l = XMALLOCZ(node_info); /* zero-initialized allocation */
152 l->free_list_next = free_list; /* prepend to the global free list for later cleanup */
158 /* Returns linked node_info for given node. */
159 static node_info *get_node_info(ir_node *n)
161 return ((node_info *)get_irn_link(n)); /* the info is stored in the node's link field */
164 /* Allocates a node_info struct for the given node */
165 static void alloc_node_info(ir_node *node)
168 state = new_node_info(node);
169 set_irn_link(node, (void *)state); /* attach via link so get_node_info() can retrieve it */
172 /* Frees node info and copy array for given node. Returns next node of free_list. */
173 static ir_node *free_info(ir_node *node)
175 node_info *info = get_node_info(node);
176 ir_node *next = info->free_list_next; /* remember successor before the info is freed */
177 /*DB((dbg, LEVEL_1, "FREE %N\n", node));*/
178 DEL_ARR_F(info->copy); /* release the copy array owned by this info */
180 set_irn_link(node, NULL); /* clear link so the stale info is never reused */
184 /* Free all created node_infos, including the copy arrays. */
185 static void free_node_info(void)
187 ir_node *node = free_list;
189 node = free_info(node); /* free_info() yields the next element of the free list */
194 /* Free node_infos of blocks only, including the copy arrays. */
195 static void free_node_info_blocks(void)
197 ir_node *node = free_list;
198 ir_node *new_freelist = NULL; /* surviving (non-block) infos are relinked here */
201 if (is_Block(node)) {
202 next = free_info(node);
/* Non-block node: keep its info and move it onto the new free list. */
204 next = get_node_info(node)->free_list_next;
205 get_node_info(node)->free_list_next = new_freelist;
210 free_list = new_freelist; /* free_list now holds only the surviving infos */
213 /* Reset nodes link. For use with a walker. */
214 static void reset_link(ir_node *node, void *env)
217 set_irn_link(node, NULL); /* drop any stale link data */
/* Records cp as the inversion copy of n; stored in the phase's per-node data. */
220 static void set_inversion_copy(ir_node *n, ir_node *cp)
222 phase_set_irn_data(phase, n, cp);
225 /* Returns a nodes copy. */
226 static ir_node *get_copy(ir_node *n, int index)
228 ir_node *cp = ((node_info *)get_irn_link(n))->copy[index]; /* requires a linked node_info */
232 /* Returns a nodes copy if it exists, or else the node. */
233 static ir_node *get_inversion_copy(ir_node *n)
235 ir_node *cp = (ir_node *)phase_get_irn_data(phase, n); /* NULL if no copy was recorded */
239 /* Returns a nodes copy, or node if there is no node info linked. */
240 static ir_node *get_copy_safe(ir_node *node, int index)
242 node_info *info = (node_info *)get_irn_link(node); /* may be NULL for nodes that were not copied */
244 return get_copy(node, index); /* only reached when a node_info exists */
249 /* Assigns the given array to a nodes node_info */
250 static void set_copy_arr(ir_node *n, ir_node **arr)
252 ((node_info *)get_irn_link(n))->copy = arr; /* the info takes ownership; freed in free_info() */
255 /* Assigns a copy with given index to a node.
256 * Prerequirements are existing node_info and copy array. */
257 static void set_copy(ir_node *n, int index, ir_node *copy)
259 ((node_info *)get_irn_link(n))->copy[index] = copy; /* no bounds check: index must be < allocated copy count */
262 /* Returns 0 if the node or block is not in cur_loop. */
263 static unsigned is_in_loop(ir_node *node)
265 return (get_irn_loop(get_block(node)) == cur_loop); /* membership is decided by the node's block */
268 /* Returns if the given be is an alien edge.
269 * This is the case when the pred is not in the loop. */
270 static unsigned is_alien_edge(ir_node *n, int i)
272 return(!is_in_loop(get_irn_n(n, i))); /* pred outside of cur_loop => alien */
/* Returns non-zero iff pos is a backedge whose source lies inside cur_loop. */
275 static unsigned is_own_backedge(ir_node *n, int pos)
277 return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
280 /* Resets block mark for given node. For use with walker */
281 static void reset_block_mark(ir_node *node, void * env)
286 set_Block_mark(node, 0); /* NOTE(review): presumably guarded by an is_Block(node) check not visible here */
289 static unsigned is_nodes_block_marked(ir_node* node)
/* NOTE(review): the branch selecting between these two returns (likely is_Block(node)) is not visible here. */
292 return get_Block_mark(node);
/* Non-block nodes: use the mark of their containing block. */
294 return get_Block_mark(get_block(node));
297 /* Returns the number of blocks in a loop. */
298 static int get_loop_n_blocks(ir_loop *loop)
302 elements = get_loop_n_elements(loop);
304 for (e=0; e<elements; e++) {
305 loop_element elem = get_loop_element(loop, e);
306 if (is_ir_node(elem.kind) && is_Block(elem.node)) /* count only block-kind loop elements */
/* Appends new as an additional predecessor of n.
 * NOTE(review): return statement not visible in this fragment; the trailing
 * comment suggests the old arity (== position of the new in) is returned. */
313 static int extend_irn(ir_node *n, ir_node *new)
317 int arity = get_irn_arity(n);
318 int new_arity = arity + 1;
320 NEW_ARR_A(ir_node *, ins, new_arity); /* on-stack array for the new in set */
322 for(i = 0; i < arity; ++i) {
323 ins[i] = get_irn_n(n, i);
327 set_irn_in(n, new_arity, ins);
328 /* arity equals the position of the new node */
/* Extends block, and all of its phis, by the inversion copy of the in at pos. */
333 static void extend_ins_by_copy(ir_node *block, int pos)
337 assert(is_Block(block));
339 /* Extend block by copy of definition at pos */
340 new_in = get_inversion_copy(get_irn_n(block, pos));
341 extend_irn(block, new_in);
343 /* Extend block phis by copy of definition at pos */
344 for_each_phi(block, phi) {
347 pred = get_irn_n(phi, pos);
348 cp = get_inversion_copy(pred); /* NULL if pred was not in the condition chain */
349 /* If the phis in is not in the condition chain (eg. a constant),
350 * there is no copy. */
356 DB((dbg, LEVEL_5, "Extend phi %N by %N\n", phi, new_in));
357 extend_irn(phi, new_in);
362 * Extends all blocks which succeed the loop with the copied loops outs.
363 * Also extends the blocks phis. Requires block phi list.
364 * Returns an updated list of loop outs of the original loop outs.
/* Extends every loop-successor block (and its phis) with the ins of the
 * given number of loop copies, rebuilding the block and its phis with the
 * widened arity and exchanging the originals. Returns the updated out list. */
366 static out_edge *extend_ins(out_edge *outs, int copies)
371 ir_node *phi, *next_phi, *new_phi, *head, *new_block;
372 int old_arity, new_arity;
376 inc_irg_visited(current_ir_graph);
378 new_outs = NEW_ARR_F(out_edge, 0);
/* First pass: dump the current out edges (debug only in this fragment). */
381 for (i = 0; i < ARR_LEN(outs); ++i) {
382 out_edge edge = outs[i];
383 ir_node *node = edge.node;
384 DB((dbg, LEVEL_5, "outs %N %d\n", node, edge.pos));
/* Second pass: rebuild each out node with widened arity. */
388 for (i = 0; i < ARR_LEN(outs); ++i) {
389 out_edge edge = outs[i];
390 ir_node *node = edge.node;
391 int in_c, positions_n;
394 if (!is_Block(node) && !is_Bad(node)) {
399 ir_node *p = get_nodes_block(node);
400 for(c = 0; c < get_irn_arity(p); ++c) {
401 if (is_in_loop(get_irn_n(p, c))) {
407 ARR_APP1(out_edge, new_outs, edge);
408 DB((dbg, LEVEL_5, "new out %N\n", node));
413 ARR_APP1(out_edge, new_outs, edge);
414 DB((dbg, LEVEL_5, "new out %N\n", node));
418 /* CONTINUE if node already processed. */
419 if (irn_visited(node))
425 positions = NEW_ARR_F(int, 0);
428 /* Search for all occurences of the current node, and save the in positions. */
429 for (p = 0; p < ARR_LEN(outs); ++p) {
430 ir_node *cur = outs[p].node;
431 int d_pos = outs[p].pos;
434 DB((dbg, LEVEL_5, "Loop successor %N with pred @%d in loop\n",
436 ARR_APP1(int, positions, d_pos);
437 mark_irn_visited(cur);
/* One extra in per (loop-pred position x copy). */
442 old_arity = get_irn_arity(node);
443 new_arity = old_arity + (positions_n * copies);
445 ins = NEW_ARR_F(ir_node*, new_arity);
446 bes = NEW_ARR_F(int, new_arity);
448 /* extend block ins */
449 for (in_c = 0; in_c < old_arity; ++in_c) {
450 ins[in_c] = get_irn_n(node, in_c);
451 bes[in_c] = is_backedge(node, in_c);
453 for (p = 0; p < positions_n; ++p) {
454 for (c = 0; c < copies; ++c) {
455 ir_node *cp = get_copy_safe(get_irn_n(node, positions[p]), c);
456 DB((dbg, LEVEL_5, "%N (new_arity %d) @%d = %N\n",
457 node, new_arity, in_c, cp));
459 bes[in_c] = is_backedge(node, positions[p]); /* new in inherits backedge state */
464 assert(new_arity == in_c);
466 head = get_Block_phis(node); /* keep the phi list before the block is replaced */
467 new_block = new_r_Block(get_irn_irg(node), new_arity, ins);
468 set_irn_loop(new_block, get_irn_loop(node));
470 for (in_c = 0; in_c < new_arity; ++in_c) {
472 set_backedge(new_block, in_c); /* restore the saved backedge flags */
478 set_Block_phis(node, NULL);
480 phi_ins = NEW_ARR_F(ir_node *, new_arity);
481 phi_bes = NEW_ARR_F(int, new_arity);
/* Rebuild every phi of the block with the widened in set. */
483 for_each_phi_safe(head, phi, next_phi) {
484 for (in_c = 0; in_c < old_arity; ++in_c) {
485 phi_ins[in_c] = get_irn_n(phi, in_c);
486 phi_bes[in_c] = is_backedge(phi, in_c);
489 for (p = 0; p < positions_n; ++p) {
490 for(c = 0; c < copies; ++c) {
492 pred = get_irn_n(phi, positions[p]);
493 DB((dbg, LEVEL_5, "Phi %N pred @%d is %N\n",
494 phi, positions[p], pred));
495 cp = get_copy_safe(pred, c);
497 DB((dbg, LEVEL_5, "Phi %N in %d is %N\n",
499 phi_bes[in_c] = is_backedge(phi, positions[p]);
504 new_phi = new_r_Phi(new_block, new_arity, phi_ins, get_irn_mode(phi));
506 for (in_c = 0; in_c < new_arity; ++in_c) {
508 set_backedge(new_phi, in_c);
510 exchange(phi, new_phi);
511 DB((dbg, LEVEL_5, "exch Phi %N %N\n",phi, new_phi)); /* NOTE(review): prints phi after exchange() replaced it */
513 add_Block_phi(new_block, new_phi);
519 exchange(node, new_block);
520 set_Block_MacroBlock(new_block, new_block);
521 DB((dbg, LEVEL_5, "exch block %N %N\n", node, new_block)); /* NOTE(review): prints node after exchange() */
523 DEL_ARR_F(positions);
529 * Finds loop head and loop_info.
/* Walker: collects loop statistics into loop_info and determines loop_head,
 * the block inside the loop with a control-flow predecessor outside of it. */
531 static void get_loop_info(ir_node *node, void *env)
533 unsigned node_in_loop, pred_in_loop;
537 arity = get_irn_arity(node);
538 for (i = 0; i < arity; i++) {
539 ir_node *pred = get_irn_n(node, i);
541 pred_in_loop = is_in_loop(pred);
542 node_in_loop = is_in_loop(node);
544 /* collect some loop information */
550 /* Find the loops head/the blocks with cfpred outside of the loop */
551 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
552 ir_node *cfgpred = get_Block_cfgpred(node, i);
554 if (!is_in_loop(cfgpred)) {
555 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
557 /* another head? We do not touch this. */
558 if (loop_head && loop_head != node) { /* multiple heads => give up on this loop */
/* Walker: replaces each arity-1 Phi by a freshly built 1-in Phi
 * with the same predecessor, block and debug info. */
570 static void correct_phis(ir_node *node, void *env)
574 if (is_Phi(node) && get_irn_arity(node) == 1) {
577 in[0] = get_irn_n(node, 0);
579 exch = new_rd_Phi(get_irn_dbg_info(node),
580 get_nodes_block(node), 1, in,
583 exchange(node, exch);
584 /* TODO make sure visited is copied */
591 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
592 static void get_loop_outs(ir_node *node, void *env)
594 unsigned node_in_loop, pred_in_loop;
598 arity = get_irn_arity(node);
599 for (i = 0; i < arity; ++i) {
600 ir_node *pred = get_irn_n(node, i);
602 pred_in_loop = is_in_loop(pred);
603 node_in_loop = is_in_loop(node);
/* An edge from inside the loop to a node outside is a loop out. */
605 if (pred_in_loop && !node_in_loop) {
609 ARR_APP1(out_edge, cur_loop_outs, entry);
610 if (is_Block(node)) {
612 loop_info.cf_out = entry; /* remember the control-flow out edge */
614 DB((dbg, LEVEL_5, "loop outs %N\n", node));
619 static ir_node *ssa_second_def;
620 static ir_node *ssa_second_def_block;
623 * Walks the graph bottom up, searching for definitions and creates phis.
/* Recursively searches the dominating definition of the value for block,
 * creating Phi nodes where control flow merges. Definitions are carried in
 * the block link fields; visited state caches already resolved blocks. */
625 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
633 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
635 /* Prevents creation of phi that would be bad anyway.
636 * Dead and bad blocks. */
637 if (get_irn_arity(block) < 1 || is_Bad(block)) {
638 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
642 if (block == ssa_second_def_block) {
643 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
644 return ssa_second_def;
647 /* already processed this block? */
648 if (irn_visited(block)) {
649 ir_node *value = get_irn_link(block);
650 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
654 irg = get_irn_irg(block);
655 assert(block != get_irg_start_block(irg));
657 /* a Block with only 1 predecessor needs no Phi */
658 n_cfgpreds = get_Block_n_cfgpreds(block);
659 if (n_cfgpreds == 1) {
660 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
663 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
665 value = search_def_and_create_phis(pred_block, mode);
666 set_irn_link(block, value); /* cache the result for this block */
667 mark_irn_visited(block);
672 /* create a new Phi */
673 NEW_ARR_A(ir_node*, in, n_cfgpreds);
674 for (i = 0; i < n_cfgpreds; ++i)
675 in[i] = new_Unknown(mode); /* placeholders; real preds are filled in below */
677 phi = new_r_Phi(block, n_cfgpreds, in, mode);
679 /* Important: always keep block phi list up to date. */
680 add_Block_phi(block, phi);
682 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
/* Link the phi BEFORE recursing so cycles resolve to this phi. */
684 set_irn_link(block, phi);
685 mark_irn_visited(block);
687 /* set Phi predecessors */
688 for (i = 0; i < n_cfgpreds; ++i) {
690 ir_node *pred_block = get_Block_cfgpred_block(block, i);
691 assert(pred_block != NULL);
692 pred_val = search_def_and_create_phis(pred_block, mode);
693 assert(pred_val != NULL);
695 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
696 set_irn_n(phi, i, pred_val);
703 * Given a set of values this function constructs SSA-form for the users of the
704 * first value (the users are determined through the out-edges of the value).
705 * Works without using the dominance tree.
/* Rewires all users of orig_val so that uses reached from second_block see
 * second_val instead, inserting Phis as needed (no dominance tree required).
 * Temporarily claims IR_RESOURCE_IRN_VISITED and the block link fields. */
707 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
708 ir_node *second_block, ir_node *second_val)
712 const ir_edge_t *edge;
713 const ir_edge_t *next;
715 assert(orig_block && orig_val && second_block && second_val &&
716 "no parameter of construct_ssa may be NULL");
718 /* no need to do anything */
719 if (orig_val == second_val)
722 irg = get_irn_irg(orig_val);
724 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
725 inc_irg_visited(irg);
727 mode = get_irn_mode(orig_val);
728 set_irn_link(orig_block, orig_val); /* seed: the original definition */
729 mark_irn_visited(orig_block);
731 ssa_second_def_block = second_block; /* seed: the second definition */
732 ssa_second_def = second_val;
734 /* Only fix the users of the first, i.e. the original node */
735 foreach_out_edge_safe(orig_val, edge, next) {
736 ir_node *user = get_edge_src_irn(edge);
737 int j = get_edge_src_pos(edge);
738 ir_node *user_block = get_nodes_block(user);
745 DB((dbg, LEVEL_5, "original user %N\n", user));
/* Phi users look up the definition in the corresponding pred block. */
748 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
749 newval = search_def_and_create_phis(pred_block, mode);
751 newval = search_def_and_create_phis(user_block, mode);
754 assert(!is_Bad(newval));
756 /* If we get a bad node the user keeps the original in.
757 * No second definition needed. */
758 if (newval != user && !is_Bad(newval))
759 set_irn_n(user, j, newval);
762 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
766 /* Construct ssa for def and all of its copies.
767 * NOTE: Overwrites links of blocks.
/* Constructs SSA for def and all of its copies (seeded via their blocks'
 * link fields) and rewires the matching in of user. Overwrites block links
 * and claims IR_RESOURCE_IRN_VISITED for the duration. */
769 static ir_node *construct_ssa_n(ir_node *def, ir_node *user, int copies)
773 irg = get_irn_irg(def);
777 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
778 inc_irg_visited(current_ir_graph);
779 mode = get_irn_mode(def);
781 set_irn_link(get_nodes_block(def), def); /* seed: original definition */
782 mark_irn_visited(get_nodes_block(def));
/* Seed every copy of def as the definition in its own block. */
784 for (i = 0; i < copies; ++i) {
785 ir_node *cp = get_copy(def, i);
786 ir_node *block = get_nodes_block(cp);
787 set_irn_link(block, cp);
788 mark_irn_visited(block);
790 DB((dbg, LEVEL_5, "ssa mark cp %N of def %N usr %N\n", cp, def, user));
794 user_arity = get_irn_arity(user);
795 for (i = 0; i < user_arity; ++i) {
796 ir_node *user_block = get_nodes_block(user);
797 if (get_irn_n(user, i) != def)
800 DB((dbg, LEVEL_5, "ssa_n found edge of user %N\n", user));
803 ir_node *pred_block = get_Block_cfgpred_block(user_block, i);
804 newval = search_def_and_create_phis(pred_block, mode);
806 newval = search_def_and_create_phis(user_block, mode);
808 /*TODO bads should not happen! */
809 if (newval != user) {
810 DB((dbg, LEVEL_5, "=> ssa_n new in is %N\n", newval));
811 set_irn_n(user, i, newval);
812 /*if (is_Phi(newval) && get_nodes_block(newval)== user_block) {
813 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
818 /* BREAK, as we found and handled the definition */
/* Debug aid: keep a Bad alive and dump the graph for inspection. */
822 if (is_Bad(newval)) {
823 DB((dbg, LEVEL_5, "=> ssa_n new in is BAD %N in graph %N\n", newval, current_ir_graph));
824 add_End_keepalive(get_irg_end(current_ir_graph), newval);
825 dump_ir_block_graph(current_ir_graph, "-bad"); /* NOTE(review): graph dump in an optimization pass — debug leftover? */
832 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
836 /* Returns the number of backedges with or without alien bes. */
837 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
841 int arity = get_irn_arity(loophead);
842 for (i = 0; i < arity; ++i) {
843 ir_node *pred = get_irn_n(loophead, i);
844 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred))) /* alien backedges only counted on request */
851 * Returns a raw copy of the given node.
852 * Attributes are kept/set according to the needs of loop inversion.
854 static ir_node *copy_node_inversion(ir_node *node)
859 cp = exact_copy(node);
860 arity = get_irn_arity(node);
/* The copy must not keep the original's backedge flags. */
862 for (i = 0; i < arity; ++i) {
863 if (is_backedge(node, i))
867 set_inversion_copy(node, cp); /* register cp so later walks find it */
868 DB((dbg, LEVEL_5, "set copy of %N to cp %N \n", node, cp));
871 /* We may not keep the old macroblock. */
872 set_Block_MacroBlock(cp, cp);
873 set_Block_mark(cp, 0);
880 * This walker copies all walked nodes.
881 * If the walk_condition is true for a node, it is copied.
882 * All nodes node_info copy attributes have to be NULL prior to every walk.
883 * TODO temporary nodes not necessary anymore.
885 static void copy_walk(ir_node *node, walker_condition *walk_condition,
892 ir_graph *irg = current_ir_graph;
895 * break condition and cycle resolver, creating temporary node copies
897 if (get_irn_visited(node) >= get_irg_visited(irg)) {
898 /* Here we rely on nodestate's copy being initialized with NULL */
899 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
/* Cycle: create a temporary copy now; its ins are fixed when the walk returns here. */
900 if (get_inversion_copy(node) == NULL) {
901 cp = copy_node_inversion(node);
902 DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
908 mark_irn_visited(node);
910 if (!is_Block(node)) {
911 ir_node *pred = get_nodes_block(node);
912 if (walk_condition(pred))
913 DB((dbg, LEVEL_5, "walk block %N\n", pred));
914 copy_walk(pred, walk_condition, set_loop); /* NOTE(review): lines 912-914 are consecutive in the original — the condition guards only the debug print, and the block is walked unconditionally; verify this is intended */
917 arity = get_irn_arity(node);
919 NEW_ARR_A(ir_node *, cpin, arity);
921 for (i = 0; i < arity; ++i) {
922 ir_node *pred = get_irn_n(node, i);
924 if (walk_condition(pred)) {
925 DB((dbg, LEVEL_5, "walk node %N\n", pred));
926 copy_walk(pred, walk_condition, set_loop);
927 cpin[i] = get_inversion_copy(pred); /* the in of the copy is the pred's copy */
928 DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
929 node, get_inversion_copy(pred), pred));
935 /* copy node / finalize temp node */
936 if (get_inversion_copy(node) == NULL) {
937 /* No temporary copy existent */
938 cp = copy_node_inversion(node);
939 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
941 /* temporary copy is existent but without correct ins */
942 cp = get_inversion_copy(node);
943 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
946 if (!is_Block(node)) {
947 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
949 set_nodes_block(cp, cpblock );
951 add_Block_phi(cpblock, cp); /* keep the copied block's phi list up to date */
954 set_irn_loop(cp, set_loop);
956 /* Keeps phi list of temporary node. */
957 set_irn_in(cp, ARR_LEN(cpin), cpin);
960 /* Creates a new node from a given one, but with custom ins. */
961 static ir_node *copy_node_changed(
962 ir_node *node, ir_node *block, int arity, ir_node **ins, ir_loop *loop)
965 DB((dbg, LEVEL_5, "New node from %N in %N block %N arity %d\n",
966 node, get_irn_irg(node), block, arity));
/* Phis get a dedicated constructor so they stay proper Phi nodes. */
969 cp = new_rd_Phi(get_irn_dbg_info(node),
977 /* We want to keep phi nodes with arity 1, as it makes rewiring a lot easier. */
978 cp = new_ir_node(get_irn_dbg_info(node),
987 add_Block_phi(block, cp);
/* Only copy attributes when the op did not change (e.g. Phi stayed Phi). */
989 if (get_irn_op(node) == get_irn_op(cp))
990 copy_node_attr(get_irn_irg(node), node, cp);
992 /*new_backedge_info(cp); function removed */
994 set_Block_MacroBlock(cp, cp);
996 set_irn_loop(cp, loop);
998 /* Blocks are copied first, so that future phi nodes populate the phi list. */
1000 set_Block_phis(cp, NULL);
1002 DB((dbg, LEVEL_5, "New node %N in %N block %N arity %d\n",
1003 cp, get_irn_irg(cp), block, arity));
1007 /* To be exchangeable, the unknown needs to have proper graph set. */
1008 static ir_node *new_node_unknown(ir_node *block, ir_mode *mode)
1010 ir_node *new = new_ir_node(NULL, get_irn_irg(block), block, op_Unknown, mode, 0, NULL); /* built manually so the graph is set, making it exchangeable */
1015 * Walker, to copy alle nodes that meet the walk_condition.
1016 * Blocks are copied first, which is important for the creation of the phi lists.
1019 static ir_node *copy_walk_n(ir_node *node, walker_condition *walk_condition, int copy_index, int alloc_cp_arr)
1023 unsigned headblock = 0;
1024 ir_node *block, *cp, *new_cp;
1025 ir_node **ins = NULL;
1027 if (irn_visited(node)) {
1028 cp = get_copy(node, copy_index);
1030 DB((dbg, LEVEL_5, "Visited %N\n", node));
1033 DB((dbg, LEVEL_5, "Visited. Return copy %N\n", cp));
/* Cycle resolver: create an Unknown placeholder that is exchanged for the
 * real copy once it exists (see the exchange at the end). */
1037 if (!is_Block(node)) {
1038 ir_node *unknown = new_node_unknown(
1039 get_nodes_block(node),
1040 get_irn_mode(node));
1041 DB((dbg, LEVEL_5, " #### Create Unknown %N for %N\n", unknown, node));
1042 set_copy(node, copy_index, unknown);
1046 DB((dbg, LEVEL_5, "Walk %N\n", node));
1047 mark_irn_visited(node);
1049 /* Allocate array of copies */
1050 if (alloc_cp_arr > 0) {
1051 ir_node **arr = NEW_ARR_F(ir_node *, alloc_cp_arr);
1052 alloc_node_info(node);
1053 for (i = 0; i < alloc_cp_arr; ++i)
1055 set_copy_arr(node, arr);
/* Walk the block first so phi lists exist before phis are copied. */
1060 if (!is_Block(node)) {
1061 ir_node *orig_block = get_nodes_block(node);
1062 DB((dbg, LEVEL_5, "current node: %N about to walk block %N\n", node, orig_block));
1063 /* Check walk_condition for blocks not necessary */
1065 if (orig_block == loop_head && is_Phi(node))
1068 block = copy_walk_n(orig_block, walk_condition, copy_index, alloc_cp_arr);
1069 DB((dbg, LEVEL_5, "current node: %N block %N blocks copy %N\n",
1070 node, orig_block, block));
1072 if (node == loop_head)
1077 arity = get_irn_arity(node);
1080 /* Headblock and headblock-phi case */
1082 DB((dbg, LEVEL_5, "Headblock/Phi from headblock\n"));
/* Head copies keep only the backedge ins; arity shrinks to unroll_arity. */
1083 NEW_ARR_A(ir_node *, ins, unroll_arity);
1085 for (i = 0; i < arity; ++i) {
1087 ir_node *pred = get_irn_n(node, i);
1088 if (is_backedge(loop_head, i)) {
1089 if (walk_condition(pred)) {
1090 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1091 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1094 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1099 arity = unroll_arity;
/* Regular case: walk all preds that satisfy the condition. */
1102 NEW_ARR_A(ir_node *, ins, arity);
1104 for (i = 0; i < arity; ++i) {
1106 ir_node *pred = get_irn_n(node, i);
1107 if (walk_condition(pred)) {
1108 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1109 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1112 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1118 /* NOW, after the walk, we may check if the blocks copy is already present. */
1119 cp = get_copy(node, copy_index);
1120 if (cp != NULL && !is_Unknown(cp)) {
1121 DB((dbg, LEVEL_5, "Must be Block %N copy is already present\n", node, cp)); /* NOTE(review): format has one %N but two node args */
1122 assert(is_Block(cp) &&
1123 "sanity: The copy should have been a block.");
1127 new_cp = copy_node_changed(node, block, arity, ins, cur_loop);
1128 set_copy(node, copy_index, new_cp);
/* Resolve a placeholder created by the cycle resolver above. */
1130 if (cp != NULL && is_Unknown(cp)) {
1131 DB((dbg, LEVEL_5, "Found unknown %N in %N now exchanged by %N in %N\n",
1132 cp, get_irn_irg(cp), new_cp, get_irn_irg(new_cp)));
1133 exchange(cp, new_cp);
1139 * Populates head_entries with (node, pred_pos) tuple
1140 * whereas the node's pred at pred_pos is in the head but not the node itself.
1141 * Head and condition chain blocks must be marked.
1143 static void get_head_outs(ir_node *node, void *env)
1146 int arity = get_irn_arity(node);
1149 DB((dbg, LEVEL_5, "get head entries %N \n", node));
1151 for (i = 0; i < arity; ++i) {
1152 /* node is not in the head, but the predecessor is.
1153 * (head or loop chain nodes are marked) */
1155 DB((dbg, LEVEL_5, "node %N marked %d (0) pred %d marked %d (1) \n",
1156 node, is_nodes_block_marked(node), i,
1157 is_nodes_block_marked(get_irn_n(node, i))));
1159 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
1163 entry.pred = get_irn_n(node, i);
1164 ARR_APP1(out_edge, cur_head_outs, entry);
/* A loop block fed from the condition chain becomes the new loop head. */
1166 if (is_in_loop(node) && is_Block(node)) {
1167 loop_info.new_loop_head = node;
1168 DB((dbg, LEVEL_5, " SET NEW LOOPHEAD to %N\n", node));
/* Loop-head phis: decide whether their users need SSA reconstruction. */
1173 if (is_Phi(node) && get_nodes_block(node) == loop_head)
1175 unsigned needs_ssa = 1;
1176 unsigned loop_invar = 1;
1177 for (i = 0; i < arity; ++i) {
1178 if (is_own_backedge(node, i)) {
1179 if (get_irn_n(node, i) != node) /* self-reference means loop-invariant */
1182 if (is_nodes_block_marked(get_irn_n(node, i)))
1186 /* Construct ssa for all uses of this phi, as there will be a copy of it. */
1187 if(needs_ssa && !loop_invar) {
1188 const ir_edge_t *edge;
1189 DB((dbg, LEVEL_5, "NEEDS SSA users of phi %N\n", node));
1190 foreach_out_edge(node, edge) {
1192 entry.node = get_edge_src_irn(edge);
1193 DB((dbg, LEVEL_5, "NEEDS SSA user %N\n", entry.node));
1194 entry.pos = get_edge_src_pos(edge);
1195 ARR_APP1(out_edge, cur_head_outs, entry);
1203 * Find condition chains, and add them to be inverted
1204 * A block belongs to the chain if a condition branches out of the loop.
1205 * Returns 1 if the given block belongs to the condition chain.
1207 static unsigned find_condition_chains(ir_node *block)
1209 const ir_edge_t *edge;
1211 unsigned has_backedge = 0;
1214 DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
1216 /* Collect all outs, including keeps. */
1217 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
1221 /* Leave at least one block as body. */
/* Abort growing the chain if the node or block budget would be exceeded. */
1222 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
1223 || head_inversion_block_count + 1 == loop_info.blocks) {
1224 set_Block_mark(block, 0);
1229 /* First: check our successors,
1230 * and add all of them, which are outside of the loop, to the list */
1231 foreach_block_succ(block, edge) {
1232 ir_node *src = get_edge_src_irn( edge );
1233 int pos = get_edge_src_pos( edge );
1235 if (!is_in_loop(src)) {
1241 ARR_APP1(out_edge, cond_chain_entries, entry);
1242 mark_irn_visited(src);
1243 } else if (is_backedge(src, pos)) {
1244 /* Inverting blocks with backedge outs leads to a cf edge
1245 * from the inverted head, into the inverted head (skipping the body).
1246 * As the body becomes the new loop head,
1247 * this would introduce another loop in the existing loop.
1248 * This loop inversion cannot cope with this complex case. */
1254 ARR_APP1(out_edge, cond_chain_entries, entry);
1255 mark_irn_visited(src);
/* Only blocks that branch out of the loop (and have no backedge out)
 * belong to the condition chain. */
1259 if (mark == 0 || has_backedge) {
1260 set_Block_mark(block, 0);
1263 set_Block_mark(block, 1);
1264 ++head_inversion_block_count;
1265 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
1266 head_inversion_node_count += nodes_n;
1269 /* Second: walk all successors,
1270 * and add them to the list if they are not part of the chain */
1271 foreach_block_succ(block, edge) {
1273 ir_node *src = get_edge_src_irn( edge );
1274 int pos = get_edge_src_pos( edge );
1277 if (!is_in_loop( src ) || irn_visited(src)) {
1281 mark_irn_visited(src);
1282 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
1283 inchain = find_condition_chains(src);
1285 /* if successor is not part of chain we need to collect its outs */
1290 ARR_APP1(out_edge, cond_chain_entries, entry);
1297 * Rewires the copied condition chain. Removes backedges.
1298 * as this condition chain is prior to the loop.
1299 * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1300 * (loop_head is already fixed, we cannot rely on it.)
1302 static void fix_copy_inversion(void)
1307 ir_node *phi, *next;
1308 ir_node *head_cp = get_inversion_copy(loop_head);
1309 int arity = get_irn_arity(head_cp);
1310 int backedges = get_backedge_n(head_cp, 0); /* own backedges only (with_alien == 0) */
1311 int new_arity = arity - backedges; /* the copied head loses all backedges */
1315 NEW_ARR_A(ir_node *, ins, new_arity);
1318 /* Remove block backedges */
1319 for(i = 0; i < arity; ++i) {
1320 if (!is_backedge(head_cp, i))
1321 ins[pos++] = get_irn_n(head_cp, i);
1324 new_head = new_Block(new_arity, ins);
1326 phis = NEW_ARR_F(ir_node *, 0);
/* Rebuild every phi of the copied head without the backedge ins. */
1328 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1330 NEW_ARR_A(ir_node *, ins, new_arity);
1332 for(i = 0; i < arity; ++i) { /* NOTE(review): pos reset between iterations is not visible in this fragment */
1333 if (!is_backedge(head_cp, i))
1334 ins[pos++] = get_irn_n(phi, i);
1336 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1337 new_head, new_arity, ins,
1339 ARR_APP1(ir_node *, phis, new_phi);
/* Exchange old phis for the rebuilt ones, then the head block itself. */
1343 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1344 exchange(phi, phis[pos++]);
1347 exchange(head_cp, new_head);
1352 /* Takes a phi from the inverted loop head and its pred
1353 * which is assigned in the condition chain.
1355 * Returns new phi, created in the new loop head.
1356 * If there is an assignment in the condition chain
1357 * with a user also in the condition chain,
1358 * the dominance frontier is in the new loop head.
1359 * The dataflow loop is completely in the condition chain.
1363 * Phi_cp | | copied condition chain
1367 * Phi* | | new loop head
1369 * Phi | | original, inverted condition chain
1374 static ir_node *fix_inner_cc_definitions(ir_node *phi, ir_node *pred)
1378 ir_node **head_phi_ins;
1379 ir_node *new_loop_head = loop_info.new_loop_head;
1380 int new_loop_head_arity = get_irn_arity(new_loop_head);
1381 DB((dbg, LEVEL_5, "NEW HEAD %N arity %d\n", new_loop_head, new_loop_head_arity));
1383 NEW_ARR_A(ir_node *, head_phi_ins, new_loop_head_arity);
/* Build one in per pred of the new loop head: the original pred when it
 * comes from the (inverted) condition chain, its copy otherwise. */
1385 for(i = 0; i < new_loop_head_arity; ++i) {
1386 /* Rely on correct backedge info for the new loop head
1387 * is set by extend_ins_by_copy. */
1389 DB((dbg, LEVEL_5, "CHECK mark of pred of %N @%d %N\n",
1390 new_loop_head, i, get_irn_n(new_loop_head, i)));
1392 if(is_nodes_block_marked(get_irn_n(new_loop_head, i))) {
1393 head_phi_ins[i] = pred;
1394 DB((dbg, LEVEL_5, "NEW HEAD %N pred %d %N is in cc\n", new_loop_head, i, pred));
1396 ir_node *cp = get_inversion_copy(pred);
1397 /* As pred is in the condition chain there has to be a copy. */
1399 head_phi_ins[i] = cp;
1401 DB((dbg, LEVEL_5, "NEW HEAD %N pred %d %N is in not in cc so get copy %N\n",
1402 new_loop_head, i, pred, head_phi_ins[i] ));
1404 DB((dbg, LEVEL_5, "NEW HEAD %N in %N %N\n",
1405 new_loop_head, head_phi_ins[i], current_ir_graph));
1408 head_phi= new_r_Phi(new_loop_head,
1409 new_loop_head_arity,
1413 add_Block_phi(new_loop_head, head_phi); /* keep the phi list consistent */
1415 DB((dbg, LEVEL_5, "fix inner cc definitions: new head %N has new phi %N from cc phi %N @%N\n",
1416 new_loop_head, head_phi, phi, pred));
1422 /* Puts the original condition chain at the end of the loop,
1423 * subsequently to the body.
1424 * Relies on block phi list and correct backedges.
1426 static void fix_head_inversion(void)
1430 ir_node *phi, *next;
1432 int arity = get_irn_arity(loop_head);
1433 int backedges = get_backedge_n(loop_head, 0);
/* The rebuilt head keeps only its own backedge ins. */
1434 int new_arity = backedges;
1438 NEW_ARR_A(ir_node *, ins, new_arity);
1441 /* Keep only backedges */
1442 for(i = 0; i < arity; ++i) {
1443 if (is_own_backedge(loop_head, i))
1444 ins[pos++] = get_irn_n(loop_head, i);
1447 new_head = new_Block(new_arity, ins);
1449 phis = NEW_ARR_F(ir_node *, 0);
/* Rebuild every phi of the old head with the backedge ins only. */
1451 for_each_phi(loop_head, phi) {
1454 NEW_ARR_A(ir_node *, ins, new_arity);
1457 for (i = 0; i < arity; ++i) {
1458 ir_node *pred = get_irn_n(phi, i);
1460 if (is_own_backedge(loop_head, i)) {
1461 /* If assignment is in the condition chain,
1462 * we need to create a phi in the new loop head.
1463 * This can only happen for df, not cf. See find_condition_chains. */
1464 if (is_nodes_block_marked(pred)) {
1465 ins[pos++] = fix_inner_cc_definitions(phi, pred);
1473 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1474 new_head, new_arity, ins,
1477 ARR_APP1(ir_node *, phis, new_phi);
1479 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N\n", phi, new_phi));
/* Replace the old phis afterwards; doing it inside the loop above
 * would invalidate the phi-list iteration. */
1483 for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1484 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
/* NOTE(review): pos is only advanced when an exchange happens; if
 * phis[pos] == phi ever occurs the two lists could get out of sync —
 * verify this invariant. */
1485 if (phis[pos] != phi)
1486 exchange(phi, phis[pos++]);
1491 DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1492 exchange(loop_head, new_head);
1495 /* Does the loop inversion. */
1496 static void inversion_walk(out_edge *head_entries)
1501 * The order of rewiring bottom-up is crucial.
1502 * Any change of the order leads to lost information that would be needed later.
1505 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1507 /* 1. clone condition chain */
1508 inc_irg_visited(current_ir_graph);
1510 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1511 out_edge entry = head_entries[i];
1512 ir_node *pred = get_irn_n(entry.node, entry.pos);
1514 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
/* Copy only nodes whose block is marked, i.e. the condition chain. */
1516 copy_walk(pred, is_nodes_block_marked, cur_loop);
1519 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1521 /* 2. Extends the head control flow successors ins
1522 * with the definitions of the copied head node. */
1523 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1524 out_edge head_out = head_entries[i];
1526 if (is_Block(head_out.node))
1527 extend_ins_by_copy(head_out.node, head_out.pos);
1530 /* 3. construct_ssa for users of definitions in the condition chain,
1531 * as there is now a second definition. */
1532 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1533 out_edge head_out = head_entries[i];
1535 /* Ignore keepalives */
1536 if (is_End(head_out.node))
1542 /* Construct ssa for definitions in the condition chain. */
1543 if (!is_Block(head_out.node)) {
1544 ir_node *pred, *cppred, *block, *cpblock;
1546 /* May not use get_irn_n(head_out.node, head_out.pos)
1547 * as it can be changed by construct_ssa. */
1548 pred = head_out.pred;
1549 cppred = get_inversion_copy(pred);
1550 block = get_nodes_block(pred);
1551 cpblock = get_nodes_block(cppred);
1552 construct_ssa(block, pred, cpblock, cppred);
1556 /* 4. Remove the ins which are no backedges from the original condition chain
1557 * as the cc is now subsequently to the body. */
1558 fix_head_inversion();
1560 /* 5. Remove the backedges of the copied condition chain,
1561 * because it is going to be the new 'head' in advance to the loop. */
1562 fix_copy_inversion();
1565 /* Analyse loop, if inversion is possible or reasonable. Then do the inversion. */
1566 static void loop_inversion(void)
1568 unsigned do_inversion = 1;
1570 inversion_head_node_limit = INT_MAX;
1571 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1573 /* Reset block marks.
1574 * We use block marks to flag blocks of the original condition chain. */
1575 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1577 loop_info.blocks = get_loop_n_blocks(cur_loop);
1578 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1580 head_inversion_node_count = 0;
1581 head_inversion_block_count = 0;
/* The head is always part of the condition chain. */
1583 set_Block_mark(loop_head, 1);
1584 mark_irn_visited(loop_head);
1586 /* Use phase to keep copy of nodes from the condition chain. */
1587 phase = new_phase(current_ir_graph, phase_irn_init_default);
1589 inc_irg_visited(current_ir_graph);
1591 /* Search for condition chains. */
1592 find_condition_chains(loop_head);
1594 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
/* A one-block loop cannot be inverted. */
1596 if (loop_info.blocks < 2) {
1599 "Loop contains %d (less than 2) blocks => No Inversion done.\n",
1603 /* We also catch endless loops here,
1604 * because they do not have a condition chain. */
1605 if (head_inversion_block_count < 1) {
1608 "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1609 head_inversion_block_count));
1613 cur_head_outs = NEW_ARR_F(out_edge, 0);
1615 /* Get all edges pointing into the condition chain. */
1616 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1618 /* Do the inversion */
1619 inversion_walk(cur_head_outs);
1621 DEL_ARR_F(cur_head_outs);
1622 /* Duplicated blocks changed doms */
1623 set_irg_doms_inconsistent(current_ir_graph);
1624 /* Loop content changed */
1625 set_irg_loopinfo_inconsistent(current_ir_graph);
1626 /* TODO are they? Depends on set_irn_in and set_irn_n exchange and new_node. */
1627 set_irg_outs_inconsistent(current_ir_graph);
1632 DEL_ARR_F(cond_chain_entries);
1633 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1637 * Rewire floating copies of the current loop.
1639 static void place_copies(int copies)
1641 ir_node *loophead = loop_head;
1642 int headarity = get_irn_arity(loophead);
1647 /* Append the copies to the existing loop. */
1648 for (i = 0; i < headarity; ++i) {
1649 /* Build unrolled loop top down */
1650 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1651 for (c = 0; c < copies; ++c) {
1652 ir_node *upper_head;
1653 ir_node *lower_head = get_copy(loophead, c);
1654 ir_node *upper_pred;
/* Copy 0 hangs below the original head; copy c below copy c-1. */
1657 upper_head = loophead;
1658 upper_pred = get_irn_n(loophead, i);
1660 upper_head = get_copy(loophead, c - 1);
1661 upper_pred = get_copy_safe(get_irn_n(loophead, i), c - 1);
1663 set_irn_n(lower_head, pos, upper_pred);
/* Rewire the phis of each copied head analogously to the block ins. */
1664 for_each_phi(loophead, phi) {
1665 ir_node *lower_phi, *upper_phi, *upper_pred_phi;
1666 lower_phi = get_copy(phi, c);
1669 upper_pred_phi = get_irn_n(phi, i);
1671 upper_phi = get_copy(phi, c - 1);
1672 upper_pred_phi = get_copy_safe(get_irn_n(phi, i), c - 1);
1675 DB((dbg, LEVEL_5, "Rewire %N @%d to %N\n",
1676 lower_phi, pos, upper_pred_phi));
1678 /* Relying on phi nodes with 1 in to be present. */
1679 assert(is_Phi(lower_phi));
1681 /* Do not fix phis here. Chained phis will cause a defective copy array. */
/* A 1-in phi cannot be rewired in place; rebuild it so the copy map
 * stays valid, then swap it in. */
1683 if (get_irn_arity(lower_phi) == 1) {
1684 ir_node *single_in[1];
1686 single_in[0] = upper_pred_phi;
1688 cp = new_rd_Phi(get_irn_dbg_info(lower_phi),
1689 get_nodes_block(lower_phi), 1, single_in,
1690 get_irn_mode(lower_phi));
1692 set_copy(phi, c, cp);
1693 exchange(lower_phi, cp);
1696 set_irn_n(lower_phi, pos, upper_pred_phi);
1701 /* Fix the topmost loop heads backedges. */
1702 set_irn_n(loophead, i, get_copy(get_irn_n(loophead, i), copies - 1));
1703 for_each_phi(loophead, phi) {
1704 ir_node *last_cp = get_copy_safe(get_irn_n(phi, i), copies - 1);
1705 set_irn_n(phi, i, last_cp);
1711 /* Copies the cur_loop */
1712 static void copy_loop(out_edge *cur_loop_outs, int copies)
1715 unroll_arity = get_backedge_n(loop_head, 0);
1717 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
/* Produce 'copies' floating clones of the loop body. */
1720 for (c = 0; c < copies; ++c) {
1722 inc_irg_visited(current_ir_graph);
1724 DB((dbg, LEVEL_5, " ### Copy_loop copy n: %d ###\n", c));
/* Start each copy walk from every edge leaving the loop. */
1725 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1726 out_edge entry = cur_loop_outs[i];
1727 ir_node *pred = get_irn_n(entry.node, entry.pos);
/* Only the first pass (c == 0) allocates for all 'copies' slots. */
1729 copy_walk_n(pred, is_in_loop, c, c ? 0 : copies);
1733 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
/* Would create a Duff's device for loops whose trip count is not
 * divisible by the unroll factor.  Not implemented yet: the argument
 * is deliberately ignored. */
static void create_duffs_device(int unroll_number)
{
	(void) unroll_number;
}
1744 static unsigned is_loop_invariant_val(ir_node *node)
/* Values defined outside of the loop are trivially invariant. */
1748 if (! is_in_loop(node))
1751 /* TODO all cases? and how to use the dom tree for that. */
/* Inspect the ins reached over the block's own backedges; an in that
 * differs from the node itself indicates a value changing per iteration. */
1754 for (i = 0; i < get_irn_arity(node); ++i) {
1755 if (is_own_backedge(get_nodes_block(node), i) && get_irn_n(node, i) != node)
1763 static ir_node *is_loop_iteration(ir_node *node)
1766 ir_node *found_add = NULL;
1768 if (!is_in_loop(node))
1774 /* TODO all cases? And how do I use the dom tree for that. */
/* All own-backedge ins must come from one and the same node
 * (presumably the iteration Add) — otherwise give up. */
1777 for (i = 0; i < get_irn_arity(node); ++i) {
1779 if (! is_own_backedge(get_nodes_block(node), i))
1781 new_found = get_irn_n(node,i);
/* Conflicting candidates: not a simple counted iteration. */
1783 if (found_add && found_add != new_found)
1786 found_add = new_found;
1793 static unsigned get_unroll_decision(out_edge *cur_loop_outs)
1796 ir_node *pred0, *pred1, *projx, *cond, *projres, *loop_condition;
1797 ir_node *iteration, *add, *iteration_phi, *end_val, *step, *add_in0, *add_in1;
1799 (void) cur_loop_outs;
1803 /* There is more than 1 condition for the loop exit. We cannot handle this with Duff's device. */
1804 if (loop_info.cf_outs != 1)
1807 /* TODO test for impact on performance */
1808 if (loop_info.calls > 0)
/* Walk back from the single exit edge to the comparison feeding it:
 * exit Proj X -> Cond -> Proj res -> condition node. */
1812 projx = get_irn_n(loop_info.cf_out.node, loop_info.cf_out.pos);
1813 cond = get_irn_n(projx, 0);
1814 projres = get_irn_n(cond, 0);
1815 loop_condition = get_irn_n(projres, 0);
1817 DB((dbg, LEVEL_2, " loop condition %N in graph %N\n", loop_condition, current_ir_graph));
1819 /* One in of the loop condition needs to be loop invariant.
1820 * The other in is assigned by an add.
1821 * The add uses a loop invariant value
1822 * and another value that depends on a loop invariant start value and the add itself.
1835 /* Find the loop invariant in */
1836 /* Assumption that condition is cmp or mod.
1838 pred0 = get_irn_n(loop_condition, 0);
1839 pred1 = get_irn_n(loop_condition, 1);
1841 if (is_loop_invariant_val(pred0)) {
1847 if (is_loop_invariant_val(pred1)) {
1848 if (end_val != NULL)
1849 /* RETURN. Both ins invariant: the loop condition does not change during the loop. */
1856 DB((dbg, LEVEL_2, " end_val %N\n", end_val));
1859 /* Check for constraint of remaining in. */
1860 add = is_loop_iteration(iteration);
1864 DB((dbg, LEVEL_2, " add %N\n", add));
/* Split the Add into the invariant step and the iteration phi. */
1866 add_in0 = get_irn_n(add, 0);
1867 add_in1 = get_irn_n(add, 1);
1870 iteration_phi = NULL;
1872 if (is_loop_invariant_val(add_in0)) {
1874 iteration_phi = add_in1;
1877 DB((dbg, LEVEL_2, "seems step %N iter_phi %N\n", step, iteration_phi));
1879 if (is_loop_invariant_val(add_in1)) {
1883 iteration_phi = add_in0;
1886 DB((dbg, LEVEL_2, " step %N\n", step));
1888 if (! is_Phi(iteration_phi)) {
1889 if (is_Phi(iteration))
1890 iteration_phi = iteration;
1895 DB((dbg, LEVEL_2, " #### COUNTING LOOP in graph %N step %N end %N phi %N\n",
1896 current_ir_graph, step, end_val, iteration_phi));
/* Build (end_val mod step) material for the Duff's device prologue. */
1901 mode = get_irn_mode(add);
1903 block = new_Block(0, NULL);
1904 /*set_irg_current_Block(current_ir_graph, block);*/
1907 unroll_mod_const = new_Const(unroll_number);
1908 module = new_d_Mod(block, new_NoMem(), end_val, step, mode, op_pin_state_pinned);
1920 static void unroll_loop(void)
1923 unsigned unroll_number;
1925 cur_loop_outs = NEW_ARR_F(out_edge, 0);
/* Collect every edge leaving the current loop. */
1928 irg_walk_graph(current_ir_graph, get_loop_outs, NULL, NULL);
1930 /*unroll_number = get_unroll_decision(cur_loop_outs);*/
1933 /*dump_ir_block_graph(current_ir_graph, "-pre");*/
/* NOTE(review): the get_unroll_decision() call above is commented out;
 * verify that unroll_number is initialized before this test. */
1935 if (unroll_number) {
1936 create_duffs_device(unroll_number);
1938 /* Copies the loop and allocs node_info */
1939 copy_loop(cur_loop_outs, unroll_number - 1);
1940 /*dump_ir_block_graph(current_ir_graph, "-cp");*/
1942 /* Line up the floating copies. */
1943 place_copies(unroll_number - 1);
1944 /*dump_ir_block_graph(current_ir_graph, "-ord");*/
1946 /* Extend loop successors */
1947 ssa_outs = extend_ins(cur_loop_outs, unroll_number - 1);
1948 /*dump_ir_block_graph(current_ir_graph, "-fixed")*/ ;
1950 /* Links are going to be overwritten */
1951 free_node_info_blocks();
1953 /* Generate phis for all loop outs */
1954 for (i = 0; i < ARR_LEN(ssa_outs); ++i) {
1955 out_edge entry = ssa_outs[i];
1956 ir_node *node = entry.node;
1957 ir_node *pred = get_irn_n(entry.node, entry.pos);
1959 DB((dbg, LEVEL_5, "Do? construct_ssa_n def %N node %N pos %d\n",
1960 pred, node, entry.pos));
/* Keepalive edges from End need no SSA reconstruction. */
1962 if (!is_End(node)) {
1963 DB((dbg, LEVEL_5, "construct_ssa_n def %N node %N pos %d\n",
1964 pred, node, entry.pos));
1966 construct_ssa_n(pred, node, unroll_number - 1);
1968 construct_ssa_n(pred, new_phi, unroll_number - 1);*/
1971 /*dump_ir_block_graph(current_ir_graph, "-ssa-done");*/
1976 /*irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);*/
1978 DEL_ARR_F(ssa_outs);
/* Unrolling invalidates dominance, loop and out information. */
1980 set_irg_doms_inconsistent(current_ir_graph);
1981 set_irg_loopinfo_inconsistent(current_ir_graph);
1982 set_irg_outs_inconsistent(current_ir_graph);
1984 DEL_ARR_F(cur_loop_outs);
1987 /* Initialization and analysis of one loop; dispatches to the enabled optimizations. */
1988 static void init_analyze(ir_loop *loop)
1990 /* Init new for every loop */
/* Reset the per-loop global state before gathering loop information. */
1994 loop_head_valid = 1;
1995 loop_inv_head = NULL;
1996 loop_peeled_head = NULL;
1998 loop_info.calls = 0;
1999 loop_info.blocks = 0;
2000 loop_info.cf_outs = 0;
2002 DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n",
2003 get_loop_node(loop, 0)));
/* Collect head, block/call counts etc. for the current loop. */
2005 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
2007 /* RETURN if there is no valid head */
2008 if (!loop_head || !loop_head_valid) {
2009 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
2013 DB((dbg, LEVEL_2, "Loophead: %N\n", loop_head));
/* Dispatch to whichever optimization was requested by the entry points. */
2015 if (enable_inversion)
2018 if (enable_unrolling)
2021 DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n",
2022 get_loop_node(loop, 0)));
2025 /* Find most inner loops and send them to analyze_loop */
2026 static void find_most_inner_loop(ir_loop *loop)
2028 /* descend into sons */
2029 int sons = get_loop_n_sons(loop);
/* NOTE(review): loops without sons are appended to the global 'loops'
 * work list here; the branch separating the two cases is implied by
 * the son count checked above. */
2032 ARR_APP1(ir_loop *, loops, loop);
2035 for (s=0; s<sons; s++) {
2036 find_most_inner_loop(get_loop_son(loop, s));
2042 * Assure preconditions are met and go through all loops.
2044 void loop_optimization(ir_graph *irg)
2051 set_current_ir_graph(irg);
/* Out edges are required by the walkers below. */
2054 assure_irg_outs(irg);
2056 /* NOTE: sets only the loop attribute of blocks, not nodes */
2057 /* NOTE: Kills links */
2058 assure_cf_loop(irg);
2060 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
/* Build the per-block phi lists used by for_each_phi. */
2061 collect_phiprojs(irg);
2062 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2064 loop = get_irg_loop(irg);
2065 sons = get_loop_n_sons(loop);
2067 loops = NEW_ARR_F(ir_loop *, 0);
2068 /* List all inner loops */
2069 for (nr = 0; nr < sons; ++nr) {
2070 find_most_inner_loop(get_loop_son(loop, nr));
2073 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2074 /* Set all links NULL */
2075 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
/* Process each collected inner loop; phi lists and links are rebuilt
 * after every loop because the optimizations invalidate them. */
2077 for (i = 0; i < ARR_LEN(loops); ++i) {
2078 ir_loop *loop = loops[i];
2081 /*dump_ir_block_graph(current_ir_graph, "-part");*/
2082 collect_phiprojs(irg);
2083 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2086 /*dump_ir_block_graph(current_ir_graph, "-full");*/
2089 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2090 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
/* Public entry point: run only loop unrolling on irg. */
2093 void do_loop_unrolling(ir_graph *irg)
2095 enable_unrolling = 1;
2097 enable_inversion = 0;
2099 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
2100 get_irg_start(irg)));
2102 loop_optimization(irg);
2104 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
2105 get_irg_start(irg)));
/* Public entry point: run only loop inversion on irg. */
2108 void do_loop_inversion(ir_graph *irg)
2110 enable_unrolling = 0;
2112 enable_inversion = 1;
2114 DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
2115 get_irg_start(irg)));
2117 loop_optimization(irg);
2119 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
2120 get_irg_start(irg)));
/* Public entry point for loop peeling: both flags stay disabled, so the
 * per-loop dispatch in init_analyze performs no transformation. */
2123 void do_loop_peeling(ir_graph *irg)
2125 enable_unrolling = 0;
2127 enable_inversion = 0;
2129 DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
2130 get_irg_start(irg)));
2132 loop_optimization(irg);
2134 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
2135 get_irg_start(irg)));
2139 ir_graph_pass_t *loop_inversion_pass(const char *name)
2141 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
2144 ir_graph_pass_t *loop_unroll_pass(const char *name)
2146 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
2149 ir_graph_pass_t *loop_peeling_pass(const char *name)
2151 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
/* Registers the debug module handle used by the DB() macros in this file. */
2154 void firm_init_loop_opt(void)
2156 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");