2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @author Christian Helmer
23 * @brief loop inversion and loop unrolling
30 #include "iroptimize.h"
48 #include "irbackedge_t.h"
49 #include "irphase_t.h"
/* NOTE(review): this chunk is extraction-damaged — the original file's line
 * numbers are fused into each line and many lines are missing (struct fields,
 * braces). Comments below only describe what is visible; do not treat the
 * text as compilable. */
53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
56 * Convenience macro for iterating over every phi node of the given block.
57 * Requires phi list per block.
59 #define for_each_phi(block, phi) \
60 for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))
/* Safe variant: caches the next phi so the current one may be exchanged/freed
 * during iteration. */
62 #define for_each_phi_safe(head, phi, next) \
63 for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
64 (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
66 /* currently processed loop */
67 static ir_loop *cur_loop;
69 /* Condition for performing copy_walk. */
70 typedef unsigned walker_condition(ir_node *);
72 /* Node and position of a predecessor */
/* NOTE(review): struct bodies below are truncated by the extraction —
 * out_edge presumably has .node and .pos members (used throughout). */
73 typedef struct out_edge {
80 typedef struct inversion_node_info {
83 struct inversion_node_info *free_list_next; /* linked list to free all node_infos */
84 } inversion_node_info;
87 typedef struct node_info {
90 struct ir_node *free_list_next; /* linked list to free all node_infos */
93 /* Head of the list for freeing node_infos */
94 static ir_node *free_list;
96 /* A walker may start visiting the current loop with these nodes. */
97 static out_edge *cur_loop_outs;
99 /* Outs of the nodes head. */
100 static out_edge *cur_head_outs;
102 /* Information about the loop head */
103 static ir_node *loop_head = NULL;
104 static unsigned loop_head_valid = 1;
106 /* List of all inner loops, that are processed. */
107 static ir_loop **loops;
110 static ir_node *loop_inv_head = NULL;
112 static ir_node *loop_peeled_head = NULL;
114 /* Loop analysis informations */
115 typedef struct loop_info_t {
116 unsigned blocks; /* number of blocks in the loop */
117 unsigned calls; /* number of calls */
120 ir_node *new_loop_head;
123 /* Information about the current loop */
124 static loop_info_t loop_info;
126 /* Outs of the condition chain (loop inversion). */
127 static out_edge *cond_chain_entries;
129 /* Arity of the unrolled loop heads. */
130 static int unroll_arity;
/* Phase object used to attach inversion copies to nodes (see
 * set_inversion_copy / get_inversion_copy). */
132 static ir_phase *phase;
/* Counters/limits for deciding how much of the head may be inverted. */
137 static unsigned head_inversion_node_count;
138 static unsigned inversion_head_node_limit;
139 static unsigned head_inversion_block_count;
141 /* As the preconditions are the same, these flags,
142 * set depending on which function called, are used to determine what to do. */
143 static unsigned enable_peeling;
144 static unsigned enable_inversion;
145 static unsigned enable_unrolling;
/* NOTE(review): node_info lifecycle helpers. Bodies are truncated by the
 * extraction (missing braces/returns); annotations describe visible lines. */
148 /* Creates node_info. A linked list keeps all node_infos to free them again. */
149 static node_info *new_node_info(ir_node *node)
151 node_info *l = XMALLOCZ(node_info);
/* Prepend to the global free_list so free_node_info() can reach every info. */
152 l->free_list_next = free_list;
158 /* Returns linked node_info for given node. */
159 static node_info *get_node_info(ir_node *n)
161 return ((node_info *)get_irn_link(n));
164 /* Allocates a node_info struct for the given node */
165 static void alloc_node_info(ir_node *node)
168 state = new_node_info(node);
/* Stash the info in the node's link field for get_node_info(). */
169 set_irn_link(node, (void *)state);
172 /* Frees node info and copy array for given node. Returns next node of free_list. */
173 static ir_node *free_info(ir_node *node)
175 node_info *info = get_node_info(node);
176 ir_node *next = info->free_list_next;
177 /*DB((dbg, LEVEL_1, "FREE %N\n", node));*/
178 DEL_ARR_F(info->copy);
/* Clear the link so stale node_info pointers cannot be dereferenced later. */
180 set_irn_link(node, NULL);
184 /* Free all created node_infos, including the copy arrays. */
185 static void free_node_info(void)
187 ir_node *node = free_list;
189 node = free_info(node);
194 /* Free node_infos of blocks only, including the copy arrays. */
195 static void free_node_info_blocks(void)
197 ir_node *node = free_list;
/* Non-block infos are re-linked into new_freelist and kept alive. */
198 ir_node *new_freelist = NULL;
201 if (is_Block(node)) {
202 next = free_info(node);
204 next = get_node_info(node)->free_list_next;
205 get_node_info(node)->free_list_next = new_freelist;
210 free_list = new_freelist;
/* NOTE(review): copy-map accessors. Two parallel mechanisms are visible:
 * the phase-based map for loop inversion (set/get_inversion_copy) and the
 * per-node copy array for unrolling (set/get_copy). Lines are truncated. */
213 /* Reset nodes link. For use with a walker. */
214 static void reset_link(ir_node *node, void *env)
217 set_irn_link(node, NULL);
/* Records cp as the inversion copy of n in the phase data. */
220 static void set_inversion_copy(ir_node *n, ir_node *cp)
222 phase_set_irn_data(phase, n, cp);
225 /* Returns a nodes copy. */
226 static ir_node *get_copy(ir_node *n, int index)
228 ir_node *cp = ((node_info *)get_irn_link(n))->copy[index];
232 /* Returns a nodes copy if it exists, or else the node. */
233 static ir_node *get_inversion_copy(ir_node *n)
235 ir_node *cp = (ir_node *)phase_get_irn_data(phase, n);
239 /* Returns a nodes copy, or node if there is no node info linked. */
240 static ir_node *get_copy_safe(ir_node *node, int index)
242 node_info *info = (node_info *)get_irn_link(node);
244 return get_copy(node, index);
249 /* Assigns the given array to a nodes node_info */
250 static void set_copy_arr(ir_node *n, ir_node **arr)
252 ((node_info *)get_irn_link(n))->copy = arr;
255 /* Assigns a copy with given index to a node.
256 * Prerequirements are existing node_info and copy array. */
257 static void set_copy(ir_node *n, int index, ir_node *copy)
259 ((node_info *)get_irn_link(n))->copy[index] = copy;
/* NOTE(review): small loop-membership predicates; bodies truncated. */
262 /* Returns 0 if the node or block is not in cur_loop. */
263 static unsigned is_in_loop(ir_node *node)
265 return (get_irn_loop(get_block(node)) == cur_loop);
268 /* Returns if the given be is an alien edge.
269 * This is the case when the pred is not in the loop. */
270 static unsigned is_alien_edge(ir_node *n, int i)
272 return(!is_in_loop(get_irn_n(n, i)));
/* True iff pos is a backedge of n whose predecessor lies inside cur_loop
 * (i.e. a backedge of this loop itself, not of an outer/alien loop). */
275 static unsigned is_own_backedge(ir_node *n, int pos)
277 return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
280 /* Resets block mark for given node. For use with walker */
281 static void reset_block_mark(ir_node *node, void * env)
286 set_Block_mark(node, 0);
/* Returns the Block mark of the node itself (if it is a block) or of the
 * block the node belongs to. */
289 static unsigned is_nodes_block_marked(ir_node* node)
292 return get_Block_mark(node);
294 return get_Block_mark(get_block(node));
297 /* Returns the number of blocks in a loop. */
298 static int get_loop_n_blocks(ir_loop *loop)
302 elements = get_loop_n_elements(loop);
304 for (e=0; e<elements; e++) {
305 loop_element elem = get_loop_element(loop, e);
306 if (is_ir_node(elem.kind) && is_Block(elem.node))
/* Appends 'new' as an additional predecessor of n (arity grows by one).
 * NOTE(review): return statement not visible — presumably returns the old
 * arity, i.e. the in-position of the appended predecessor; confirm against
 * the complete source. */
313 static int extend_irn(ir_node *n, ir_node *new)
317 int arity = get_irn_arity(n);
318 int new_arity = arity + 1;
320 NEW_ARR_A(ir_node *, ins, new_arity);
322 for(i = 0; i < arity; ++i) {
323 ins[i] = get_irn_n(n, i);
327 set_irn_in(n, new_arity, ins);
328 /* arity equals the position of the new node */
/* Extends the block (and all of its phis) by the inversion copy of the
 * definition found at in-position pos. */
333 static void extend_ins_by_copy(ir_node *block, int pos)
337 assert(is_Block(block));
339 /* Extend block by copy of definition at pos */
340 new_in = get_inversion_copy(get_irn_n(block, pos));
341 extend_irn(block, new_in);
343 /* Extend block phis by copy of definition at pos */
344 for_each_phi(block, phi) {
347 pred = get_irn_n(phi, pos);
348 cp = get_inversion_copy(pred);
349 /* If the phis in is not in the condition chain (eg. a constant),
350 * there is no copy. */
356 DB((dbg, LEVEL_5, "Extend phi %N by %N\n", phi, new_in));
357 extend_irn(phi, new_in);
/* NOTE(review): large function, heavily truncated by the extraction.
 * Visible intent: for every loop-out edge, rebuild the successor block with
 * (old_arity + positions_n * copies) predecessors — the originals plus the
 * per-copy counterparts — recreate its phis accordingly, transfer backedge
 * flags, and exchange the old block/phis for the new ones. */
362 * Extends all blocks which succeed the loop with the copied loops outs.
363 * Also extends the blocks phis. Requires block phi list.
364 * Returns an updated list of loop outs of the original loop outs.
366 static out_edge *extend_ins(out_edge *outs, int copies)
371 ir_node *phi, *next_phi, *new_phi, *head, *new_block;
372 int old_arity, new_arity;
376 inc_irg_visited(current_ir_graph);
378 new_outs = NEW_ARR_F(out_edge, 0);
/* First pass (debug output only, body truncated). */
381 for (i = 0; i < ARR_LEN(outs); ++i) {
382 out_edge edge = outs[i];
383 ir_node *node = edge.node;
384 DB((dbg, LEVEL_5, "outs %N %d\n", node, edge.pos));
/* Second pass: process each out edge. */
388 for (i = 0; i < ARR_LEN(outs); ++i) {
389 out_edge edge = outs[i];
390 ir_node *node = edge.node;
391 int in_c, positions_n;
394 if (!is_Block(node) && !is_Bad(node)) {
399 ir_node *p = get_nodes_block(node);
400 for(c = 0; c < get_irn_arity(p); ++c) {
401 if (is_in_loop(get_irn_n(p, c))) {
407 ARR_APP1(out_edge, new_outs, edge);
408 DB((dbg, LEVEL_5, "new out %N\n", node));
413 ARR_APP1(out_edge, new_outs, edge);
414 DB((dbg, LEVEL_5, "new out %N\n", node));
418 /* CONTINUE if node already processed. */
419 if (irn_visited(node))
425 positions = NEW_ARR_F(int, 0);
428 /* Search for all occurences of the current node, and save the in positions. */
429 for (p = 0; p < ARR_LEN(outs); ++p) {
430 ir_node *cur = outs[p].node;
431 int d_pos = outs[p].pos;
434 DB((dbg, LEVEL_5, "Loop successor %N with pred @%d in loop\n",
436 ARR_APP1(int, positions, d_pos);
437 mark_irn_visited(cur);
/* New arity: one extra in per (position, copy) pair. */
442 old_arity = get_irn_arity(node);
443 new_arity = old_arity + (positions_n * copies);
445 ins = NEW_ARR_F(ir_node*, new_arity);
446 bes = NEW_ARR_F(int, new_arity);
448 /* extend block ins */
449 for (in_c = 0; in_c < old_arity; ++in_c) {
450 ins[in_c] = get_irn_n(node, in_c);
451 bes[in_c] = is_backedge(node, in_c);
453 for (p = 0; p < positions_n; ++p) {
454 for (c = 0; c < copies; ++c) {
455 ir_node *cp = get_copy_safe(get_irn_n(node, positions[p]), c);
456 DB((dbg, LEVEL_5, "%N (new_arity %d) @%d = %N\n",
457 node, new_arity, in_c, cp));
/* Copied in inherits the backedge flag of its original position. */
459 bes[in_c] = is_backedge(node, positions[p]);
464 assert(new_arity == in_c);
/* Rebuild the block itself, preserving its loop and backedge info. */
466 head = get_Block_phis(node);
467 new_block = new_r_Block(get_irn_irg(node), new_arity, ins);
468 set_irn_loop(new_block, get_irn_loop(node));
470 for (in_c = 0; in_c < new_arity; ++in_c) {
472 set_backedge(new_block, in_c);
478 set_Block_phis(node, NULL);
480 phi_ins = NEW_ARR_F(ir_node *, new_arity);
481 phi_bes = NEW_ARR_F(int, new_arity);
/* Rebuild every phi of the old block with the extended in set. */
483 for_each_phi_safe(head, phi, next_phi) {
484 for (in_c = 0; in_c < old_arity; ++in_c) {
485 phi_ins[in_c] = get_irn_n(phi, in_c);
486 phi_bes[in_c] = is_backedge(phi, in_c);
489 for (p = 0; p < positions_n; ++p) {
490 for(c = 0; c < copies; ++c) {
492 pred = get_irn_n(phi, positions[p]);
493 DB((dbg, LEVEL_5, "Phi %N pred @%d is %N\n",
494 phi, positions[p], pred));
495 cp = get_copy_safe(pred, c);
497 DB((dbg, LEVEL_5, "Phi %N in %d is %N\n",
499 phi_bes[in_c] = is_backedge(phi, positions[p]);
504 new_phi = new_r_Phi(new_block, new_arity, phi_ins, get_irn_mode(phi));
506 for (in_c = 0; in_c < new_arity; ++in_c) {
508 set_backedge(new_phi, in_c);
510 exchange(phi, new_phi);
511 DB((dbg, LEVEL_5, "exch Phi %N %N\n",phi, new_phi));
513 add_Block_phi(new_block, new_phi);
519 exchange(node, new_block);
520 set_Block_MacroBlock(new_block, new_block);
521 DB((dbg, LEVEL_5, "exch block %N %N\n", node, new_block));
523 DEL_ARR_F(positions);
/* NOTE(review): walker callback; body truncated. Visible logic: for each
 * predecessor, classify in-loop membership and detect a block that is in
 * the loop but has a control-flow pred outside it — the loop head candidate.
 * loop_head_valid is presumably cleared when multiple heads are found. */
529 * Finds loop head and loop_info.
531 static void get_loop_info(ir_node *node, void *env)
533 unsigned node_in_loop, pred_in_loop;
537 arity = get_irn_arity(node);
538 for (i = 0; i < arity; i++) {
539 ir_node *pred = get_irn_n(node, i);
541 pred_in_loop = is_in_loop(pred);
542 node_in_loop = is_in_loop(node);
544 /* collect some loop information */
550 /* Find the loops head/the blocks with cfpred outside of the loop */
551 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
552 ir_node *cfgpred = get_Block_cfgpred(node, i);
554 if (!is_in_loop(cfgpred)) {
555 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
557 /* another head? We do not touch this. */
558 if (loop_head && loop_head != node) {
/* Walker: replaces degenerate 1-in phis by a fresh 1-in phi (keeping phis
 * of arity 1 rather than folding them — see copy_node_changed's comment).
 * NOTE(review): body truncated; mode argument of new_rd_Phi not visible. */
570 static void correct_phis(ir_node *node, void *env)
574 if (is_Phi(node) && get_irn_arity(node) == 1) {
577 in[0] = get_irn_n(node, 0);
579 exch = new_rd_Phi(get_irn_dbg_info(node),
580 get_nodes_block(node), 1, in,
583 exchange(node, exch);
584 /* TODO make sure visited is copied */
/* NOTE(review): walker; collects edges that point from inside the loop to
 * nodes outside it ("loop outs") into cur_loop_outs. Blocks with such a
 * pred also record loop_info.cf_out. Body truncated. */
591 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
592 static void get_loop_outs(ir_node *node, void *env)
594 unsigned node_in_loop, pred_in_loop;
598 arity = get_irn_arity(node);
599 for (i = 0; i < arity; ++i) {
600 ir_node *pred = get_irn_n(node, i);
602 pred_in_loop = is_in_loop(pred);
603 node_in_loop = is_in_loop(node);
/* pred inside, node outside => (node, i) is an out edge of the loop. */
605 if (pred_in_loop && !node_in_loop) {
609 ARR_APP1(out_edge, cur_loop_outs, entry);
610 if (is_Block(node)) {
612 loop_info.cf_out = entry;
614 DB((dbg, LEVEL_5, "loop outs %N\n", node));
/* State for the two-definition SSA reconstruction below: when the walk
 * reaches ssa_second_def_block it yields ssa_second_def instead of
 * continuing upward. */
619 static ir_node *ssa_second_def;
620 static ir_node *ssa_second_def_block;
623 * Walks the graph bottom up, searching for definitions and creates phis.
/* NOTE(review): classic dominance-free SSA construction (cf. construct_ssa
 * in libFirm's opt code): per-block cached value in the link field, visited
 * flag as cache marker, Unknown placeholders broken cyclically by filling
 * phi ins after the phi is linked. Some lines are missing. */
625 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
633 DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
635 /* Prevents creation of phi that would be bad anyway.
636 * Dead and bad blocks. */
637 if (get_irn_arity(block) < 1 || is_Bad(block)) {
638 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
642 if (block == ssa_second_def_block) {
643 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
644 return ssa_second_def;
647 /* already processed this block? */
648 if (irn_visited(block)) {
649 ir_node *value = get_irn_link(block);
650 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
654 irg = get_irn_irg(block);
655 assert(block != get_irg_start_block(irg));
657 /* a Block with only 1 predecessor needs no Phi */
658 n_cfgpreds = get_Block_n_cfgpreds(block);
659 if (n_cfgpreds == 1) {
660 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
663 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
665 value = search_def_and_create_phis(pred_block, mode);
666 set_irn_link(block, value);
667 mark_irn_visited(block);
672 /* create a new Phi */
673 NEW_ARR_A(ir_node*, in, n_cfgpreds);
674 for (i = 0; i < n_cfgpreds; ++i)
675 in[i] = new_Unknown(mode);
677 phi = new_r_Phi(block, n_cfgpreds, in, mode);
679 /* Important: always keep block phi list up to date. */
680 add_Block_phi(block, phi);
682 DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
/* Cache the phi BEFORE recursing so cycles terminate at this block. */
684 set_irn_link(block, phi);
685 mark_irn_visited(block);
687 /* set Phi predecessors */
688 for (i = 0; i < n_cfgpreds; ++i) {
690 ir_node *pred_block = get_Block_cfgpred_block(block, i);
691 assert(pred_block != NULL);
692 pred_val = search_def_and_create_phis(pred_block, mode);
693 assert(pred_val != NULL);
695 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
696 set_irn_n(phi, i, pred_val);
/* NOTE(review): rewires all users of orig_val to the correct reaching
 * definition, given a second definition (second_block/second_val).
 * Phi users look up via the corresponding cfg-pred block (visible at the
 * truncated branch around the pred_block lookup). Some lines missing. */
703 * Given a set of values this function constructs SSA-form for the users of the
704 * first value (the users are determined through the out-edges of the value).
705 * Works without using the dominance tree.
707 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
708 ir_node *second_block, ir_node *second_val)
712 const ir_edge_t *edge;
713 const ir_edge_t *next;
715 assert(orig_block && orig_val && second_block && second_val &&
716 "no parameter of construct_ssa may be NULL");
718 /* no need to do anything */
719 if (orig_val == second_val)
722 irg = get_irn_irg(orig_val);
724 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
725 inc_irg_visited(irg);
727 mode = get_irn_mode(orig_val);
/* Seed the search: orig_block's reaching value is orig_val. */
728 set_irn_link(orig_block, orig_val);
729 mark_irn_visited(orig_block);
731 ssa_second_def_block = second_block;
732 ssa_second_def = second_val;
734 /* Only fix the users of the first, i.e. the original node */
735 foreach_out_edge_safe(orig_val, edge, next) {
736 ir_node *user = get_edge_src_irn(edge);
737 int j = get_edge_src_pos(edge);
738 ir_node *user_block = get_nodes_block(user);
745 DB((dbg, LEVEL_5, "original user %N\n", user));
/* Phi users: the definition must reach via the matching cfg pred. */
748 ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
749 newval = search_def_and_create_phis(pred_block, mode);
751 newval = search_def_and_create_phis(user_block, mode);
754 assert(!is_Bad(newval));
756 /* If we get a bad node the user keeps the original in.
757 * No second definition needed. */
758 if (newval != user && !is_Bad(newval))
759 set_irn_n(user, j, newval);
762 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
/* NOTE(review): SSA reconstruction for a definition plus all of its unroll
 * copies (copy array variant), fixing only the given user. Heavily
 * truncated, including commented-out debug paths at the end. Return value
 * not visible. */
766 /* Construct ssa for def and all of its copies.
767 * NOTE: Overwrites links of blocks.
769 static ir_node *construct_ssa_n(ir_node *def, ir_node *user, int copies)
774 ir_graph *irg = get_irn_irg(def);
776 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
777 inc_irg_visited(current_ir_graph);
778 mode = get_irn_mode(def);
/* Seed: every copy's block maps to that copy; def's block maps to def. */
780 set_irn_link(get_nodes_block(def), def);
781 mark_irn_visited(get_nodes_block(def));
783 for (i = 0; i < copies; ++i) {
784 ir_node *cp = get_copy(def, i);
785 ir_node *block = get_nodes_block(cp);
786 set_irn_link(block, cp);
787 mark_irn_visited(block);
789 DB((dbg, LEVEL_5, "ssa mark cp %N of def %N usr %N\n", cp, def, user));
/* Find the in-position(s) of def at the user and rewire them. */
793 user_arity = get_irn_arity(user);
794 for (i = 0; i < user_arity; ++i) {
795 ir_node *user_block = get_nodes_block(user);
796 if (get_irn_n(user, i) != def)
799 DB((dbg, LEVEL_5, "ssa_n found edge of user %N\n", user));
802 ir_node *pred_block = get_Block_cfgpred_block(user_block, i);
803 newval = search_def_and_create_phis(pred_block, mode);
805 newval = search_def_and_create_phis(user_block, mode);
807 /*TODO bads should not happen! */
808 if (newval != user) {
809 DB((dbg, LEVEL_5, "=> ssa_n new in is %N\n", newval));
810 set_irn_n(user, i, newval);
811 /*if (is_Phi(newval) && get_nodes_block(newval)== user_block) {
817 /* BREAK, as we found and handled the definition */
821 if (is_Bad(newval)) {
822 DB((dbg, LEVEL_5, "=> ssa_n new in is BAD %N in graph %N\n", newval, current_ir_graph));
823 add_End_keepalive(get_irg_end(current_ir_graph), newval);
824 dump_ir_block_graph(current_ir_graph, "-bad");
831 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
/* NOTE(review): counting helper + copy helper; bodies truncated. */
835 /* Returns the number of backedges with or without alien bes. */
836 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
840 int arity = get_irn_arity(loophead);
841 for (i = 0; i < arity; ++i) {
842 ir_node *pred = get_irn_n(loophead, i);
843 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
850 * Returns a raw copy of the given node.
851 * Attributes are kept/set according to the needs of loop inversion.
853 static ir_node *copy_node_inversion(ir_node *node)
858 cp = exact_copy(node);
859 arity = get_irn_arity(node);
/* NOTE(review): truncated — presumably clears backedge flags on the copy. */
861 for (i = 0; i < arity; ++i) {
862 if (is_backedge(node, i))
/* Register the copy for later lookup via get_inversion_copy(). */
866 set_inversion_copy(node, cp);
867 DB((dbg, LEVEL_5, "set copy of %N to cp %N \n", node, cp));
870 /* We may not keep the old macroblock. */
871 set_Block_MacroBlock(cp, cp);
872 set_Block_mark(cp, 0);
/* NOTE(review): recursive copy walker for loop inversion. Creates temporary
 * copies to break cycles, then finalizes ins once all preds are walked.
 * Several branches are truncated. */
879 * This walker copies all walked nodes.
880 * If the walk_condition is true for a node, it is copied.
881 * All nodes node_info copy attributes have to be NULL prior to every walk.
882 * TODO temporary nodes not necessary anymore.
884 static void copy_walk(ir_node *node, walker_condition *walk_condition,
891 ir_graph *irg = current_ir_graph;
894 * break condition and cycle resolver, creating temporary node copies
896 if (get_irn_visited(node) >= get_irg_visited(irg)) {
897 /* Here we rely on nodestate's copy being initialized with NULL */
898 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
899 if (get_inversion_copy(node) == NULL) {
900 cp = copy_node_inversion(node);
901 DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
907 mark_irn_visited(node);
/* Walk the owning block first so phis find their block's copy. */
909 if (!is_Block(node)) {
910 ir_node *pred = get_nodes_block(node);
911 if (walk_condition(pred))
912 DB((dbg, LEVEL_5, "walk block %N\n", pred));
913 copy_walk(pred, walk_condition, set_loop);
916 arity = get_irn_arity(node);
918 NEW_ARR_A(ir_node *, cpin, arity);
920 for (i = 0; i < arity; ++i) {
921 ir_node *pred = get_irn_n(node, i);
923 if (walk_condition(pred)) {
924 DB((dbg, LEVEL_5, "walk node %N\n", pred));
925 copy_walk(pred, walk_condition, set_loop);
926 cpin[i] = get_inversion_copy(pred);
927 DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
928 node, get_inversion_copy(pred), pred));
934 /* copy node / finalize temp node */
935 if (get_inversion_copy(node) == NULL) {
936 /* No temporary copy existent */
937 cp = copy_node_inversion(node);
938 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
940 /* temporary copy is existent but without correct ins */
941 cp = get_inversion_copy(node);
942 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
945 if (!is_Block(node)) {
946 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
948 set_nodes_block(cp, cpblock );
/* Presumably only for phis — keep the copied block's phi list intact. */
950 add_Block_phi(cpblock, cp);
953 set_irn_loop(cp, set_loop);
955 /* Keeps phi list of temporary node. */
956 set_irn_in(cp, ARR_LEN(cpin), cpin);
/* NOTE(review): node factory for the unroll copy walker; truncated. Phis
 * are created via new_rd_Phi (keeping arity-1 phis deliberately), other
 * nodes via new_ir_node + copy_node_attr. */
959 /* Creates a new node from a given one, but with custom ins. */
960 static ir_node *copy_node_changed(
961 ir_node *node, ir_node *block, int arity, ir_node **ins, ir_loop *loop)
964 DB((dbg, LEVEL_5, "New node from %N in %N block %N arity %d\n",
965 node, get_irn_irg(node), block, arity));
968 cp = new_rd_Phi(get_irn_dbg_info(node),
976 /* We want to keep phi nodes with arity 1, as it makes rewiring a lot easier. */
977 cp = new_ir_node(get_irn_dbg_info(node),
986 add_Block_phi(block, cp);
/* Only copy attributes when the op really matches (Phi path may differ). */
988 if (get_irn_op(node) == get_irn_op(cp))
989 copy_node_attr(get_irn_irg(node), node, cp);
991 /*new_backedge_info(cp); function removed */
993 set_Block_MacroBlock(cp, cp);
995 set_irn_loop(cp, loop);
997 /* Blocks are copied first, so that future phi nodes populate the phi list. */
999 set_Block_phis(cp, NULL);
1001 DB((dbg, LEVEL_5, "New node %N in %N block %N arity %d\n",
1002 cp, get_irn_irg(cp), block, arity));
1006 /* To be exchangeable, the unknown needs to have proper graph set. */
1007 static ir_node *new_node_unknown(ir_node *block, ir_mode *mode)
1009 ir_node *new = new_ir_node(NULL, get_irn_irg(block), block, op_Unknown, mode, 0, NULL);
/* NOTE(review): unrolling counterpart of copy_walk. Uses per-node copy
 * arrays (copy_index selects which unrolled iteration), Unknown placeholders
 * to break cycles, and special-cases the loop head (only backedge preds are
 * walked; resulting arity is unroll_arity). Heavily truncated. */
1014 * Walker, to copy alle nodes that meet the walk_condition.
1015 * Blocks are copied first, which is important for the creation of the phi lists.
1018 static ir_node *copy_walk_n(ir_node *node, walker_condition *walk_condition, int copy_index, int alloc_cp_arr)
1022 unsigned headblock = 0;
1023 ir_node *block, *cp, *new_cp;
1024 ir_node **ins = NULL;
1026 if (irn_visited(node)) {
1027 cp = get_copy(node, copy_index);
1029 DB((dbg, LEVEL_5, "Visited %N\n", node));
1032 DB((dbg, LEVEL_5, "Visited. Return copy %N\n", cp));
/* Cycle breaker: leave an Unknown placeholder, exchanged later. */
1036 if (!is_Block(node)) {
1037 ir_node *unknown = new_node_unknown(
1038 get_nodes_block(node),
1039 get_irn_mode(node));
1040 DB((dbg, LEVEL_5, " #### Create Unknown %N for %N\n", unknown, node));
1041 set_copy(node, copy_index, unknown);
1045 DB((dbg, LEVEL_5, "Walk %N\n", node));
1046 mark_irn_visited(node);
1048 /* Allocate array of copies */
1049 if (alloc_cp_arr > 0) {
1050 ir_node **arr = NEW_ARR_F(ir_node *, alloc_cp_arr);
1051 alloc_node_info(node);
1052 for (i = 0; i < alloc_cp_arr; ++i)
1054 set_copy_arr(node, arr);
/* Walk the block first (phi lists depend on the block existing). */
1059 if (!is_Block(node)) {
1060 ir_node *orig_block = get_nodes_block(node);
1061 DB((dbg, LEVEL_5, "current node: %N about to walk block %N\n", node, orig_block));
1062 /* Check walk_condition for blocks not necessary */
1064 if (orig_block == loop_head && is_Phi(node))
1067 block = copy_walk_n(orig_block, walk_condition, copy_index, alloc_cp_arr);
1068 DB((dbg, LEVEL_5, "current node: %N block %N blocks copy %N\n",
1069 node, orig_block, block));
1071 if (node == loop_head)
1076 arity = get_irn_arity(node);
1079 /* Headblock and headblock-phi case */
1081 DB((dbg, LEVEL_5, "Headblock/Phi from headblock\n"));
1082 NEW_ARR_A(ir_node *, ins, unroll_arity);
1084 for (i = 0; i < arity; ++i) {
1086 ir_node *pred = get_irn_n(node, i);
/* Head copies keep only the (in-loop) backedge predecessors. */
1087 if (is_backedge(loop_head, i)) {
1088 if (walk_condition(pred)) {
1089 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1090 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1093 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1098 arity = unroll_arity;
1101 NEW_ARR_A(ir_node *, ins, arity);
1103 for (i = 0; i < arity; ++i) {
1105 ir_node *pred = get_irn_n(node, i);
1106 if (walk_condition(pred)) {
1107 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1108 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1111 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1117 /* NOW, after the walk, we may check if the blocks copy is already present. */
1118 cp = get_copy(node, copy_index);
1119 if (cp != NULL && !is_Unknown(cp)) {
1120 DB((dbg, LEVEL_5, "Must be Block %N copy is already present\n", node, cp));
1121 assert(is_Block(cp) &&
1122 "sanity: The copy should have been a block.");
1126 new_cp = copy_node_changed(node, block, arity, ins, cur_loop);
1127 set_copy(node, copy_index, new_cp);
/* Resolve the cycle-breaking placeholder, if one was created above. */
1129 if (cp != NULL && is_Unknown(cp)) {
1130 DB((dbg, LEVEL_5, "Found unknown %N in %N now exchanged by %N in %N\n",
1131 cp, get_irn_irg(cp), new_cp, get_irn_irg(new_cp)));
1132 exchange(cp, new_cp);
/* NOTE(review): walker; collects edges out of the marked condition chain
 * into cur_head_outs, records the new loop head, and queues users of
 * head phis that will need SSA repair. Body truncated. */
1138 * Populates head_entries with (node, pred_pos) tuple
1139 * whereas the node's pred at pred_pos is in the head but not the node itself.
1140 * Head and condition chain blocks must be marked.
1142 static void get_head_outs(ir_node *node, void *env)
1145 int arity = get_irn_arity(node);
1148 DB((dbg, LEVEL_5, "get head entries %N \n", node));
1150 for (i = 0; i < arity; ++i) {
1151 /* node is not in the head, but the predecessor is.
1152 * (head or loop chain nodes are marked) */
1154 DB((dbg, LEVEL_5, "node %N marked %d (0) pred %d marked %d (1) \n",
1155 node, is_nodes_block_marked(node), i,
1156 is_nodes_block_marked(get_irn_n(node, i))));
1158 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
1162 entry.pred = get_irn_n(node, i);
1163 ARR_APP1(out_edge, cur_head_outs, entry);
/* An in-loop block fed by the chain becomes the new loop head. */
1165 if (is_in_loop(node) && is_Block(node)) {
1166 loop_info.new_loop_head = node;
1167 DB((dbg, LEVEL_5, " SET NEW LOOPHEAD to %N\n", node));
/* Head phis: decide whether users need SSA reconstruction after copying. */
1172 if (is_Phi(node) && get_nodes_block(node) == loop_head)
1174 unsigned needs_ssa = 1;
1175 unsigned loop_invar = 1;
1176 for (i = 0; i < arity; ++i) {
1177 if (is_own_backedge(node, i)) {
1178 if (get_irn_n(node, i) != node)
1181 if (is_nodes_block_marked(get_irn_n(node, i)))
1185 /* Construct ssa for all uses of this phi, as there will be a copy of it. */
1186 if(needs_ssa && !loop_invar) {
1187 const ir_edge_t *edge;
1188 DB((dbg, LEVEL_5, "NEEDS SSA users of phi %N\n", node));
1189 foreach_out_edge(node, edge) {
1191 entry.node = get_edge_src_irn(edge);
1192 DB((dbg, LEVEL_5, "NEEDS SSA user %N\n", entry.node));
1193 entry.pos = get_edge_src_pos(edge);
1194 ARR_APP1(out_edge, cur_head_outs, entry);
/* NOTE(review): recursive marker for the condition chain (blocks whose
 * conditions branch out of the loop). Marks qualifying blocks, collects
 * out-of-loop successors into cond_chain_entries, refuses blocks with
 * backedge outs, and recurses over in-loop successors. Truncated. */
1202 * Find condition chains, and add them to be inverted
1203 * A block belongs to the chain if a condition branches out of the loop.
1204 * Returns 1 if the given block belongs to the condition chain.
1206 static unsigned find_condition_chains(ir_node *block)
1208 const ir_edge_t *edge;
1210 unsigned has_backedge = 0;
1213 DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
1215 /* Collect all outs, including keeps. */
1216 foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
1220 /* Leave at least one block as body. */
/* Size limits: stop growing the chain if the node budget is exceeded or
 * the chain would swallow the whole loop body. */
1221 if (head_inversion_node_count + nodes_n > inversion_head_node_limit
1222 || head_inversion_block_count + 1 == loop_info.blocks) {
1223 set_Block_mark(block, 0);
1228 /* First: check our successors,
1229 * and add all of them, which are outside of the loop, to the list */
1230 foreach_block_succ(block, edge) {
1231 ir_node *src = get_edge_src_irn( edge );
1232 int pos = get_edge_src_pos( edge );
1234 if (!is_in_loop(src)) {
1240 ARR_APP1(out_edge, cond_chain_entries, entry);
1241 mark_irn_visited(src);
1242 } else if (is_backedge(src, pos)) {
1243 /* Inverting blocks with backedge outs leads to a cf edge
1244 * from the inverted head, into the inverted head (skipping the body).
1245 * As the body becomes the new loop head,
1246 * this would introduce another loop in the existing loop.
1247 * This loop inversion cannot cope with this complex case. */
1253 ARR_APP1(out_edge, cond_chain_entries, entry);
1254 mark_irn_visited(src);
1258 if (mark == 0 || has_backedge) {
1259 set_Block_mark(block, 0);
1262 set_Block_mark(block, 1);
1263 ++head_inversion_block_count;
1264 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
1265 head_inversion_node_count += nodes_n;
1268 /* Second: walk all successors,
1269 * and add them to the list if they are not part of the chain */
1270 foreach_block_succ(block, edge) {
1272 ir_node *src = get_edge_src_irn( edge );
1273 int pos = get_edge_src_pos( edge );
1276 if (!is_in_loop( src ) || irn_visited(src)) {
1280 mark_irn_visited(src);
1281 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
1282 inchain = find_condition_chains(src);
1284 /* if successor is not part of chain we need to collect its outs */
1289 ARR_APP1(out_edge, cond_chain_entries, entry);
/* NOTE(review): rebuilds the copied loop head without its backedge ins
 * (the copied chain sits before the loop, so it must not loop), rebuilding
 * its phis likewise and exchanging old for new. Truncated. */
1296 * Rewires the copied condition chain. Removes backedges.
1297 * as this condition chain is prior to the loop.
1298 * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1299 * (loop_head is already fixed, we cannot rely on it.)
1301 static void fix_copy_inversion(void)
1306 ir_node *phi, *next;
1307 ir_node *head_cp = get_inversion_copy(loop_head);
1308 int arity = get_irn_arity(head_cp);
1309 int backedges = get_backedge_n(head_cp, 0);
1310 int new_arity = arity - backedges;
1314 NEW_ARR_A(ir_node *, ins, new_arity);
1317 /* Remove block backedges */
1318 for(i = 0; i < arity; ++i) {
1319 if (!is_backedge(head_cp, i))
1320 ins[pos++] = get_irn_n(head_cp, i);
1323 new_head = new_Block(new_arity, ins);
1325 phis = NEW_ARR_F(ir_node *, 0);
/* Recreate each phi of the copied head with the backedge ins dropped. */
1327 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1329 NEW_ARR_A(ir_node *, ins, new_arity);
1331 for(i = 0; i < arity; ++i) {
1332 if (!is_backedge(head_cp, i))
1333 ins[pos++] = get_irn_n(phi, i);
1335 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1336 new_head, new_arity, ins,
1338 ARR_APP1(ir_node *, phis, new_phi);
/* Exchange after collecting, so iteration is not disturbed. */
1342 for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1343 exchange(phi, phis[pos++]);
1346 exchange(head_cp, new_head);
/* NOTE(review): creates a phi in the new loop head merging pred (for ins
 * coming from the marked condition chain) with its inversion copy (for all
 * other ins). Some lines truncated, including the phi's mode argument. */
1351 /* Takes a phi from the inverted loop head and its pred
1352 * which is assigned in the condition chain.
1354 * Returns new phi, created in the new loop head.
1355 * If there is an assignment in the condition chain
1356 * with a user also in the condition chain,
1357 * the dominance frontier is in the new loop head.
1358 * The dataflow loop is completely in the condition chain.
1362 * Phi_cp | | copied condition chain
1366 * Phi* | | new loop head
1368 * Phi | | original, inverted condition chain
1373 static ir_node *fix_inner_cc_definitions(ir_node *phi, ir_node *pred)
1377 ir_node **head_phi_ins;
1378 ir_node *new_loop_head = loop_info.new_loop_head;
1379 int new_loop_head_arity = get_irn_arity(new_loop_head);
1380 DB((dbg, LEVEL_5, "NEW HEAD %N arity %d\n", new_loop_head, new_loop_head_arity));
1382 NEW_ARR_A(ir_node *, head_phi_ins, new_loop_head_arity);
1384 for(i = 0; i < new_loop_head_arity; ++i) {
1385 /* Rely on correct backedge info for the new loop head
1386 * is set by extend_ins_by_copy. */
1388 DB((dbg, LEVEL_5, "CHECK mark of pred of %N @%d %N\n",
1389 new_loop_head, i, get_irn_n(new_loop_head, i)));
/* In coming from the (marked) condition chain: use pred directly. */
1391 if(is_nodes_block_marked(get_irn_n(new_loop_head, i))) {
1392 head_phi_ins[i] = pred;
1393 DB((dbg, LEVEL_5, "NEW HEAD %N pred %d %N is in cc\n", new_loop_head, i, pred));
1395 ir_node *cp = get_inversion_copy(pred);
1396 /* As pred is in the condition chain there has to be a copy. */
1398 head_phi_ins[i] = cp;
1400 DB((dbg, LEVEL_5, "NEW HEAD %N pred %d %N is in not in cc so get copy %N\n",
1401 new_loop_head, i, pred, head_phi_ins[i] ));
1403 DB((dbg, LEVEL_5, "NEW HEAD %N in %N %N\n",
1404 new_loop_head, head_phi_ins[i], current_ir_graph));
1407 head_phi= new_r_Phi(new_loop_head,
1408 new_loop_head_arity,
1412 add_Block_phi(new_loop_head, head_phi);
1414 DB((dbg, LEVEL_5, "fix inner cc definitions: new head %N has new phi %N from cc phi %N @%N\n",
1415 new_loop_head, head_phi, phi, pred));
1421 /* Puts the original condition chain at the end of the loop,
1422  * subsequently to the body.
1423  * Relies on block phi list and correct backedges.
/* Builds a replacement loop head that keeps only the backedge ins of the
 * old head, rebuilds every head phi accordingly, then exchanges the old
 * head (and its phis) for the new ones. */
1425 static void fix_head_inversion(void)
1429 ir_node *phi, *next;
1431 int arity = get_irn_arity(loop_head);
1432 int backedges = get_backedge_n(loop_head, 0);
1433 int new_arity = backedges;
1437 NEW_ARR_A(ir_node *, ins, new_arity);
1440 /* Keep only backedges */
1441 for(i = 0; i < arity; ++i) {
1442 if (is_own_backedge(loop_head, i))
1443 ins[pos++] = get_irn_n(loop_head, i);
1446 new_head = new_Block(new_arity, ins);
1448 phis = NEW_ARR_F(ir_node *, 0);
/* Rebuild each phi of the old head with only the backedge operands. */
1450 for_each_phi(loop_head, phi) {
1453 NEW_ARR_A(ir_node *, ins, new_arity);
1456 for (i = 0; i < arity; ++i) {
1457 ir_node *pred = get_irn_n(phi, i);
1459 if (is_own_backedge(loop_head, i)) {
1460 /* If assignment is in the condition chain,
1461  * we need to create a phi in the new loop head.
1462  * This can only happen for df, not cf. See find_condition_chains. */
1463 if (is_nodes_block_marked(pred)) {
1464 ins[pos++] = fix_inner_cc_definitions(phi, pred);
1472 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1473 new_head, new_arity, ins,
1476 ARR_APP1(ir_node *, phis, new_phi);
1478 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N\n", phi, new_phi));
/* Replace old head phis by the rebuilt ones collected above.
 * NOTE(review): pos is only advanced when an exchange actually happens;
 * confirm against the full source that phis[] and the phi list cannot
 * get out of step when phis[pos] == phi. */
1482 for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1483 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
1484 if (phis[pos] != phi)
1485 exchange(phi, phis[pos++]);
1490 DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1491 exchange(loop_head, new_head);
1494 /* Does the loop inversion. */
/* Clones the condition chain, splices the clone in front of the loop and
 * moves the original chain behind the body.  The five numbered steps below
 * must run bottom-up in exactly this order (see comment at the top of the
 * function body): each step consumes information a later rewiring destroys. */
1495 static void inversion_walk(out_edge *head_entries)
1500  * The order of rewiring bottom-up is crucial.
1501  * Any change of the order leads to lost information that would be needed later.
1504 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1506 /* 1. clone condition chain */
1507 inc_irg_visited(current_ir_graph);
1509 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1510 out_edge entry = head_entries[i];
1511 ir_node *pred = get_irn_n(entry.node, entry.pos);
1513 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
/* copy_walk only descends into nodes whose block is marked,
 * i.e. the condition chain, and stays inside cur_loop. */
1515 copy_walk(pred, is_nodes_block_marked, cur_loop);
1518 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1520 /* 2. Extends the head control flow successors ins
1521  * with the definitions of the copied head node. */
1522 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1523 out_edge head_out = head_entries[i];
1525 if (is_Block(head_out.node))
1526 extend_ins_by_copy(head_out.node, head_out.pos);
1529 /* 3. construct_ssa for users of definitions in the condition chain,
1530  * as there is now a second definition. */
1531 for (i = 0; i < ARR_LEN(head_entries); ++i) {
1532 out_edge head_out = head_entries[i];
1534 /* Ignore keepalives */
1535 if (is_End(head_out.node))
1538 /*if (!is_Block(head_out.node))
1539 assert(is_nodes_block_marked(head_out.pred) && "damn");*/
1541 /* Construct ssa for definitions in the condition chain. */
1542 if (!is_Block(head_out.node)) {
1543 ir_node *pred, *cppred, *block, *cpblock;
1545 /* May not use get_irn_n(head_out.node, head_out.pos)
1546  * as it can be changed by construct_ssa. */
1547 pred = head_out.pred;
1548 cppred = get_inversion_copy(pred);
1549 block = get_nodes_block(pred);
1550 cpblock = get_nodes_block(cppred);
1551 construct_ssa(block, pred, cpblock, cppred);
1555 /* 4. Remove the ins which are no backedges from the original condition chain
1556  * as the cc is now subsequently to the body. */
1557 fix_head_inversion();
1559 /* 5. Remove the backedges of the copied condition chain,
1560  * because it is going to be the new 'head' in advance to the loop */
1561 fix_copy_inversion();
1564 /* Analyse loop, if inversion is possible or reasonable. Then do the inversion. */
1565 static void loop_inversion(void)
/* NOTE(review): do_inversion has no visible use in this listing — the
 * branches consuming it appear to be elided; confirm against full source. */
1567 unsigned do_inversion = 1;
1569 inversion_head_node_limit = INT_MAX;
1570 ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1572 /* Reset block marks.
1573  * We use block marks to flag blocks of the original condition chain. */
1574 irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1576 loop_info.blocks = get_loop_n_blocks(cur_loop);
1577 cond_chain_entries = NEW_ARR_F(out_edge, 0);
1579 head_inversion_node_count = 0;
1580 head_inversion_block_count = 0;
/* The head itself always belongs to the condition chain. */
1582 set_Block_mark(loop_head, 1);
1583 mark_irn_visited(loop_head);
1585 /* Use phase to keep copy of nodes from the condition chain. */
1586 phase = new_phase(current_ir_graph, phase_irn_init_default);
1588 inc_irg_visited(current_ir_graph);
1590 /* Search for condition chains. */
1591 find_condition_chains(loop_head);
1593 DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
/* Bail out: single-block loops gain nothing from inversion. */
1595 if (loop_info.blocks < 2) {
1598 "Loop contains %d (less than 2) blocks => No Inversion done.\n",
1602 /* We also catch endless loops here,
1603  * because they do not have a condition chain. */
1604 if (head_inversion_block_count < 1) {
1607 "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1608 head_inversion_block_count));
1612 cur_head_outs = NEW_ARR_F(out_edge, 0);
1614 /* Get all edges pointing into the condition chain. */
1615 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1617 /* Do the inversion */
1618 inversion_walk(cur_head_outs);
1620 DEL_ARR_F(cur_head_outs);
1621 /* Duplicated blocks changed doms */
1622 set_irg_doms_inconsistent(current_ir_graph);
1623 /* Loop content changed */
1624 set_irg_loopinfo_inconsistent(current_ir_graph);
1625 /* TODO are they? Depends on set_irn_in and set_irn_n exchange and new_node. */
1626 set_irg_outs_inconsistent(current_ir_graph);
1631 DEL_ARR_F(cond_chain_entries);
1632 ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
 * Rewire floating copies of the current loop.
/* Chains the 'copies' floating loop duplicates top-down onto each own
 * backedge of the loop head, then redirects the head's backedges to the
 * last copy so the copies form the unrolled body. */
1638 static void place_copies(int copies)
1640 ir_node *loophead = loop_head;
1641 int headarity = get_irn_arity(loophead);
1646 /* Append the copies to the existing loop. */
1647 for (i = 0; i < headarity; ++i) {
1648 /* Build unrolled loop top down */
1649 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1650 for (c = 0; c < copies; ++c) {
1651 ir_node *upper_head;
1652 ir_node *lower_head = get_copy(loophead, c);
1653 ir_node *upper_pred;
/* Copy 0 hangs below the original loop; copy c below copy c-1.
 * NOTE(review): the if/else guarding these two assignments is
 * elided in this listing. */
1656 upper_head = loophead;
1657 upper_pred = get_irn_n(loophead, i);
1659 upper_head = get_copy(loophead, c - 1);
1660 upper_pred = get_copy_safe(get_irn_n(loophead, i), c - 1);
1662 set_irn_n(lower_head, pos, upper_pred);
/* Rewire the corresponding phi copies the same way. */
1663 for_each_phi(loophead, phi) {
1664 ir_node *lower_phi, *upper_phi, *upper_pred_phi;
1665 lower_phi = get_copy(phi, c);
1668 upper_pred_phi = get_irn_n(phi, i);
1670 upper_phi = get_copy(phi, c - 1);
1671 upper_pred_phi = get_copy_safe(get_irn_n(phi, i), c - 1);
1674 DB((dbg, LEVEL_5, "Rewire %N @%d to %N\n",
1675 lower_phi, pos, upper_pred_phi));
1677 /* Relying on phi nodes with 1 in to be present. */
1678 assert(is_Phi(lower_phi));
1680 /* Do not fix phis here. Chained phis will cause a defective copy array. */
/* A 1-in phi copy is replaced by a fresh phi pointing at the upper
 * predecessor; the copy map is updated so later iterations see it. */
1682 if (get_irn_arity(lower_phi) == 1) {
1683 ir_node *single_in[1];
1685 single_in[0] = upper_pred_phi;
1687 cp = new_rd_Phi(get_irn_dbg_info(lower_phi),
1688 get_nodes_block(lower_phi), 1, single_in,
1689 get_irn_mode(lower_phi));
1691 set_copy(phi, c, cp);
1692 exchange(lower_phi, cp);
1695 set_irn_n(lower_phi, pos, upper_pred_phi);
1700 /* Fix the topmost loop heads backedges. */
1701 set_irn_n(loophead, i, get_copy(get_irn_n(loophead, i), copies - 1));
1702 for_each_phi(loophead, phi) {
1703 ir_node *last_cp = get_copy_safe(get_irn_n(phi, i), copies - 1);
1704 set_irn_n(phi, i, last_cp);
1710 /* Copies the cur_loop */
/* Creates 'copies' floating duplicates of the whole current loop by
 * starting a copy walk from every edge leaving the loop.  The copies are
 * wired together later by place_copies(). */
1711 static void copy_loop(out_edge *cur_loop_outs, int copies)
1714 unroll_arity = get_backedge_n(loop_head, 0);
1716 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1719 for (c = 0; c < copies; ++c) {
/* Fresh visited epoch per copy so every node is walked again. */
1721 inc_irg_visited(current_ir_graph);
1723 DB((dbg, LEVEL_5, " ### Copy_loop copy n: %d ###\n", c));
1724 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1725 out_edge entry = cur_loop_outs[i];
1726 ir_node *pred = get_irn_n(entry.node, entry.pos);
/* On the first pass (c == 0) the walker allocates room for all
 * 'copies' duplicates; later passes reuse it. */
1728 copy_walk_n(pred, is_in_loop, c, c ? 0 : copies);
1732 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
/**
 * Creates the Duff's-device style entry switch needed for unrolling loops
 * whose trip count is not a multiple of the unroll factor.
 *
 * Currently a stub: the parameter is deliberately unused and no IR is
 * built.  unroll_loop() still calls it so the hook point is in place.
 *
 * @param unroll_number  intended unroll factor (ignored for now)
 */
static void create_duffs_device(int unroll_number)
{
	(void)unroll_number;
}
/* Returns non-zero iff the value of 'node' does not change while the loop
 * iterates: values defined outside the loop are invariant, and a node whose
 * own-backedge operands are only itself cannot vary.
 * NOTE(review): the return statements of both branches are elided in this
 * listing — confirm the exact truth values against the full source. */
1743 static unsigned is_loop_invariant_val(ir_node *node)
1747 if (! is_in_loop(node))
1750 /* TODO all cases? and how to use the dom tree for that. */
1753 for (i = 0; i < get_irn_arity(node); ++i) {
1754 if (is_own_backedge(get_nodes_block(node), i) && get_irn_n(node, i) != node)
/* Looks for the single node reached over all own backedges of 'node'
 * (the candidate iteration Add of a counting loop).  Returns NULL when
 * 'node' is outside the loop or the backedge operands disagree.
 * NOTE(review): several return statements and the final result line are
 * elided in this listing — confirm against the full source. */
1762 static ir_node *is_loop_iteration(ir_node *node)
1765 ir_node *found_add = NULL;
1767 if (!is_in_loop(node))
1773 /*TODO all cases? and how do i use the dom tree for that.*/
1776 for (i = 0; i < get_irn_arity(node); ++i) {
/* Only operands arriving over the loop's own backedges matter. */
1778 if (! is_own_backedge(get_nodes_block(node), i))
1780 new_found = get_irn_n(node,i);
/* Two different backedge operands => not a simple iteration. */
1782 if (found_add && found_add != new_found)
1785 found_add = new_found;
/* Decides whether and how often to unroll the current loop.
 * Pattern-matches a counting loop: exactly one conditional loop exit whose
 * Cmp compares a loop-invariant end value against a phi advanced by an Add
 * with a loop-invariant step.  On success it starts building Duff's-device
 * style IR (Mod of end value by the unroll count).
 * NOTE(review): many lines (returns, else branches, the final result) are
 * elided in this listing; the comments below describe only visible code. */
1792 static unsigned get_unroll_decision(out_edge *cur_loop_outs)
1795 ir_node *pred0, *pred1, *projx, *cond, *projres, *loop_condition;
1796 ir_node *iteration, *add, *iteration_phi, *end_val, *step, *add_in0, *add_in1;
1798 (void) cur_loop_outs;
1802 /* There is more than 1 condition for the loop exit. We cannot cope this with duffs device. */
1803 if (loop_info.cf_outs != 1)
1806 /*TODO test for impact on performance */
1807 if (loop_info.calls > 0)
/* Walk from the single exiting control flow edge back to the compared
 * value: Proj X -> Cond -> Proj res -> Cmp (the loop condition). */
1811 projx = get_irn_n(loop_info.cf_out.node, loop_info.cf_out.pos);
1812 cond = get_irn_n(projx, 0);
1813 projres = get_irn_n(cond, 0);
1814 loop_condition = get_irn_n(projres, 0);
1816 DB((dbg, LEVEL_2, " loop condition %N in graph %N\n", loop_condition, current_ir_graph));
1818 /* One in of the loop condition needs to be loop invariant.
1819  * The other in is assigned by an add.
1820  * The add uses a loop invariant value
1821  * and another value that depends on a loop invariant start value and the add itself.
1834 /* Find the loop invariant in */
1835 /* Assumption that condition is cmp or mod.
1837 pred0 = get_irn_n(loop_condition, 0);
1838 pred1 = get_irn_n(loop_condition, 1);
1840 if (is_loop_invariant_val(pred0)) {
1846 if (is_loop_invariant_val(pred1)) {
1847 if (end_val != NULL)
1848 /* RETURN. loop condition does not change during the loop. */
1855 DB((dbg, LEVEL_2, " end_val %N\n", end_val));
1858 /* Check for constraint of remaining in. */
1859 add = is_loop_iteration(iteration)
1863 DB((dbg, LEVEL_2, " add %N\n", add));
/* Of the Add's two operands, one must be the invariant step and the
 * other the iteration phi. */
1865 add_in0 = get_irn_n(add, 0);
1866 add_in1 = get_irn_n(add, 1);
1869 iteration_phi = NULL;
1871 if (is_loop_invariant_val(add_in0)) {
1873 iteration_phi = add_in1;
1876 DB((dbg, LEVEL_2, "seems step %N iter_phi %N\n", step, iteration_phi));
1878 if (is_loop_invariant_val(add_in1)) {
1882 iteration_phi = add_in0;
1885 DB((dbg, LEVEL_2, " step %N\n", step));
1887 if (! is_Phi(iteration_phi)) {
1888 if (is_Phi(iteration))
1889 iteration_phi = iteration;
1894 DB((dbg, LEVEL_2, " #### COUNTING LOOP in graph %N step %N end %N phi %N\n",
1895 current_ir_graph, step, end_val, iteration_phi));
/* Start building the remainder computation for Duff's device. */
1900 mode = get_irn_mode(add);
1902 block = new_Block(0, NULL);
1903 /*set_irg_current_Block(current_ir_graph, block);*/
1906 unroll_mod_const = new_Const(unroll_number);
1907 module = new_d_Mod(block, new_NoMem(), end_val, step, mode, op_pin_state_pinned);
/* Unrolls the current loop: duplicates the loop body unroll_number-1 times,
 * chains the copies, extends the loop exits and rebuilds SSA form for all
 * definitions now reaching the exits from multiple copies.
 * NOTE(review): unroll_number has no visible assignment here — the call to
 * get_unroll_decision() is commented out below; confirm against the full
 * source where the value comes from. */
1919 static void unroll_loop(void)
1922 unsigned unroll_number;
1924 cur_loop_outs = NEW_ARR_F(out_edge, 0);
/* Collect all edges leaving the loop; they are the copy-walk roots. */
1927 irg_walk_graph(current_ir_graph, get_loop_outs, NULL, NULL);
1929 /*unroll_number = get_unroll_decision(cur_loop_outs);*/
1932 /*dump_ir_block_graph(current_ir_graph, "-pre");*/
1934 if (unroll_number) {
1935 create_duffs_device(unroll_number);
1937 /* Copies the loop and allocs node_info */
1938 copy_loop(cur_loop_outs, unroll_number - 1);
1939 /*dump_ir_block_graph(current_ir_graph, "-cp");*/
1941 /* Line up the floating copies. */
1942 place_copies(unroll_number - 1);
1943 /*dump_ir_block_graph(current_ir_graph, "-ord");*/
1945 /* Extend loop successors */
1946 ssa_outs = extend_ins(cur_loop_outs, unroll_number - 1);
1947 /*dump_ir_block_graph(current_ir_graph, "-fixed")*/ ;
1949 /* Links are going to be overwritten */
1950 free_node_info_blocks();
1952 /* Generate phis for all loop outs */
1953 for (i = 0; i < ARR_LEN(ssa_outs); ++i) {
1954 out_edge entry = ssa_outs[i];
1955 ir_node *node = entry.node;
1956 ir_node *pred = get_irn_n(entry.node, entry.pos);
1958 DB((dbg, LEVEL_5, "Do? construct_ssa_n def %N node %N pos %d\n",
1959 pred, node, entry.pos));
/* Keepalive edges from End need no phi construction. */
1961 if (!is_End(node)) {
1962 DB((dbg, LEVEL_5, "construct_ssa_n def %N node %N pos %d\n",
1963 pred, node, entry.pos));
1965 construct_ssa_n(pred, node, unroll_number - 1);
1967 construct_ssa_n(pred, new_phi, unroll_number - 1);*/
1970 /*dump_ir_block_graph(current_ir_graph, "-ssa-done");*/
1975 /*irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);*/
1977 DEL_ARR_F(ssa_outs);
/* Unrolling invalidates dominance, loop and out information. */
1979 set_irg_doms_inconsistent(current_ir_graph);
1980 set_irg_loopinfo_inconsistent(current_ir_graph);
1981 set_irg_outs_inconsistent(current_ir_graph);
1983 DEL_ARR_F(cur_loop_outs);
1986 /* Initialization and */
/* Resets the per-loop state, gathers loop information by walking the graph,
 * then runs the enabled transformations (inversion and/or unrolling) on the
 * given loop. */
1987 static void init_analyze(ir_loop *loop)
1989 /* Init new for every loop */
1993 loop_head_valid = 1;
1994 loop_inv_head = NULL;
1995 loop_peeled_head = NULL;
1997 loop_info.calls = 0;
1998 loop_info.blocks = 0;
1999 loop_info.cf_outs = 0;
2001 DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n",
2002 get_loop_node(loop, 0)));
/* Fills loop_head, loop_head_valid and loop_info. */
2004 irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
2006 /* RETURN if there is no valid head */
2007 if (!loop_head || !loop_head_valid) {
2008 DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n"));
2012 DB((dbg, LEVEL_2, "Loophead: %N\n", loop_head));
2014 if (enable_inversion)
2017 if (enable_unrolling)
2020 DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n",
2021 get_loop_node(loop, 0)));
2024 /* Find most inner loops and send them to analyze_loop */
/* Recursively descends the loop tree and appends innermost loops to the
 * file-global 'loops' array.
 * NOTE(review): the condition deciding between appending and recursing is
 * elided in this listing — presumably sons == 0 selects the append branch;
 * confirm against the full source. */
2025 static void find_most_inner_loop(ir_loop *loop)
2027 /* descend into sons */
2028 int sons = get_loop_n_sons(loop);
2031 ARR_APP1(ir_loop *, loops, loop);
2034 for (s=0; s<sons; s++) {
2035 find_most_inner_loop(get_loop_son(loop, s));
 * Assure preconditions are met and go through all loops.
/* Driver: establishes outs, loop info and phi lists for irg, collects all
 * innermost loops, then optimizes each in turn, refreshing phi/proj
 * information between loops because the transformations invalidate it. */
2043 void loop_optimization(ir_graph *irg)
2050 set_current_ir_graph(irg);
2053 assure_irg_outs(irg);
2055 /* NOTE: sets only the loop attribute of blocks, not nodes */
2056 /* NOTE: Kills links */
2057 assure_cf_loop(irg);
2059 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
2060 collect_phiprojs(irg);
2061 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2063 loop = get_irg_loop(irg);
2064 sons = get_loop_n_sons(loop);
2066 loops = NEW_ARR_F(ir_loop *, 0);
2067 /* List all inner loops */
2068 for (nr = 0; nr < sons; ++nr) {
2069 find_most_inner_loop(get_loop_son(loop, nr));
2072 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2073 /* Set all links NULL */
2074 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2076 for (i = 0; i < ARR_LEN(loops); ++i) {
2077 ir_loop *loop = loops[i];
/* Re-collect phis/projs and clear links after each optimized loop,
 * since the transformation overwrote them. */
2080 /*dump_ir_block_graph(current_ir_graph, "-part");*/
2081 collect_phiprojs(irg);
2082 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2085 /*dump_ir_block_graph(current_ir_graph, "-full");*/
2088 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2089 ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
2092 void do_loop_unrolling(ir_graph *irg)
2094 enable_unrolling = 1;
2096 enable_inversion = 0;
2098 DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
2099 get_irg_start(irg)));
2101 loop_optimization(irg);
2103 DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
2104 get_irg_start(irg)));
2107 void do_loop_inversion(ir_graph *irg)
2109 enable_unrolling = 0;
2111 enable_inversion = 1;
2113 DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
2114 get_irg_start(irg)));
2116 loop_optimization(irg);
2118 DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
2119 get_irg_start(irg)));
2122 void do_loop_peeling(ir_graph *irg)
2124 enable_unrolling = 0;
2126 enable_inversion = 0;
2128 DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
2129 get_irg_start(irg)));
2131 loop_optimization(irg);
2133 DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
2134 get_irg_start(irg)));
2138 ir_graph_pass_t *loop_inversion_pass(const char *name)
2140 return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
2143 ir_graph_pass_t *loop_unroll_pass(const char *name)
2145 return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
2148 ir_graph_pass_t *loop_peeling_pass(const char *name)
2150 return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
2153 void firm_init_loop_opt(void)
2155 FIRM_DBG_REGISTER(dbg, "firm.opt.loop");