Loop inversion is more elegant and cleaned up now by using phases. Testsuite runs...
[libfirm] / ir / opt / loop.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @author   Christian Helmer
23  * @brief    loop inversion and loop unrolling
24  *
25  * @version  $Id$
26  */
27
28 #include "config.h"
29
30 #include "iroptimize.h"
31 #include "opt_init.h"
32 #include "irnode.h"
33 #include "debug.h"
34
35
36 #include "ircons.h"
37 #include "irgopt.h"
38 #include "irgmod.h"
39 #include "irgwalk.h"
40 #include "irouts.h"
41 #include "iredges.h"
42 #include "irtools.h"
43 #include "array_t.h"
44 #include "beutil.h"
45 #include "irpass.h"
46 #include "irdom.h"
47
48 #include "irbackedge_t.h"
49 #include "irphase_t.h"
50 #include "irloop_t.h"
51
52
53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
54
55 /**
56  * Convenience macro for iterating over every phi node of the given block.
57  * Requires phi list per block.
58  */
59 #define for_each_phi(block, phi) \
60         for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))
61
62 #define for_each_phi_safe(head, phi, next) \
63         for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
64                         (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
65
66 /* currently processed loop */
67 static ir_loop *cur_loop;
68
69 /* Condition for performing copy_walk. */
70 typedef unsigned walker_condition(ir_node *);
71
72 /* Node and position of a predecessor */
73 typedef struct out_edge {
74         ir_node *node;
75         int pos;
76 } out_edge;
77
78 /**/
79 typedef struct inversion_node_info {
80         ir_node *copy;
81         ir_node **ins;
82         struct inversion_node_info *free_list_next;     /* linked list to free all node_infos */
83 } inversion_node_info;
84
85 /*   */
86 typedef struct node_info {
87         ir_node **copy;
88         ir_node **ins;
89         struct ir_node *free_list_next;         /* linked list to free all node_infos */
90 } node_info;
91
92 /* Head of the list for freeing node_infos */
93 static ir_node *free_list;
94
95 /* A walker may start visiting the current loop with these nodes. */
96 static out_edge *cur_loop_outs;
97
98 /* Outs of the nodes head. */
99 static out_edge *cur_head_outs;
100
101 /* Information about the loop head */
102 static ir_node *loop_head = NULL;
103 static unsigned loop_head_valid = 1;
104
105 /* List of all inner loops, that are processed. */
106 static ir_loop **loops;
107
108 /* Inverted head */
109 static ir_node *loop_inv_head = NULL;
110 /* Peeled head */
111 static ir_node *loop_peeled_head = NULL;
112
113 /* Loop analysis informations */
114 typedef struct loop_info_t {
115         unsigned blocks;                        /* number of blocks in the loop */
116         unsigned calls;                         /* number of calls */
117         unsigned cf_outs;
118         out_edge cf_out;
119         ir_node *new_loop_head;
120 } loop_info_t;
121
122 /* Information about the current loop */
123 static loop_info_t loop_info;
124
125 /* Outs of the condition chain (loop inversion). */
126 static out_edge *cond_chain_entries;
127
128 /* Arity of the unrolled loop heads. */
129 static int unroll_arity;
130
131 static ir_phase *phase;
132
133
134
135 /* TODO */
136 static unsigned head_inversion_node_count;
137 static unsigned inversion_head_node_limit;
138 static unsigned head_inversion_block_count;
139
140 static unsigned enable_peeling;
141 static unsigned enable_inversion;
142 static unsigned enable_unrolling;
143
144
145 /* Creates node_info. A linked list keeps all node_infos to free them again. */
146 static node_info *new_node_info(ir_node *node)
147 {
148         node_info *l = XMALLOCZ(node_info);
149         l->free_list_next = free_list;
150         free_list = node;
151         l->copy = NULL;
152         return l;
153 }
154
155 /* Returns linked node_info for given node. */
156 static node_info *get_node_info(ir_node *n)
157 {
158         return ((node_info *)get_irn_link(n));
159 }
160
161 /* Allocates a node_info struct for the given node */
162 static void alloc_node_info(ir_node *node)
163 {
164         node_info *state;
165         state = new_node_info(node);
166         set_irn_link(node, (void *)state);
167 }
168
169 /* Frees node info and copy array for given node. Returns next node of free_list. */
170 static ir_node *free_info(ir_node *node)
171 {
172         node_info *info = get_node_info(node);
173         ir_node *next = info->free_list_next;
174         /*DB((dbg, LEVEL_1, "FREE %N\n", node));*/
175         DEL_ARR_F(info->copy);
176         xfree(info);
177         set_irn_link(node, NULL);
178         return next;
179 }
180
181 /* Free all created node_infos, including the copy arrays. */
182 static void free_node_info(void)
183 {
184         ir_node *node = free_list;
185         while (node) {
186                 node = free_info(node);
187         }
188         free_list = NULL;
189 }
190
191 /* Free node_infos of blocks only, including the copy arrays. */
192 static void free_node_info_blocks(void)
193 {
194         ir_node *node = free_list;
195         ir_node *new_freelist = NULL;
196         while (node) {
197                 ir_node *next;
198                 if (is_Block(node)) {
199                         next = free_info(node);
200                 } else {
201                         next = get_node_info(node)->free_list_next;
202                         get_node_info(node)->free_list_next = new_freelist;
203                         new_freelist = node;
204                 }
205                 node = next;
206         }
207         free_list = new_freelist;
208 }
209
210 /* Reset nodes link. For use with a walker. */
211 static void reset_link(ir_node *node, void *env)
212 {
213         (void)env;
214         set_irn_link(node, NULL);
215 }
216
217 static void set_inversion_copy(ir_node *n, ir_node *cp)
218 {
219         phase_set_irn_data(phase, n, cp);
220 }
221
222 /* Returns a nodes copy. */
223 static ir_node *get_copy(ir_node *n, int index)
224 {
225         ir_node *cp = ((node_info *)get_irn_link(n))->copy[index];
226         return cp;
227 }
228
229 /* Returns a nodes copy if it exists, or else the node. */
230 static ir_node *get_inversion_copy(ir_node *n)
231 {
232         ir_node *cp = (ir_node *)phase_get_irn_data(phase, n);
233         return cp;
234 }
235
236 /* Returns a nodes copy, or node if there is no node info linked. */
237 static ir_node *get_copy_safe(ir_node *node, int index)
238 {
239         node_info *info = (node_info *)get_irn_link(node);
240         if (info != NULL)
241                 return get_copy(node, index);
242         else
243                 return node;
244 }
245
246 /* Assigns the given array to a nodes node_info */
247 static void set_copy_arr(ir_node *n, ir_node **arr)
248 {
249         ((node_info *)get_irn_link(n))->copy = arr;
250 }
251
252 /* Assigns a copy with given index to a node.
253  * Prerequirements are existing node_info and copy array. */
254 static void set_copy(ir_node *n, int index, ir_node *copy)
255 {
256         ((node_info *)get_irn_link(n))->copy[index] = copy;
257 }
258
259 /* Returns 0 if the node or block is not in cur_loop. */
260 static unsigned is_in_loop(ir_node *node)
261 {
262         return (get_irn_loop(get_block(node)) == cur_loop);
263 }
264
265 /* Returns if the given be is an alien edge.
266  * This is the case when the pred is not in the loop. */
267 static unsigned is_alien_edge(ir_node *n, int i)
268 {
269         return(!is_in_loop(get_irn_n(n, i)));
270 }
271
272 static unsigned is_own_backedge(ir_node *n, int pos)
273 {
274         return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
275 }
276
277 /* Resets block mark for given node. For use with walker */
278 static void reset_block_mark(ir_node *node, void * env)
279 {
280         (void) env;
281
282         if (is_Block(node))
283                 set_Block_mark(node, 0);
284 }
285
286 static unsigned is_nodes_block_marked(ir_node* node)
287 {
288         if (is_Block(node))
289                 return get_Block_mark(node);
290         else
291                 return get_Block_mark(get_block(node));
292 }
293
294 /* Returns the number of blocks in a loop. */
295 static int get_loop_n_blocks(ir_loop *loop)
296 {
297         int elements, e;
298         int blocks = 0;
299         elements = get_loop_n_elements(loop);
300
301         for (e=0; e<elements; e++) {
302                 loop_element elem = get_loop_element(loop, e);
303                 if (is_ir_node(elem.kind) && is_Block(elem.node))
304                         ++blocks;
305         }
306         return blocks;
307 }
308
309 /* */
310 static void extend_ins_by_copy(ir_node *block, int pos)
311 {
312         ir_node **ins;
313         ir_node *phi;
314         int i;
315         int arity = get_irn_arity(block);
316         int new_arity = arity + 1;
317         assert(is_Block(block));
318
319         NEW_ARR_A(ir_node *, ins, new_arity);
320
321         /* Extend block by copy of definition at pos */
322         for(i = 0; i < arity; ++i) {
323                 ins[i] = get_irn_n(block, i);
324         }
325         ins[i] = get_inversion_copy(get_irn_n(block, pos));
326
327         DB((dbg, LEVEL_5, "Extend block %N by %N\n", block, ins[i]));
328
329         /* Fixes backedges. We rely on correct backedges. */
330         set_irn_in(block, new_arity, ins);
331
332         /* Extend block phis by copy of definition at pos */
333         for_each_phi(block, phi) {
334                 ir_node *cp, *pred;
335                 for(i = 0; i < arity; ++i)
336                         ins[i] = get_irn_n(phi, i);
337                 pred = get_irn_n(phi, pos);
338                 cp = get_inversion_copy(pred);
339                 /* If the phis in is not in the condition chain (eg. a constant),
340                  * there is no copy. */
341                 if (cp == NULL)
342                         ins[i] = pred;
343                 else
344                         ins[i] = cp;
345
346                 DB((dbg, LEVEL_5, "Extend phi %N by %N\n", phi, ins[i]));
347                 set_irn_in(phi, new_arity, ins);
348         }
349 }
350
351 /*
352  * Extends all blocks which succeed the loop with the copied loops outs.
353  * Also extends the blocks phis. Requires block phi list.
354  * Returns an updated list of loop outs of the original loop outs.
355  */
356 static out_edge *extend_ins(out_edge *outs, int copies)
357 {
358         ir_node **ins;
359         ir_node **phi_ins;
360         int *bes, *phi_bes;
361         ir_node *phi, *next_phi, *new_phi, *head, *new_block;
362         int old_arity, new_arity;
363         int i, c, p;
364         out_edge *new_outs;
365
366         inc_irg_visited(current_ir_graph);
367
368         new_outs = NEW_ARR_F(out_edge, 0);
369
370 #if 0
371         for (i = 0; i < ARR_LEN(outs); ++i) {
372                 out_edge edge = outs[i];
373                 ir_node *node = edge.node;
374                 DB((dbg, LEVEL_5, "outs %N %d\n", node, edge.pos));
375         }
376 #endif
377
378         for (i = 0; i < ARR_LEN(outs); ++i) {
379                 out_edge edge = outs[i];
380                 ir_node *node = edge.node;
381                 int in_c, positions_n;
382                 int *positions;
383
384                 if (!is_Block(node) && !is_Bad(node)) {
385 #if 0
386                         if (is_Phi(node)) {
387                                 int c;
388                                 unsigned b = 0;
389                                 ir_node *p = get_nodes_block(node);
390                                 for(c = 0; c < get_irn_arity(p); ++c) {
391                                         if (is_in_loop(get_irn_n(p, c))) {
392                                                 b=1;
393                                                 break;
394                                         }
395                                 }
396                                 if (!b) {
397                                         ARR_APP1(out_edge, new_outs, edge);
398                                         DB((dbg, LEVEL_5, "new out %N\n", node));
399                                 }
400
401                         } else {
402 #endif
403                         ARR_APP1(out_edge, new_outs, edge);
404                         DB((dbg, LEVEL_5, "new out %N\n", node));
405
406                 }
407
408                 /* CONTINUE if node already processed. */
409                 if (irn_visited(node))
410                         continue;
411
412                 if (!is_Block(node))
413                         continue;
414
415                 positions = NEW_ARR_F(int, 0);
416                 positions_n = 0;
417
418                 /* Search for all occurences of the current node, and save the in positions. */
419                 for (p = 0; p < ARR_LEN(outs); ++p) {
420                         ir_node *cur = outs[p].node;
421                         int d_pos = outs[p].pos;
422
423                         if (node==cur) {
424                                 DB((dbg, LEVEL_5, "Loop successor %N with pred @%d in loop\n",
425                                                         node, d_pos));
426                                 ARR_APP1(int, positions, d_pos);
427                                 mark_irn_visited(cur);
428                                 ++positions_n;
429                         }
430                 }
431
432                 old_arity = get_irn_arity(node);
433                 new_arity = old_arity + (positions_n * copies);
434
435                 ins = NEW_ARR_F(ir_node*, new_arity);
436                 bes = NEW_ARR_F(int, new_arity);
437
438                 /* extend block ins */
439                 for (in_c = 0; in_c < old_arity; ++in_c) {
440                         ins[in_c] = get_irn_n(node, in_c);
441                         bes[in_c] = is_backedge(node, in_c);
442                 }
443                 for (p = 0; p < positions_n; ++p) {
444                         for (c = 0; c < copies; ++c) {
445                                 ir_node *cp = get_copy_safe(get_irn_n(node, positions[p]), c);
446                                 DB((dbg, LEVEL_5, "%N (new_arity %d) @%d = %N\n",
447                                                         node, new_arity, in_c, cp));
448                                 ins[in_c] = cp;
449                                 bes[in_c] = is_backedge(node, positions[p]);
450                                 ++in_c;
451                         }
452                 }
453
454                 assert(new_arity == in_c);
455
456                 head = get_Block_phis(node);
457                 new_block = new_r_Block(get_irn_irg(node), new_arity, ins);
458                 set_irn_loop(new_block, get_irn_loop(node));
459
460                 for (in_c = 0; in_c < new_arity; ++in_c) {
461                         if (bes[in_c])
462                                 set_backedge(new_block, in_c);
463                 }
464                 DEL_ARR_F(ins);
465                 DEL_ARR_F(bes);
466
467 #if 1
468                 set_Block_phis(node, NULL);
469
470                 phi_ins = NEW_ARR_F(ir_node *, new_arity);
471                 phi_bes = NEW_ARR_F(int, new_arity);
472
473                 for_each_phi_safe(head, phi, next_phi) {
474                         for (in_c = 0; in_c < old_arity; ++in_c) {
475                                 phi_ins[in_c] = get_irn_n(phi, in_c);
476                                 phi_bes[in_c] = is_backedge(phi, in_c);
477                         }
478
479                         for (p = 0; p < positions_n; ++p) {
480                                 for(c = 0; c < copies; ++c) {
481                                         ir_node *cp, *pred;
482                                         pred = get_irn_n(phi, positions[p]);
483                                         DB((dbg, LEVEL_5, "Phi %N pred @%d is %N\n",
484                                                                 phi, positions[p], pred));
485                                         cp = get_copy_safe(pred, c);
486                                         phi_ins[in_c] = cp;
487                                         DB((dbg, LEVEL_5, "Phi %N in %d is %N\n",
488                                                                 phi, in_c, cp));
489                                         phi_bes[in_c] = is_backedge(phi, positions[p]);
490                                         ++in_c;
491                                 }
492                         }
493
494                         new_phi = new_r_Phi(new_block, new_arity, phi_ins, get_irn_mode(phi));
495
496                         for (in_c = 0; in_c < new_arity; ++in_c) {
497                                 if (phi_bes[in_c])
498                                         set_backedge(new_phi, in_c);
499                         }
500                         exchange(phi, new_phi);
501                         DB((dbg, LEVEL_5, "exch Phi %N  %N\n",phi, new_phi));
502
503                         add_Block_phi(new_block, new_phi);
504                 }
505
506                 DEL_ARR_F(phi_ins);
507                 DEL_ARR_F(phi_bes);
508 #endif
509                 exchange(node, new_block);
510                 set_Block_MacroBlock(new_block, new_block);
511                 DB((dbg, LEVEL_5, "exch block %N  %N\n", node, new_block));
512
513                 DEL_ARR_F(positions);
514         }
515         return new_outs;
516 }
517
518 /**
519  * Finds loop head and loop_info.
520  */
521 static void get_loop_info(ir_node *node, void *env)
522 {
523         unsigned node_in_loop, pred_in_loop;
524         int i, arity;
525         (void)env;
526
527         arity = get_irn_arity(node);
528         for (i = 0; i < arity; i++) {
529                 ir_node *pred = get_irn_n(node, i);
530
531                 pred_in_loop = is_in_loop(pred);
532                 node_in_loop = is_in_loop(node);
533
534                 /* collect some loop information */
535                 if (node_in_loop) {
536                         if (is_Call(node))
537                                 ++loop_info.calls;
538                 }
539
540                 /* Find the loops head/the blocks with cfpred outside of the loop */
541                 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
542                         ir_node *cfgpred = get_Block_cfgpred(node, i);
543
544                         if (!is_in_loop(cfgpred)) {
545                                 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
546                                                         node, pred));
547                                 /* another head? We do not touch this. */
548                                 if (loop_head && loop_head != node) {
549                                         loop_head_valid = 0;
550                                 } else {
551                                         loop_head = node;
552                                 }
553                         }
554                 }
555         }
556 }
557 /**/
558 static void correct_phis(ir_node *node, void *env)
559 {
560         (void)env;
561
562         if (is_Phi(node) && get_irn_arity(node) == 1) {
563                 ir_node *exch;
564                 ir_node *in[1];
565                 in[0] = get_irn_n(node, 0);
566
567                 exch = new_rd_Phi(get_irn_dbg_info(node),
568                         get_nodes_block(node), 1, in,
569                         get_irn_mode(node));
570
571                 exchange(node, exch);
572                 /* TODO make sure visited is copied */
573         }
574
575
576 }
577
578 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
579 static void get_loop_outs(ir_node *node, void *env)
580 {
581         unsigned node_in_loop, pred_in_loop;
582         int i, arity;
583         (void) env;
584
585         arity = get_irn_arity(node);
586         for (i = 0; i < arity; ++i) {
587                 ir_node *pred = get_irn_n(node, i);
588
589                 pred_in_loop = is_in_loop(pred);
590                 node_in_loop = is_in_loop(node);
591
592                 if (pred_in_loop && !node_in_loop) {
593                         out_edge entry;
594                         entry.node = node;
595                         entry.pos = i;
596                         ARR_APP1(out_edge, cur_loop_outs, entry);
597                         if (is_Block(node)) {
598                                 ++loop_info.cf_outs;
599                                 loop_info.cf_out = entry;
600                         }
601                         DB((dbg, LEVEL_5, "loop outs %N\n", node));
602                 }
603         }
604 }
605
606 static ir_node *ssa_second_def;
607 static ir_node *ssa_second_def_block;
608
609 /**
610  * Walks the graph bottom up, searching for definitions and creates phis.
611  */
612 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
613 {
614         int i;
615         int n_cfgpreds;
616         ir_graph *irg;
617         ir_node *phi;
618         ir_node **in;
619
620         DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
621
622         /* Prevents creation of phi that would be bad anyway.
623          * Dead and bad blocks. */
624         if (get_irn_arity(block) < 1 || is_Bad(block)) {
625                 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
626                 return new_Bad();
627         }
628
629         if (block == ssa_second_def_block) {
630                 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
631                 return ssa_second_def;
632         }
633
634         /* already processed this block? */
635         if (irn_visited(block)) {
636                 ir_node *value = get_irn_link(block);
637                 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
638                 return value;
639         }
640
641         irg = get_irn_irg(block);
642         assert(block != get_irg_start_block(irg));
643
644         /* a Block with only 1 predecessor needs no Phi */
645         n_cfgpreds = get_Block_n_cfgpreds(block);
646         if (n_cfgpreds == 1) {
647                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
648                 ir_node *value;
649
650                 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
651
652                 value = search_def_and_create_phis(pred_block, mode);
653                 set_irn_link(block, value);
654                 mark_irn_visited(block);
655
656                 return value;
657         }
658
659         /* create a new Phi */
660         NEW_ARR_A(ir_node*, in, n_cfgpreds);
661         for (i = 0; i < n_cfgpreds; ++i)
662                 in[i] = new_Unknown(mode);
663
664         phi = new_r_Phi(block, n_cfgpreds, in, mode);
665
666
667         /* Important: always keep block phi list up to date. */
668         add_Block_phi(block, phi);
669
670         DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
671
672         set_irn_link(block, phi);
673         mark_irn_visited(block);
674
675         /* set Phi predecessors */
676         for (i = 0; i < n_cfgpreds; ++i) {
677                 ir_node *pred_val;
678                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
679                 assert(pred_block != NULL);
680                 pred_val = search_def_and_create_phis(pred_block, mode);
681                 assert(pred_val != NULL);
682
683                 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
684                 set_irn_n(phi, i, pred_val);
685         }
686         return phi;
687 }
688
689 /**
690  * Given a set of values this function constructs SSA-form for the users of the
691  * first value (the users are determined through the out-edges of the value).
692  * Works without using the dominance tree.
693  */
694 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
695                 ir_node *second_block, ir_node *second_val)
696 {
697         ir_graph *irg;
698         ir_mode *mode;
699         const ir_edge_t *edge;
700         const ir_edge_t *next;
701
702         assert(orig_block && orig_val && second_block && second_val &&
703                         "no parameter of construct_ssa may be NULL");
704
705         /* no need to do anything */
706         if (orig_val == second_val)
707                 return;
708
709         irg = get_irn_irg(orig_val);
710
711         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
712         inc_irg_visited(irg);
713
714         mode = get_irn_mode(orig_val);
715         set_irn_link(orig_block, orig_val);
716         mark_irn_visited(orig_block);
717
718         ssa_second_def_block = second_block;
719         ssa_second_def       = second_val;
720
721         /* Only fix the users of the first, i.e. the original node */
722         foreach_out_edge_safe(orig_val, edge, next) {
723                 ir_node *user = get_edge_src_irn(edge);
724                 int j = get_edge_src_pos(edge);
725                 ir_node *user_block = get_nodes_block(user);
726                 ir_node *newval;
727
728                 /* ignore keeps */
729                 if (is_End(user))
730                         continue;
731
732                 DB((dbg, LEVEL_5, "original user %N\n", user));
733
734                 if (is_Phi(user)) {
735                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
736                         newval = search_def_and_create_phis(pred_block, mode);
737                 } else {
738                         newval = search_def_and_create_phis(user_block, mode);
739                 }
740
741                 /* If we get a bad node the user keeps the original in. No second definition needed. */
742                 if (newval != user && !is_Bad(newval))
743                         set_irn_n(user, j, newval);
744         }
745
746         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
747 }
748
749
750 /* Construct ssa for def and all of its copies.
751  * NOTE: Overwrites links of blocks.
752 */
753 static ir_node *construct_ssa_n(ir_node *def, ir_node *user, int copies)
754 {
755         ir_graph *irg;
756         ir_mode *mode;
757         irg = get_irn_irg(def);
758         int i, user_arity;
759         ir_node *newval;
760
761         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
762         inc_irg_visited(current_ir_graph);
763         mode = get_irn_mode(def);
764
765         set_irn_link(get_nodes_block(def), def);
766         mark_irn_visited(get_nodes_block(def));
767
768         for (i = 0; i < copies; ++i) {
769                 ir_node *cp = get_copy(def, i);
770                 ir_node *block = get_nodes_block(cp);
771                 set_irn_link(block, cp);
772                 mark_irn_visited(block);
773
774                 DB((dbg, LEVEL_5, "ssa mark cp %N of def %N  usr %N\n", cp, def, user));
775         }
776
777         /* TODO BUG BUG */
778         newval = user;
779         user_arity = get_irn_arity(user);
780         for (i = 0; i < user_arity; ++i) {
781                 ir_node *user_block = get_nodes_block(user);
782                 if (get_irn_n(user, i) != def)
783                         continue;
784
785                 DB((dbg, LEVEL_5, "ssa_n found edge of user %N\n", user));
786
787                 if (is_Phi(user)) {
788                         ir_node *pred_block = get_Block_cfgpred_block(user_block, i);
789                         newval = search_def_and_create_phis(pred_block, mode);
790                 } else {
791                         newval = search_def_and_create_phis(user_block, mode);
792                 }
793                 /*TODO bads should not happen! */
794                 if (newval != user) {
795                         DB((dbg, LEVEL_5, "=> ssa_n new in is %N\n", newval));
796                         set_irn_n(user, i, newval);
797                         /*if (is_Phi(newval) && get_nodes_block(newval)== user_block) {
798                         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
799                                 return newval;
800                         }*/
801                 }
802
803                 /* BREAK, as we found and handled the definition */
804                 break;
805
806 #if 0
807                 if (is_Bad(newval)) {
808                         DB((dbg, LEVEL_5, "=> ssa_n new in is BAD %N in graph %N\n", newval, current_ir_graph));
809                         add_End_keepalive(get_irg_end(current_ir_graph), newval);
810                         dump_ir_block_graph(current_ir_graph, "-bad");
811                         assert(0);
812
813                 }
814 #endif
815         }
816
817         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
818         return NULL;
819 }
820
821 /* Returns the number of backedges with or without alien bes. */
822 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
823 {
824         int i;
825         int be_n = 0;
826         int arity = get_irn_arity(loophead);
827         for (i = 0; i < arity; ++i) {
828                 ir_node *pred = get_irn_n(loophead, i);
829                 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
830                         ++be_n;
831         }
832         return be_n;
833 }
834
835 /**
836  * Returns a raw copy of the given node.
837  * Attributes are kept/set according to the needs of loop inversion.
838  */
839 static ir_node *copy_node_inversion(ir_node *node)
840 {
841         int i, arity;
842         ir_node *cp;
843
844         cp = exact_copy(node);
845         arity = get_irn_arity(node);
846
847         for (i = 0; i < arity; ++i) {
848                 if (is_backedge(node, i))
849                         set_backedge(cp, i);
850         }
851
852         set_inversion_copy(node, cp);
853         DB((dbg, LEVEL_5, "set copy of %N to cp %N \n", node, cp));
854
855         if (is_Block(cp)) {
856                 /* We may not keep the old macroblock. */
857                 set_Block_MacroBlock(cp, cp);
858                 set_Block_mark(cp, 0);
859         }
860         return cp;
861 }
862
863
864 /**
865  * This walker copies all walked nodes.
866  * If the walk_condition is true for a node, it is copied.
867  * All nodes node_info copy attributes have to be NULL prior to every walk.
868  */
869 static void copy_walk(ir_node *node, walker_condition *walk_condition,
870                 ir_loop *set_loop)
871 {
872         int i;
873         int arity;
874         ir_node *cp;
875         ir_node **cpin;
876         ir_graph *irg = current_ir_graph;
877
878         /**
879          * break condition and cycle resolver, creating temporary node copies
880          */
881         if (get_irn_visited(node) >= get_irg_visited(irg)) {
882                 /* Here we rely on nodestate's copy being initialized with NULL */
883                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
884                 if (get_inversion_copy(node) == NULL) {
885                         cp = copy_node_inversion(node);
886                         DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
887                 }
888                 return;
889         }
890
891         /* Walk */
892         mark_irn_visited(node);
893
894         if (!is_Block(node)) {
895                 ir_node *pred = get_nodes_block(node);
896                 if (walk_condition(pred))
897                         DB((dbg, LEVEL_5, "walk block %N\n", pred));
898                 copy_walk(pred, walk_condition, set_loop);
899         }
900
901         arity = get_irn_arity(node);
902
903         NEW_ARR_A(ir_node *, cpin, arity);
904
905         for (i = 0; i < arity; ++i) {
906                 ir_node *pred = get_irn_n(node, i);
907
908                 if (walk_condition(pred)) {
909                         DB((dbg, LEVEL_5, "walk node %N\n", pred));
910                         copy_walk(pred, walk_condition, set_loop);
911                         cpin[i] = get_inversion_copy(pred);
912                         DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
913                                                 node, get_inversion_copy(pred), pred));
914                 } else {
915                         cpin[i] = pred;
916                 }
917         }
918
919         /* copy node / finalize temp node */
920         if (get_inversion_copy(node) == NULL) {
921                 /* No temporary copy existent */
922                 cp = copy_node_inversion(node);
923                 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
924         } else {
925                 /* temporary copy is existent but without correct ins */
926                 cp = get_inversion_copy(node);
927                 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
928         }
929
930         if (!is_Block(node)) {
931                 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
932
933                 set_nodes_block(cp, cpblock );
934                 if (is_Phi(cp))
935                         add_Block_phi(cpblock, cp);
936         }
937
938         set_irn_loop(cp, set_loop);
939
940         /* Keeps phi list of temporary node. */
941         set_irn_in(cp, ARR_LEN(cpin), cpin);
942 }
943
944 /* Creates a new node from a given one, but with custom ins. */
945 static ir_node *copy_node_changed(
946                 ir_node *node, ir_node *block, int arity, ir_node **ins, ir_loop *loop)
947 {
948         ir_node *cp;
949         DB((dbg, LEVEL_5, "New node from %N in %N block %N arity %d\n",
950                                 node, get_irn_irg(node), block, arity));
951 #if 0
952         if (is_Phi(node)) {
953                 cp = new_rd_Phi(get_irn_dbg_info(node),
954                                 block,
955                                 arity,
956                                 ins,
957                                 get_irn_mode(node));
958
959         } else {
960 #endif
961         /* We want to keep phi nodes with arity 1, as it makes rewiring a lot easier. */
962         cp = new_ir_node(get_irn_dbg_info(node),
963                         get_irn_irg(node),
964                         block,
965                         get_irn_op(node),
966                         get_irn_mode(node),
967                         arity,
968                         ins);
969
970         /* By any means, keep the blocks phi list up-to-date,
971            because regaining it would destroy the links. */
972         if (is_Phi(cp))
973                 add_Block_phi(block, cp);
974 /* TODO */
975         if (get_irn_op(node) == get_irn_op(cp))
976                 copy_node_attr(get_irn_irg(node), node, cp);
977
978         /*new_backedge_info(cp); function removed */
979         if (is_Block(cp))
980                 set_Block_MacroBlock(cp, cp);
981
982         set_irn_loop(cp, loop);
983
984         /* Blocks are copied first, so that future phi nodes populate the phi list. */
985         if (is_Block(cp))
986                 set_Block_phis(cp, NULL);
987
988         DB((dbg, LEVEL_5, "New node %N in %N block %N arity %d\n",
989                                 cp, get_irn_irg(cp), block, arity));
990         return cp;
991 }
992
993 /* To be exchangeable, the unknown needs to have proper graph set. */
994 static ir_node *new_node_unknown(ir_node *block, ir_mode *mode)
995 {
996         ir_node *new = new_ir_node(NULL, get_irn_irg(block), block, op_Unknown, mode, 0, NULL);
997         return new;
998 }
999
1000 /**
1001  * Walker, to copy alle nodes that meet the walk_condition.
1002  * Blocks are copied first, which is important for the creation of the phi lists.
1003  *
1004  */
1005 static ir_node *copy_walk_n(ir_node *node, walker_condition *walk_condition, int copy_index, int alloc_cp_arr)
1006 {
1007         int             i;
1008         int             arity;
1009         unsigned        headblock = 0;
1010         ir_node         *block, *cp, *new_cp;
1011         ir_node         **ins = NULL;
1012
1013         if (irn_visited(node)) {
1014                 cp = get_copy(node, copy_index);
1015
1016                 DB((dbg, LEVEL_5, "Visited %N\n", node));
1017
1018                 if (cp != NULL) {
1019                         DB((dbg, LEVEL_5, "Visited. Return copy %N\n", cp));
1020                         return cp;
1021                 }
1022
1023                 if (!is_Block(node)) {
1024                         ir_node *unknown = new_node_unknown(
1025                                         get_nodes_block(node),
1026                                         get_irn_mode(node));
1027                         DB((dbg, LEVEL_5, " #### Create Unknown %N for %N\n", unknown, node));
1028                         set_copy(node, copy_index, unknown);
1029                         return unknown;
1030                 }
1031         } else {
1032                 DB((dbg, LEVEL_5, "Walk %N\n", node));
1033                 mark_irn_visited(node);
1034
1035                 /* Allocate array of copies */
1036                 if (alloc_cp_arr > 0) {
1037                         ir_node **arr = NEW_ARR_F(ir_node *, alloc_cp_arr);
1038                         alloc_node_info(node);
1039                         for (i = 0; i < alloc_cp_arr; ++i)
1040                                 arr[i] = NULL;
1041                         set_copy_arr(node, arr);
1042                 }
1043         }
1044
1045         block = NULL;
1046         if (!is_Block(node)) {
1047                 ir_node *orig_block = get_nodes_block(node);
1048                 DB((dbg, LEVEL_5, "current node: %N about to walk block %N\n", node, orig_block));
1049                 /* Check walk_condition for blocks not necessary */
1050
1051                 if (orig_block == loop_head && is_Phi(node))
1052                         headblock = 1;
1053
1054                 block = copy_walk_n(orig_block, walk_condition, copy_index, alloc_cp_arr);
1055                 DB((dbg, LEVEL_5, "current node: %N block %N blocks copy %N\n",
1056                                         node, orig_block, block));
1057         } else {
1058                 if (node == loop_head)
1059                         headblock = 1;
1060         }
1061
1062
1063         arity = get_irn_arity(node);
1064
1065         if (headblock) {
1066                 /* Headblock and headblock-phi case */
1067                 int in_c = 0;
1068                 DB((dbg, LEVEL_5, "Headblock/Phi from headblock\n"));
1069                 NEW_ARR_A(ir_node *, ins, unroll_arity);
1070
1071                 for (i = 0; i < arity; ++i) {
1072                         ir_node *ret;
1073                         ir_node *pred = get_irn_n(node, i);
1074                         if (is_backedge(loop_head, i)) {
1075                                 if (walk_condition(pred)) {
1076                                         ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1077                                         DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1078                                 } else {
1079                                         ret = pred;
1080                                         DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1081                                 }
1082                                 ins[in_c++] = ret;
1083                         }
1084                 }
1085                 arity = unroll_arity;
1086         } else {
1087                 /* Normal case */
1088                 NEW_ARR_A(ir_node *, ins, arity);
1089
1090                 for (i = 0; i < arity; ++i) {
1091                         ir_node *ret;
1092                         ir_node *pred = get_irn_n(node, i);
1093                         if (walk_condition(pred)) {
1094                                 ret = copy_walk_n(pred, walk_condition, copy_index, alloc_cp_arr);
1095                                 DB((dbg, LEVEL_5, "in %d = %N walked\n", i, ret));
1096                         } else {
1097                                 ret = pred;
1098                                 DB((dbg, LEVEL_5, "in %d = %N not walked\n", i, ret));
1099                         }
1100                         ins[i] = ret;
1101                 }
1102         }
1103
1104         /* NOW, after the walk, we may check if the blocks copy is already present. */
1105         cp = get_copy(node, copy_index);
1106         if (cp != NULL && !is_Unknown(cp)) {
1107                 DB((dbg, LEVEL_5, "Must be Block %N copy is already present\n", node, cp));
1108                 assert(is_Block(cp) &&
1109                                 "sanity: The copy should have been a block.");
1110                 return cp;
1111         }
1112
1113         new_cp = copy_node_changed(node, block, arity, ins, cur_loop);
1114         set_copy(node, copy_index, new_cp);
1115
1116         if (cp != NULL && is_Unknown(cp)) {
1117                 DB((dbg, LEVEL_5, "Found unknown %N in %N now exchanged by %N in %N\n",
1118                                         cp, get_irn_irg(cp), new_cp, get_irn_irg(new_cp)));
1119                 exchange(cp, new_cp);
1120         }
1121         return new_cp;
1122 }
1123
1124 /**
1125  * Populates head_entries with (node, pred_pos) tuple
1126  * whereas the node's pred at pred_pos is in the head but not the node itself.
1127  * Head and condition chain blocks must be marked.
1128  */
1129 static void get_head_outs(ir_node *node, void *env)
1130 {
1131         int i;
1132         int arity = get_irn_arity(node);
1133         (void) env;
1134
1135         DB((dbg, LEVEL_5, "get head entries %N \n", node));
1136
1137         for (i = 0; i < arity; ++i) {
1138                 /* node is not in the head, but the predecessor is.
1139                  * (head or loop chain nodes are marked) */
1140
1141                 DB((dbg, LEVEL_5, "... "));
1142                 DB((dbg, LEVEL_5, "node %N  marked %d (0)  pred %d marked %d (1) \n",
1143                                         node, is_nodes_block_marked(node), i,
1144                                         is_nodes_block_marked(get_irn_n(node, i))));
1145
1146                 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
1147                         out_edge entry;
1148                         entry.node = node;
1149                         entry.pos = i;
1150                         ARR_APP1(out_edge, cur_head_outs, entry);
1151
1152                         if (is_in_loop(node) && is_Block(node)) {
1153                                 loop_info.new_loop_head = node;
1154                                 DB((dbg, LEVEL_5, " SET NEW LOOPHEAD to %N\n", node));
1155                         }
1156                 }
1157         }
1158 }
1159
1160 /**
1161  * Find condition chains, and add them to be inverted
1162  * A block belongs to the chain if a condition branches out of the loop.
1163  * Returns 1 if the given block belongs to the condition chain.
1164  */
1165 static unsigned find_condition_chains(ir_node *block)
1166 {
1167         const ir_edge_t *edge;
1168         unsigned mark = 0;
1169         int nodes_n = 0;
1170
1171         DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
1172
1173         /* Collect all outs, including keeps. */
1174         foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
1175                 ++nodes_n;
1176         }
1177
1178         /* Leave at least one block as body. */
1179         if (head_inversion_node_count + nodes_n > inversion_head_node_limit
1180                         || head_inversion_block_count + 1 == loop_info.blocks) {
1181                 set_Block_mark(block, 0);
1182
1183                 return 0;
1184         }
1185
1186         /* First: check our successors,
1187          * and add all of them, which are outside of the loop, to the list */
1188         foreach_block_succ(block, edge) {
1189                 ir_node *src = get_edge_src_irn( edge );
1190                 int pos = get_edge_src_pos( edge );
1191
1192                 if (!is_in_loop(src)) {
1193                         out_edge entry;
1194
1195                         mark = 1;
1196                         entry.node = src;
1197                         entry.pos = pos;
1198                         ARR_APP1(out_edge, cond_chain_entries, entry);
1199                         mark_irn_visited(src);
1200                 }
1201         }
1202
1203         if (mark == 0) {
1204                 set_Block_mark(block, 0);
1205                 return 0;
1206         } else {
1207                 set_Block_mark(block, 1);
1208                 ++head_inversion_block_count;
1209                 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
1210                 head_inversion_node_count += nodes_n;
1211         }
1212
1213         /* Second: walk all successors,
1214          * and add them to the list if they are not part of the chain */
1215         foreach_block_succ(block, edge) {
1216                 unsigned inchain;
1217                 ir_node *src = get_edge_src_irn( edge );
1218                 int pos = get_edge_src_pos( edge );
1219
1220                 /* already done */
1221                 if (!is_in_loop( src ) || irn_visited(src)) {
1222                         continue;
1223                 }
1224
1225                 mark_irn_visited(src);
1226                 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
1227                 inchain = find_condition_chains(src);
1228
1229                 /* if successor is not part of chain we need to collect its outs */
1230                 if (!inchain) {
1231                         out_edge entry;
1232                         entry.node = src;
1233                         entry.pos = pos;
1234                         ARR_APP1(out_edge, cond_chain_entries, entry);
1235                 }
1236         }
1237         return mark;
1238 }
1239
1240 /**
1241  * Rewires the copied condition chain. Removes backedges.
1242  * as this condition chain is prior to the loop.
1243  * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1244  * (loop_head is already fixed, we cannot rely on it.)
1245  */
1246 static void fix_copy_inversion(void)
1247 {
1248         ir_node *new_head;
1249         ir_node **ins;
1250         ir_node **phis;
1251         ir_node *phi, *next;
1252         ir_node *head_cp        = get_inversion_copy(loop_head);
1253         int arity                       = get_irn_arity(head_cp);
1254         int backedges           = get_backedge_n(head_cp, 0);
1255         int new_arity           = arity - backedges;
1256         int pos;
1257         int i;
1258
1259         NEW_ARR_A(ir_node *, ins, new_arity);
1260
1261         pos = 0;
1262         /* Remove block backedges */
1263         for(i = 0; i < arity; ++i) {
1264                 if (!is_backedge(head_cp, i))
1265                         ins[pos++] = get_irn_n(head_cp, i);
1266         }
1267
1268         new_head = new_Block(new_arity, ins);
1269
1270         phis = NEW_ARR_F(ir_node *, 0);
1271
1272         for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1273                 ir_node *new_phi;
1274                 NEW_ARR_A(ir_node *, ins, new_arity);
1275                 pos = 0;
1276                 for(i = 0; i < arity; ++i) {
1277                         if (!is_backedge(head_cp, i))
1278                                 ins[pos++] = get_irn_n(phi, i);
1279                 }
1280                 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1281                                 new_head, new_arity, ins,
1282                                 get_irn_mode(phi));
1283                 ARR_APP1(ir_node *, phis, new_phi);
1284         }
1285
1286         pos = 0;
1287         for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1288                 exchange(phi, phis[pos++]);
1289         }
1290
1291         exchange(head_cp, new_head);
1292
1293         DEL_ARR_F(phis);
1294 }
1295
1296 /**/
1297 static ir_node *fix_inner_cc_definitions(ir_node *phi, ir_node *pred)
1298 {
1299         int i;
1300         ir_node *head_phi;
1301         ir_node **head_phi_ins;
1302         ir_node *new_loop_head = loop_info.new_loop_head;
1303         int new_loop_head_arity = get_irn_arity(new_loop_head);
1304         DB((dbg, LEVEL_5, "NEW HEAD %N arity %d\n", new_loop_head, new_loop_head_arity));
1305
1306         NEW_ARR_A(ir_node *, head_phi_ins, new_loop_head_arity);
1307
1308         for(i = 0; i < new_loop_head_arity; ++i) {
1309                 /* Rely on correct backedge info for the new loop head
1310                  * is set by extend_ins_by_copy. */
1311                 if(is_nodes_block_marked(get_irn_n(new_loop_head, i))) {
1312                         head_phi_ins[i] = pred;
1313                 } else {
1314                         head_phi_ins[i] = get_inversion_copy(pred);
1315                 }
1316                 DB((dbg, LEVEL_5, "NEW HEAD %N in %N %N\n", new_loop_head, head_phi_ins[i], current_ir_graph));
1317         }
1318
1319         head_phi= new_r_Phi(new_loop_head,
1320                         new_loop_head_arity,
1321                         head_phi_ins,
1322                         get_irn_mode(phi));
1323
1324         add_Block_phi(new_loop_head, head_phi);
1325
1326         DB((dbg, LEVEL_5, "fix inner cc definitions: new head %N has new phi %N from cc phi %N @%N\n",
1327                                 new_loop_head, head_phi, phi, pred));
1328
1329         return head_phi;
1330 }
1331
1332
1333 /* Puts the original condition chain at the end of the loop, subsequently to the body.
1334  * Relies on block phi list and correct backedges.
1335  */
1336 static void fix_head_inversion(void)
1337 {
1338         ir_node *new_head;
1339         ir_node **ins;
1340         ir_node *phi, *next;
1341         ir_node **phis;
1342         int arity                       = get_irn_arity(loop_head);
1343         int backedges           = get_backedge_n(loop_head, 0);
1344         int new_arity           = backedges;
1345         int pos;
1346         int i;
1347
1348         NEW_ARR_A(ir_node *, ins, new_arity);
1349
1350         pos = 0;
1351         /* Keep only backedges */
1352         for(i = 0; i < arity; ++i) {
1353                 if (is_own_backedge(loop_head, i))
1354                         ins[pos++] = get_irn_n(loop_head, i);
1355         }
1356
1357         new_head = new_Block(new_arity, ins);
1358
1359         phis = NEW_ARR_F(ir_node *, 0);
1360
1361         for_each_phi(loop_head, phi) {
1362                 ir_node *new_phi;
1363
1364                 NEW_ARR_A(ir_node *, ins, new_arity);
1365
1366                 pos = 0;
1367                 for(i = 0; i < arity; ++i) {
1368                         ir_node *pred = get_irn_n(phi, i);
1369
1370                         if (is_own_backedge(loop_head, i)) {
1371                                 DB((dbg, LEVEL_5, " !!! PHI not useful %N\n", phi));
1372
1373                                 /* If definition is in the condition chain,
1374                                  * we need to create a phi in the new loop head */
1375                                 if (is_nodes_block_marked(pred))
1376                                         ins[pos++] = fix_inner_cc_definitions(phi, pred);
1377                                 else
1378                                         ins[pos++] = pred;
1379                         }
1380                 }
1381                 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1382                         new_head, new_arity, ins,
1383                         get_irn_mode(phi));
1384
1385                 ARR_APP1(ir_node *, phis, new_phi);
1386
1387                 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N\n", phi, new_phi));
1388         }
1389
1390         pos = 0;
1391         for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1392                 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
1393                 if (phis[pos] != phi)
1394                         exchange(phi, phis[pos++]);
1395         }
1396
1397         DEL_ARR_F(phis);
1398
1399         DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1400         exchange(loop_head, new_head);
1401 }
1402
1403 /* Does the loop inversion.  */
1404 static void inversion_walk(out_edge *head_entries)
1405 {
1406         int i;
1407
1408         /*
1409          * The order of rewiring bottom-up is crucial.
1410          * Any change of the order leads to lost information that would be needed later.
1411          */
1412
1413         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1414
1415         /* 1. clone condition chain */
1416         inc_irg_visited(current_ir_graph);
1417
1418         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1419                 out_edge entry = head_entries[i];
1420                 ir_node *pred = get_irn_n(entry.node, entry.pos);
1421
1422                 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1423
1424                 copy_walk(pred, is_nodes_block_marked, cur_loop);
1425         }
1426
1427         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1428
1429         /* 2. Extends the head control flow successors ins
1430          *    by the definitions of the copied head node. */
1431         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1432                 out_edge head_out = head_entries[i];
1433
1434                 if (is_Block(head_out.node))
1435                         extend_ins_by_copy(head_out.node, head_out.pos);
1436         }
1437
1438         /* 3. construct_ssa for users of definitions in the condition chain,
1439          *    as there is now a second definition. */
1440         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1441                 out_edge head_out = head_entries[i];
1442                 /* Ignore keepalives */
1443                 if (is_End(head_out.node))
1444                         continue;
1445
1446                 ir_node *is_in_cc = get_nodes_block(get_irn_n(head_out.node, head_out.pos));
1447                 /* Construct ssa for definitions in the condition chain. */
1448                 /* TODO Why do we need the check for is_in_cc? All definitions should be there... */
1449                 if (!is_Block(head_out.node) && is_nodes_block_marked(is_in_cc)) {
1450                         ir_node *pred, *cppred, *block, *cpblock;
1451
1452                         pred = get_irn_n(head_out.node, head_out.pos);
1453                         cppred = get_inversion_copy(pred);
1454                         block = get_nodes_block(pred);
1455                         cpblock = get_nodes_block(cppred);
1456                         construct_ssa(block, pred, cpblock, cppred);
1457                 }
1458         }
1459
1460         /* 4. Remove the ins which are no backedges from the original condition chain,
1461          *    as the cc is now subsequently to the body. */
1462         fix_head_inversion();
1463
1464         /* 5. Remove the backedges of the copied condition chain,
1465          *    because it is going to be the new 'head' in advance to the loop */
1466         fix_copy_inversion();
1467 }
1468
1469 /* Analyse loop, if inversion is possible or reasonable. Then do the inversion. */
1470 static void loop_inversion(void)
1471 {
1472         unsigned do_inversion = 1;
1473
1474         inversion_head_node_limit = INT_MAX;
1475         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1476
1477         /* Reset block marks.
1478          * We use block marks to flag blocks of the original condition chain. */
1479         irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1480
1481         loop_info.blocks = get_loop_n_blocks(cur_loop);
1482         cond_chain_entries = NEW_ARR_F(out_edge, 0);
1483
1484         head_inversion_node_count = 0;
1485         head_inversion_block_count = 0;
1486
1487         set_Block_mark(loop_head, 1);
1488         mark_irn_visited(loop_head);
1489
1490         /* Use phase to keep copy of nodes from the condition chain. */
1491         phase = new_phase(current_ir_graph, phase_irn_init_default);
1492
1493         inc_irg_visited(current_ir_graph);
1494
1495         /* Search for condition chains. */
1496         find_condition_chains(loop_head);
1497
1498         DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1499
1500         if (loop_info.blocks < 2) {
1501                 do_inversion = 0;
1502                 DB((dbg, LEVEL_3,
1503                         "Loop contains %d (less than 2) blocks => No Inversion done.\n",
1504                         loop_info.blocks));
1505         }
1506
1507         /* We also catch endless loops here,
1508          * because they do not have a condition chain. */
1509         if (head_inversion_block_count < 1) {
1510                 do_inversion = 0;
1511                 DB((dbg, LEVEL_3,
1512                         "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1513                         head_inversion_block_count));
1514         }
1515
1516         if (do_inversion) {
1517                 cur_head_outs = NEW_ARR_F(out_edge, 0);
1518
1519                 /* Get all edges pointing into the condition chain. */
1520                 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1521
1522                 /* Do the inversion */
1523                 inversion_walk(cur_head_outs);
1524
1525                 DEL_ARR_F(cur_head_outs);
1526                 /* Duplicated blocks changed doms */
1527                 set_irg_doms_inconsistent(current_ir_graph);
1528                 /* Loop content changed */
1529                 set_irg_loopinfo_inconsistent(current_ir_graph);
1530                 /* TODO are they? */
1531                 set_irg_outs_inconsistent(current_ir_graph);
1532         }
1533
1534         /* free */
1535         phase_free(phase);
1536         DEL_ARR_F(cond_chain_entries);
1537         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1538 }
1539
1540 /**
1541  * Rewire floating copies of the current loop.
1542  */
1543 static void place_copies(int copies)
1544 {
1545         ir_node *loophead = loop_head;
1546         int headarity =         get_irn_arity(loophead);
1547         ir_node *phi;
1548         int i, pos, c;
1549
1550         pos = 0;
1551         /* Append the copies to the existing loop. */
1552         for (i = 0; i < headarity; ++i) {
1553                 /* Build unrolled loop top down */
1554                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1555                         for (c = 0; c < copies; ++c) {
1556                                 ir_node *upper_head;
1557                                 ir_node *lower_head = get_copy(loophead, c);
1558                                 ir_node *upper_pred;
1559
1560                                 if (c == 0) {
1561                                         upper_head = loophead;
1562                                         upper_pred = get_irn_n(loophead, i);
1563                                 } else {
1564                                         upper_head = get_copy(loophead, c - 1);
1565                                         upper_pred = get_copy_safe(get_irn_n(loophead, i), c - 1);
1566                                 }
1567                                 set_irn_n(lower_head, pos, upper_pred);
1568                                 for_each_phi(loophead, phi) {
1569                                         ir_node *lower_phi, *upper_phi, *upper_pred_phi;
1570                                         lower_phi = get_copy(phi, c);
1571                                         if (c == 0) {
1572                                                 upper_phi = phi;
1573                                                 upper_pred_phi = get_irn_n(phi, i);
1574                                         } else {
1575                                                 upper_phi = get_copy(phi, c - 1);
1576                                                 upper_pred_phi = get_copy_safe(get_irn_n(phi, i), c - 1);
1577                                         }
1578
1579                                         DB((dbg, LEVEL_5, "Rewire %N @%d to %N\n",
1580                                                                 lower_phi, pos, upper_pred_phi));
1581
1582                                         /* Relying on phi nodes with 1 in to be present. */
1583                                         assert(is_Phi(lower_phi));
1584
1585 /* Do not fix phis here. Chained phis will cause a defective copy array. */
1586 #if 0
1587                                         if (get_irn_arity(lower_phi) == 1) {
1588                                                 ir_node *single_in[1];
1589                                                 ir_node *cp;
1590                                                 single_in[0] = upper_pred_phi;
1591
1592                                                 cp = new_rd_Phi(get_irn_dbg_info(lower_phi),
1593                                                                 get_nodes_block(lower_phi), 1, single_in,
1594                                                                 get_irn_mode(lower_phi));
1595
1596                                                 set_copy(phi, c, cp);
1597                                                 exchange(lower_phi, cp);
1598                                         } else
1599 #endif
1600                                                 set_irn_n(lower_phi, pos, upper_pred_phi);
1601
1602                                 }
1603                         }
1604                         ++pos;
1605                         /* Fix the topmost loop heads backedges. */
1606                         set_irn_n(loophead, i, get_copy(get_irn_n(loophead, i), copies - 1));
1607                         for_each_phi(loophead, phi) {
1608                                 ir_node *last_cp = get_copy_safe(get_irn_n(phi, i), copies - 1);
1609                                 set_irn_n(phi, i, last_cp);
1610                         }
1611                 }
1612         }
1613 }
1614
1615 /* Copies the cur_loop */
1616 static void copy_loop(out_edge *cur_loop_outs, int copies)
1617 {
1618         int i, c;
1619         unroll_arity = get_backedge_n(loop_head, 0);
1620
1621         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1622
1623         /* copy loop */
1624         for (c = 0; c < copies; ++c) {
1625
1626                 inc_irg_visited(current_ir_graph);
1627
1628                 DB((dbg, LEVEL_5, "          ### Copy_loop  copy n: %d ###\n", c));
1629                 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1630                         out_edge entry = cur_loop_outs[i];
1631                         ir_node *pred = get_irn_n(entry.node, entry.pos);
1632
1633                         copy_walk_n(pred, is_in_loop, c, c ? 0 : copies);
1634                 }
1635         }
1636
1637         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1638 }
1639
1640 /**/
1641 static void create_duffs_device(int unroll_number)
1642 {
1643         (void)unroll_number;
1644 }
1645
1646 #if 1
1647
1648 static unsigned is_loop_invariant_val(ir_node *node)
1649 {
1650         int i;
1651
1652         if (! is_in_loop(node))
1653                 return 1;
1654
1655         /* TODO all cases? and how to use the dom tree for that. */
1656
1657         if (is_Phi(node)) {
1658                 for (i = 0; i < get_irn_arity(node); ++i) {
1659                         if (is_own_backedge(get_nodes_block(node), i) && get_irn_n(node, i) != node)
1660                                 return 0;
1661                 }
1662         }
1663         return 1;
1664 }
1665
1666 /* TODO name */
1667 static ir_node *is_loop_iteration(ir_node *node)
1668 {
1669         int i;
1670         ir_node *found_add = NULL;
1671
1672         if (!is_in_loop(node))
1673                 return NULL;
1674
1675         if (is_Add(node))
1676                 return node;
1677
1678         /*TODO all cases? and how do i use the dom tree for that.*/
1679
1680         if (is_Phi(node)) {
1681                 for (i = 0; i < get_irn_arity(node); ++i) {
1682                         ir_node *new_found;
1683                         if (! is_own_backedge(get_nodes_block(node), i))
1684                                 continue;
1685                         new_found = get_irn_n(node,i);
1686
1687                         if (found_add && found_add != new_found)
1688                                 return NULL;
1689
1690                         found_add = new_found;
1691                 }
1692         }
1693         return found_add;
1694 }
1695
1696 /* counting loop */
1697 static unsigned get_unroll_decision(out_edge *cur_loop_outs)
1698 {
1699         int i;
1700         ir_node *pred0, *pred1, *projx, *cond, *projres, *loop_condition;
1701         ir_node *iteration, *add, *iteration_phi, *end_val, *step, *add_in0, *add_in1;
1702
1703         (void) cur_loop_outs;
1704         (void) i;
1705
1706
1707         /* There is more than 1 condition for the loop exit. We cannot cope this with duffs device. */
1708         if (loop_info.cf_outs != 1)
1709                 return 0;
1710
1711         /*TODO test for impact on performance */
1712         if (loop_info.calls > 0)
1713                 return 0;
1714
1715
1716         projx = get_irn_n(loop_info.cf_out.node, loop_info.cf_out.pos);
1717         cond = get_irn_n(projx, 0);
1718         projres = get_irn_n(cond, 0);
1719         loop_condition = get_irn_n(projres, 0);
1720
1721         DB((dbg, LEVEL_2, " loop condition %N\n", loop_condition));
1722
1723         /* One in of the loop condition needs to be loop invariant.
1724          * The other in is assigned by an add.
1725          * The add uses a loop invariant value
1726          * and another value that depends on a loop invariant start value and the add itself.
1727
1728            ^   ^
1729            |   | .-,
1730            |   Phi |
1731                 \  |   |
1732                  Add  /
1733               |__/
1734         */
1735
1736         end_val = NULL;
1737         /* Find the loop invariant in */
1738         /* Assumption that condition is cmp or mod.
1739            TODO other? */
1740         pred0 = get_irn_n(loop_condition, 0);
1741         pred1 = get_irn_n(loop_condition, 1);
1742
1743         if (is_loop_invariant_val(pred0)) {
1744                 end_val = pred0;
1745                 iteration = pred1;
1746         }
1747
1748
1749         if (is_loop_invariant_val(pred1)) {
1750                 if (end_val != NULL)
1751                         /* RETURN. loop condition does not change during the loop. */
1752                         return 0;
1753
1754                 end_val = pred1;
1755                 iteration = pred0;
1756         }
1757
1758         DB((dbg, LEVEL_2, " end_val %N\n", end_val));
1759
1760         /* Check for constraint of remaining in. */
1761         add = is_loop_iteration(iteration);
1762         if (! add)
1763                 return 0;
1764
1765         DB((dbg, LEVEL_2, " add %N\n", add));
1766
1767         add_in0 = get_irn_n(add, 0);
1768         add_in1 = get_irn_n(add, 1);
1769
1770         if (is_loop_invariant_val(add_in0)) {
1771                 step = add_in0;
1772                 iteration_phi = add_in1;
1773         }
1774
1775         if (is_loop_invariant_val(add_in1)) {
1776                 if (step != NULL)
1777                         return 0;
1778                 step = add_in1;
1779                 iteration_phi = add_in0;
1780         }
1781
1782         DB((dbg, LEVEL_2, " step %N\n", step));
1783
1784         if (! is_Phi(iteration_phi)) {
1785                 if (is_Phi(iteration))
1786                         iteration_phi = iteration;
1787                 else
1788                         return 0;
1789         }
1790
1791         DB((dbg, LEVEL_2, " #### COUNTING LOOP step %N end %N phi %N\n", step, end_val, iteration_phi));
1792
1793 #if 0
1794         phi for startval
1795
1796         mode = get_irn_mode(add);
1797
1798         block = new_Block(0, NULL);
1799         /*set_irg_current_Block(current_ir_graph, block);*/
1800
1801         /*TODO*/
1802         unroll_mod_const = new_Const(unroll_number);
1803         module = new_r_Mod(block, new_NoMem(), end_val, step, mode, op_pin_state_pinned);
1804         /*new_Proj();*/
1805
1806
1807 #endif
1808         return 0;
1809 }
1810 #endif
1811
1812 /**
1813  * Loop unrolling
1814  */
1815 static void unroll_loop(void)
1816 {
1817         int i;
1818         unsigned unroll_number;
1819         out_edge *ssa_outs;
1820         cur_loop_outs = NEW_ARR_F(out_edge, 0);
1821
1822         /* Get loop outs */
1823         irg_walk_graph(current_ir_graph, get_loop_outs, NULL, NULL);
1824
1825         unroll_number = get_unroll_decision(cur_loop_outs);
1826
1827         /*dump_ir_block_graph(current_ir_graph, "-pre");*/
1828
1829         if (unroll_number) {
1830                 create_duffs_device(unroll_number);
1831
1832                 /* Copies the loop and allocs node_info */
1833                 copy_loop(cur_loop_outs, unroll_number - 1);
1834                 /*dump_ir_block_graph(current_ir_graph, "-cp");*/
1835
1836                 /* Line up the floating copies. */
1837                 place_copies(unroll_number - 1);
1838                 /*dump_ir_block_graph(current_ir_graph, "-ord");*/
1839
1840                 /* Extend loop successors */
1841                 ssa_outs = extend_ins(cur_loop_outs, unroll_number - 1);
1842                 /*dump_ir_block_graph(current_ir_graph, "-fixed")*/ ;
1843
1844                 /* Links are going to be overwritten */
1845                 free_node_info_blocks();
1846
1847                 /* Generate phis for all loop outs */
1848                 for (i = 0; i < ARR_LEN(ssa_outs); ++i) {
1849                         out_edge entry = ssa_outs[i];
1850                         ir_node *node = entry.node;
1851                         ir_node *pred = get_irn_n(entry.node, entry.pos);
1852
1853                         DB((dbg, LEVEL_5, "Do? construct_ssa_n def %N  node %N  pos %d\n",
1854                                         pred, node, entry.pos));
1855
1856                         if (!is_End(node)) {
1857                                 DB((dbg, LEVEL_5, "construct_ssa_n def %N  node %N  pos %d\n",
1858                                                         pred, node, entry.pos));
1859
1860                                 construct_ssa_n(pred, node, unroll_number - 1);
1861                                 /*if (new_phi)
1862                                         construct_ssa_n(pred, new_phi, unroll_number - 1);*/
1863                         }
1864                 }
1865                 /*dump_ir_block_graph(current_ir_graph, "-ssa-done");*/
1866
1867                 /* FREE */
1868                 free_node_info();
1869
1870                 irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);
1871
1872                 DEL_ARR_F(ssa_outs);
1873
1874                 set_irg_doms_inconsistent(current_ir_graph);
1875                 set_irg_loopinfo_inconsistent(current_ir_graph);
1876                 set_irg_outs_inconsistent(current_ir_graph);
1877         }
1878         DEL_ARR_F(cur_loop_outs);
1879 }
1880
1881 /* Initialization and */
1882 static void init_analyze(ir_loop *loop)
1883 {
1884         /* Init new for every loop */
1885         cur_loop = loop;
1886
1887         loop_head = NULL;
1888         loop_head_valid = 1;
1889         loop_inv_head = NULL;
1890         loop_peeled_head = NULL;
1891
1892         loop_info.calls = 0;
1893         loop_info.blocks = 0;
1894         loop_info.cf_outs = 0;
1895
1896         DB((dbg, LEVEL_2, "          >>>> current loop includes node %N <<<\n",
1897                 get_loop_node(loop, 0)));
1898
1899         irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1900
1901         /* RETURN if there is no valid head */
1902         if (!loop_head || !loop_head_valid) {
1903                 DB((dbg, LEVEL_2,   "No valid loop head. Nothing done.\n"));
1904                 return;
1905         }
1906
1907         DB((dbg, LEVEL_2,   "Loophead: %N\n", loop_head));
1908
1909         if (enable_inversion)
1910                 loop_inversion();
1911
1912         if (enable_unrolling)
1913                 unroll_loop();
1914
1915         DB((dbg, LEVEL_2, "             <<<< end of loop with node %N >>>>\n",
1916                 get_loop_node(loop, 0)));
1917 }
1918
1919 /* Find most inner loops and send them to analyze_loop */
1920 static void find_most_inner_loop(ir_loop *loop)
1921 {
1922         /* descend into sons */
1923         int sons = get_loop_n_sons(loop);
1924
1925         if (sons == 0) {
1926                 ARR_APP1(ir_loop *, loops, loop);
1927         } else {
1928                 int s;
1929                 for (s=0; s<sons; s++) {
1930                         find_most_inner_loop(get_loop_son(loop, s));
1931                 }
1932         }
1933 }
1934
1935 /**
1936  * Assure preconditions are met and go through all loops.
1937  */
1938 void loop_optimization(ir_graph *irg)
1939 {
1940         ir_loop *loop;
1941         int     i, sons, nr;
1942
1943         /* Init */
1944         free_list = NULL;
1945         set_current_ir_graph(irg);
1946
1947         edges_assure(irg);
1948         assure_irg_outs(irg);
1949
1950         /* NOTE: sets only the loop attribute of blocks, not nodes */
1951         /* NOTE: Kills links */
1952         assure_cf_loop(irg);
1953
1954         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
1955         collect_phiprojs(irg);
1956         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1957
1958         loop = get_irg_loop(irg);
1959         sons = get_loop_n_sons(loop);
1960
1961         loops = NEW_ARR_F(ir_loop *, 0);
1962         /* List all inner loops */
1963         for (nr = 0; nr < sons; ++nr) {
1964                 find_most_inner_loop(get_loop_son(loop, nr));
1965         }
1966
1967         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1968         /* Set all links NULL */
1969         irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
1970
1971         for (i = 0; i < ARR_LEN(loops); ++i) {
1972                 ir_loop *loop = loops[i];
1973                 init_analyze(loop);
1974
1975                 /*dump_ir_block_graph(current_ir_graph, "-part");*/
1976                 collect_phiprojs(irg);
1977                 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
1978         }
1979
1980         /*dump_ir_block_graph(current_ir_graph, "-full");*/
1981
1982         DEL_ARR_F(loops);
1983         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1984         ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
1985 }
1986
1987 void do_loop_unrolling(ir_graph *irg)
1988 {
1989         enable_unrolling = 1;
1990         enable_peeling = 0;
1991         enable_inversion = 0;
1992
1993         DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
1994                                 get_irg_start(irg)));
1995
1996         loop_optimization(irg);
1997
1998         DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
1999                                 get_irg_start(irg)));
2000 }
2001
2002 void do_loop_inversion(ir_graph *irg)
2003 {
2004         enable_unrolling = 0;
2005         enable_peeling = 0;
2006         enable_inversion = 1;
2007
2008         DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
2009                                 get_irg_start(irg)));
2010
2011         loop_optimization(irg);
2012
2013         DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
2014                                 get_irg_start(irg)));
2015 }
2016
2017 void do_loop_peeling(ir_graph *irg)
2018 {
2019         enable_unrolling = 0;
2020         enable_peeling = 1;
2021         enable_inversion = 0;
2022
2023         DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
2024                                 get_irg_start(irg)));
2025
2026         loop_optimization(irg);
2027
2028         DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
2029                                 get_irg_start(irg)));
2030
2031 }
2032
2033 ir_graph_pass_t *loop_inversion_pass(const char *name)
2034 {
2035         return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
2036 }
2037
2038 ir_graph_pass_t *loop_unroll_pass(const char *name)
2039 {
2040         return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
2041 }
2042
2043 ir_graph_pass_t *loop_peeling_pass(const char *name)
2044 {
2045         return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
2046 }
2047
2048 void firm_init_loop_opt(void)
2049 {
2050         FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
2051 }