invalidate phase info before starting the code selection; temporary disable freeing...
[libfirm] / ir / opt / loop.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @author   Christian Helmer
23  * @brief    loop inversion and loop unrolling, loop peeling
24  *
25  * @version  $Id$
26  */
27 #include "config.h"
28
29 #include "iroptimize.h"
30 #include "opt_init.h"
31 #include "irnode.h"
32 #include "debug.h"
33
34 #include "ircons.h"
35 #include "irgopt.h"
36 #include "irgmod.h"
37 #include "irgwalk.h"
38 #include "irouts.h"
39 #include "iredges.h"
40 #include "irtools.h"
41 #include "array_t.h"
42 #include "beutil.h"
43 #include "irloop_t.h"
44 #include "irpass.h"
45
46 DEBUG_ONLY(static firm_dbg_module_t *dbg);
47
48 /**
49  * Convenience macro for iterating over every phi node of the given block.
50  * Requires phi list per block.
51  */
52 #define for_each_phi(block, phi) \
53         for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
54
55 /* current loop */
56 static ir_loop *cur_loop;
57
58 /* abortable walker function */
59 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
60
61 /* condition for walking a node during a copy_walk */
62 typedef unsigned walker_condition(ir_node *);
63
64 /* node and position of a predecessor */
65 typedef struct out_edges {
66         ir_node *node;
67         int pred_irn_n;
68 } out_edge;
69
70 /* access complex values through the nodes links */
71 typedef struct node_info {
72         unsigned invariant:1;
73         ir_node *copy;
74         ir_node *link;                                  /* temporary links for ssa creation */
75         ir_node **ins;                                  /* ins for phi nodes, during rewiring of blocks */
76         unsigned done;
77         struct node_info *freelistnext; /* linked list to free all node_infos */
78 } node_info;
79
80 static node_info *link_node_state_list;         /* head of the linked list to free all node_infos */
81
82 static out_edge *cur_loop_outs;                         /* A walker may start visiting the current loop with these nodes. */
83 static out_edge *cur_head_outs;                         /* A walker may start visiting the cur head with these nodes. */
84
85 static ir_node *loop_cf_head = NULL;                            /* Loop head node */
86 static unsigned loop_cf_head_valid = 1;                         /* A loop may have one head, otherwise we do not touch it. */
87
88 ir_node **loops;
89
90 /* Inverted head */
91 static ir_node *loop_inv_head = NULL;
92 /* Peeled head */
93 static ir_node *loop_peeled_head = NULL;
94
95 /* Loop analysis informations */
96 typedef struct loop_info_t {
97         unsigned blocks;                        /* number of blocks in the loop */
98         unsigned calls;                         /* number of calls */
99         unsigned loads;                         /* number of load nodes */
100         unsigned outs;                          /* outs without keepalives */
101 #if 0
102         unsigned invariant_loads;
103         unsigned stores;                        /* number of store nodes */
104         unsigned opnodes_n;                     /* nodes that probably result in an instruction */
105         unsigned do_invariant_opt;
106 #endif
107 } loop_info_t;
108
109 /* Information about the current loop */
110 static loop_info_t loop_info;
111
112 /* A walker may start visiting a condition chain with these nodes. */
113 static out_edge *cond_chain_entries;
114
115 /* Number of unrolling */
116 int unroll_times;
117
118 static unsigned head_inversion_node_count;
119 static unsigned inversion_head_node_limit;
120 static unsigned head_inversion_block_count;
121
122 static unsigned enable_peeling;
123 static unsigned enable_inversion;
124 static unsigned enable_unrolling;
125
126 /**
127  *
128  * ============= AUXILIARY FUNCTIONS =====================================
129  */
130
131
132 /**
133  * Creates object on the heap, and adds it to a linked list to free it later.
134  */
135 static node_info *new_node_info(void)
136 {
137         node_info *l = XMALLOCZ(node_info);
138         l->freelistnext = link_node_state_list;
139         link_node_state_list = l;
140         l->copy = NULL;
141         l->invariant = 0;
142         return l;
143 }
144
145 static node_info *get_node_info(ir_node *n)
146 {
147         return ((node_info *)get_irn_link(n));
148 }
149
150 /* Allocates a node_info struct for the given node. For use with a walker. */
151 static void alloc_node_info(ir_node *node, void *env)
152 {
153         node_info *state;
154         (void) env;
155         state = new_node_info();
156         set_irn_link(node, (void *)state);
157 }
158
159 static void free_node_info(void)
160 {
161         int a = 0;
162         node_info *n;
163         n = link_node_state_list;
164         while (n) {
165                 node_info *next = n->freelistnext;
166                 ++a;
167                 xfree(n);
168                 n = next;
169         }
170         link_node_state_list = NULL;
171 }
172
173 /**
174  * Use the linked list to reset the reused values of all node_infos
175  * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
176  */
177 static void reset_node_infos(void)
178 {
179         node_info *next;
180         next = link_node_state_list;
181         while (next->freelistnext) {
182                 node_info *cur = next;
183                 next = cur->freelistnext;
184                 cur->copy = NULL;
185                 cur->ins = NULL;
186                 cur->link = NULL;
187         }
188 }
189
190 /* Returns the nodes node_info link. */
191 static ir_node *get_link(ir_node *n)
192 {
193         return ((node_info *)get_irn_link(n))->link;
194 }
195
196 /* Sets the nodes node_info link. */
197 static void set_link(ir_node *n, ir_node *link)
198 {
199         ((node_info *)get_irn_link(n))->link = link;
200 }
201
202 /* Returns a nodes copy. */
203 static ir_node *get_copy(ir_node *n)
204 {
205         return ((node_info *)get_irn_link(n))->copy;
206 }
207
208 /* Sets a nodes copy. */
209 static void set_copy(ir_node *n, ir_node *copy)
210 {
211         ((node_info *)get_irn_link(n) )->copy = copy;
212 }
213
214 /**
215  * Convenience macro for iterating over every copy in a linked list
216  * of copies.
217  */
218 #define for_each_copy(node) \
219         for ( ; (node) ; (node) = get_copy(node))
220
221 /**
222  * Convenience macro for iterating over every copy in 2 linked lists
223  * of copies in parallel.
224  */
225 #define for_each_copy2(high1, low1, high2, low2) \
226         for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
227                                                         (high2) = (low2), (low2) = get_copy(low2))
228
229 /*
230  * Returns 0 if the node or block is not in cur_loop.
231  */
232 static unsigned is_in_loop(ir_node *node)
233 {
234         return (get_irn_loop(get_block(node)) == cur_loop);
235 }
236
237 /* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
238 static unsigned is_alien_edge(ir_node *n, int i)
239 {
240         return(!is_in_loop(get_irn_n(n, i)));
241 }
242
243 /* used for block walker */
244 static void reset_block_mark(ir_node *node, void * env)
245 {
246         (void) env;
247
248         if (is_Block(node))
249                 set_Block_mark(node, 0);
250 }
251
252 static unsigned is_nodesblock_marked(ir_node* node)
253 {
254         return (get_Block_mark(get_block(node)));
255 }
256
257 /* Returns the number of blocks in a loop. */
258 static int get_loop_n_blocks(ir_loop *loop)
259 {
260         int elements, e;
261         int blocks = 0;
262         elements = get_loop_n_elements(loop);
263
264         for (e=0; e<elements; e++) {
265                 loop_element elem = get_loop_element(loop, e);
266                 if (is_ir_node(elem.kind) && is_Block(elem.node))
267                         ++blocks;
268         }
269         return blocks;
270 }
271
272 /**
273  * Add newpred at position pos to node and also add the corresponding value to the phis.
274  * Requires block phi list.
275  */
276 static int duplicate_preds(ir_node* block, unsigned pos, ir_node* newpred)
277 {
278         ir_node **ins;
279         /*int *is_be;*/
280         ir_node *phi;
281         int block_arity;
282         int i;
283
284         assert(is_Block(block) && "duplicate_preds may be called for blocks only");
285
286         DB((dbg, LEVEL_5, "duplicate_preds(node %N, pos %d, newpred %N)\n", block, pos, newpred));
287
288         block_arity = get_irn_arity(block);
289
290         NEW_ARR_A(ir_node*, ins, block_arity + 1);
291
292         for (i = 0; i < block_arity; ++i) {
293                 ins[i] = get_irn_n(block, i);
294         }
295         ins[block_arity] = newpred;
296
297         set_irn_in(block, block_arity + 1, ins);
298
299         /* LDBG 1 */
300 #if 1
301         for_each_phi(block, phi) {
302                 int phi_arity = get_irn_arity(phi);
303                 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %N\n", phi));
304
305                 NEW_ARR_A(ir_node *, ins, block_arity + 1);
306                 for (i = 0; i < phi_arity; ++i) {
307                         DB((dbg, LEVEL_5, "pos %N\n", get_irn_n(phi, i)));
308                         ins[i] = get_irn_n(phi, i);
309                 }
310                 ins[block_arity] = get_copy(get_irn_n(phi, pos));
311                 set_irn_in(phi, block_arity + 1, ins);
312         }
313 #endif
314         return block_arity;
315 }
316
317 /**
318  * Finds loop head and loop_info.
319  */
320 static void get_loop_info(ir_node *node, void *env)
321 {
322         unsigned node_in_loop, pred_in_loop;
323         int i, arity;
324         (void) env;
325
326         arity = get_irn_arity(node);
327         for (i = 0; i < arity; i++) {
328                 ir_node *pred = get_irn_n(node, i);
329
330                 pred_in_loop = is_in_loop(pred);
331                 node_in_loop = is_in_loop(node);
332
333                 /* collect some loop information */
334                 if (node_in_loop) {
335                         if (is_Call(node))
336                                 ++loop_info.calls;
337                 }
338
339                 /* Find the loops head/the blocks with cfpred outside of the loop */
340                 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
341                         ir_node *cfgpred = get_Block_cfgpred(node, i);
342
343                         if (!is_in_loop(cfgpred)) {
344                                 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
345                                 /* another head? We do not touch this. */
346                                 if (loop_cf_head && loop_cf_head != node) {
347                                         loop_cf_head_valid = 0;
348                                 } else {
349                                         loop_cf_head = node;
350                                 }
351                         }
352                 }
353         }
354 }
355
356 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
357 static void get_loop_outs(ir_node *node, void *env)
358 {
359         unsigned node_in_loop, pred_in_loop;
360         int i, arity;
361         (void) env;
362
363         arity = get_irn_arity(node);
364         for (i = 0; i < arity; ++i) {
365                 ir_node *pred = get_irn_n(node, i);
366
367                 pred_in_loop = is_in_loop(pred);
368                 node_in_loop = is_in_loop(node);
369
370                 if (pred_in_loop && !node_in_loop) {
371                         out_edge entry;
372                         entry.node = node;
373                         entry.pred_irn_n = i;
374                         ARR_APP1(out_edge, cur_loop_outs, entry);
375                 }
376         }
377 }
378
379 static ir_node *ssa_second_def;
380 static ir_node *ssa_second_def_block;
381
382 /**
383  * Walks the graph bottom up, searching for definitions and creates phis.
384  */
385 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
386 {
387         int i;
388         int n_cfgpreds;
389         ir_graph *irg;
390         ir_node *phi;
391         ir_node **in;
392
393         DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
394
395         /* Prevents creation of phi that would be bad anyway.
396          * Dead and bad blocks. */
397         if (get_irn_arity(block) < 1 || is_Bad(block))
398                 return new_Bad();
399
400         if (block == ssa_second_def_block) {
401                 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
402                 return ssa_second_def;
403         }
404
405         /* already processed this block? */
406         if (irn_visited(block)) {
407                 ir_node *value = get_link(block);
408                 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
409                 return value;
410         }
411
412         irg = get_irn_irg(block);
413         assert(block != get_irg_start_block(irg));
414
415         /* a Block with only 1 predecessor needs no Phi */
416         n_cfgpreds = get_Block_n_cfgpreds(block);
417         if (n_cfgpreds == 1) {
418                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
419                 ir_node *value;
420
421                 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
422
423                 value = search_def_and_create_phis(pred_block, mode);
424                 set_link(block, value);
425                 mark_irn_visited(block);
426
427                 return value;
428         }
429
430         /* create a new Phi */
431         NEW_ARR_A(ir_node*, in, n_cfgpreds);
432         for (i = 0; i < n_cfgpreds; ++i)
433                 in[i] = new_Unknown(mode);
434
435         phi = new_r_Phi(block, n_cfgpreds, in, mode);
436
437         /* Important: always keep block phi list up to date. */
438         add_Block_phi(block, phi);
439         /* EVERY node is assumed to have a node_info linked. */
440         alloc_node_info(phi, NULL);
441
442         DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
443
444         set_link(block, phi);
445         mark_irn_visited(block);
446
447         /* set Phi predecessors */
448         for (i = 0; i < n_cfgpreds; ++i) {
449                 ir_node *pred_val;
450                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
451                 assert(pred_block != NULL);
452                 pred_val = search_def_and_create_phis(pred_block, mode);
453                 assert(pred_val != NULL);
454
455                 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
456                 set_irn_n(phi, i, pred_val);
457         }
458         return phi;
459 }
460
461 /**
462  * Given a set of values this function constructs SSA-form for the users of the
463  * first value (the users are determined through the out-edges of the value).
464  * Works without using the dominance tree.
465  */
466 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
467                           ir_node *second_block, ir_node *second_val)
468 {
469         ir_graph *irg;
470         ir_mode *mode;
471         const ir_edge_t *edge;
472         const ir_edge_t *next;
473
474         assert(orig_block && orig_val && second_block && second_val &&
475                         "no parameter of construct_ssa may be NULL");
476
477         /* no need to do anything */
478         if (orig_val == second_val)
479                 return;
480
481         irg = get_irn_irg(orig_val);
482
483         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
484         inc_irg_visited(irg);
485
486         mode = get_irn_mode(orig_val);
487         set_link(orig_block, orig_val);
488         mark_irn_visited(orig_block);
489
490         ssa_second_def_block = second_block;
491         ssa_second_def       = second_val;
492
493         /* Only fix the users of the first, i.e. the original node */
494         foreach_out_edge_safe(orig_val, edge, next) {
495                 ir_node *user = get_edge_src_irn(edge);
496                 int j = get_edge_src_pos(edge);
497                 ir_node *user_block = get_nodes_block(user);
498                 ir_node *newval;
499
500                 /* ignore keeps */
501                 if (is_End(user))
502                         continue;
503
504                 DB((dbg, LEVEL_5, "original user %N\n", user));
505
506                 if (is_Phi(user)) {
507                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
508                         newval = search_def_and_create_phis(pred_block, mode);
509                 } else {
510                         newval = search_def_and_create_phis(user_block, mode);
511                 }
512
513                 /* If we get a bad node the user keeps the original in. No second definition needed. */
514                 if (newval != user && !is_Bad(newval))
515                         set_irn_n(user, j, newval);
516         }
517
518         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
519 }
520
521 /*
522  * Construct SSA for def and all of its copies.
523  */
524 static void construct_ssa_n(ir_node *def, ir_node *user)
525 {
526         ir_graph *irg;
527         ir_mode *mode;
528         ir_node *iter = def;
529         const ir_edge_t *edge;
530         const ir_edge_t *next;
531         irg = get_irn_irg(def);
532
533         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
534         inc_irg_visited(irg);
535
536         mode = get_irn_mode(def);
537
538         for_each_copy(iter) {
539                 set_link(get_nodes_block(iter), iter);
540                 mark_irn_visited(get_nodes_block(iter));
541
542                 DB((dbg, LEVEL_5, "ssa_n:  Link def %N to block %N\n",
543                                                                         iter, get_nodes_block(iter)));
544         }
545
546         /* Need to search the outs, because we need the in-pos on the user node. */
547         foreach_out_edge_safe(def, edge, next) {
548                 ir_node *edge_user = get_edge_src_irn(edge);
549                 int edge_src = get_edge_src_pos(edge);
550                 ir_node *user_block = get_nodes_block(user);
551                 ir_node *newval;
552
553                 if (edge_user != user)
554                         continue;
555
556                 if (is_Phi(user)) {
557                         ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src);
558                         newval = search_def_and_create_phis(pred_block, mode);
559                 } else {
560                         newval = search_def_and_create_phis(user_block, mode);
561                 }
562
563                 if (newval != user && !is_Bad(newval))
564                         set_irn_n(user, edge_src, newval);
565         }
566
567         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
568 }
569
570 /**
571  * Construct SSA for all definitions in arr.
572  */
573 static void construct_ssa_foreach(ir_node **arr, int arr_n)
574 {
575         int i;
576         for (i = 0; i < arr_n ; ++i) {
577                 ir_node *cppred, *block, *cpblock, *pred;
578
579                 pred = arr[i];
580                 cppred = get_copy(pred);
581                 block = get_nodes_block(pred);
582                 cpblock = get_nodes_block(cppred);
583                 construct_ssa(block, pred, cpblock, cppred);
584         }
585 }
586
587 /* get the number of backedges without alien bes */
588 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
589 {
590         int i;
591         int be_n = 0;
592         int arity = get_irn_arity(loophead);
593         for (i = 0; i < arity; ++i) {
594                 ir_node *pred = get_irn_n(loophead, i);
595                 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
596                         ++be_n;
597         }
598         return be_n;
599 }
600
601 /**
602  * Rewires the heads after peeling.
603  */
604 static void peel_fix_heads(void)
605 {
606         ir_node **loopheadnins, **peelheadnins;
607         ir_node *loophead = loop_cf_head;
608         ir_node *peelhead = get_copy(loophead);
609
610         int headarity = get_irn_arity(loophead);
611         ir_node *phi;
612         int i;
613
614         int lheadin_c = 0;
615         int pheadin_c = 0;
616
617         int backedges_n = get_backedge_n(loophead, 0);
618
619         int lhead_arity = 2 * backedges_n;
620         int phead_arity = headarity - backedges_n;
621
622         /* new in arrays */
623         NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
624         NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
625
626         for_each_phi(loophead, phi) {
627                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
628         }
629         for_each_phi(peelhead, phi) {
630                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
631         }
632
633         for (i = 0; i < headarity; i++)
634         {
635                 ir_node *orgjmp = get_irn_n(loophead, i);
636                 ir_node *copyjmp = get_copy(orgjmp);
637
638                 /**
639                  * Rewire the head blocks ins and their phi ins.
640                  * Requires phi list per block.
641                  */
642                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
643                         loopheadnins[lheadin_c] = orgjmp;
644                         for_each_phi(loophead, phi) {
645                                 get_node_info( phi )->ins[lheadin_c] =  get_irn_n( phi, i) ;
646                         }
647                         ++lheadin_c;
648
649                         /* former bes of the peeled code origin now from the loophead */
650                         loopheadnins[lheadin_c] = copyjmp;
651
652                         /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
653                          * Order is crucial! Predecessors outside of the loop are non existent.
654                          * The copy (cloned with its ins!) has pred i,
655                          * but phis pred i might not have a copy of itself.
656                          */
657                         for_each_phi(loophead, phi) {
658                                 get_node_info( phi )->ins[lheadin_c] =  get_irn_n( get_copy(phi), i) ;
659                         }
660                         ++lheadin_c;
661                 } else {
662                         peelheadnins[pheadin_c] = orgjmp;
663                         for_each_phi(peelhead, phi) {
664                                 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
665                         }
666                         ++pheadin_c;
667                 }
668         }/* for */
669
670         assert(pheadin_c == ARR_LEN(peelheadnins) &&
671                         lheadin_c == ARR_LEN(loopheadnins) &&
672                         "the constructed head arities do not match the predefined arities");
673
674         /* assign the ins to the nodes */
675         set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
676         set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
677
678         for_each_phi(loophead, phi) {
679                 ir_node **ins = get_node_info( phi )->ins;
680                 set_irn_in(phi, lhead_arity, ins);
681         }
682
683         for_each_phi(peelhead, phi) {
684                 ir_node **ins = get_node_info( phi )->ins;
685                 set_irn_in(phi, phead_arity, ins);
686         }
687 }
688
689 /**
690  * Create a raw copy (ins are still the old ones) of the given node.
691  * We rely on copies to be NOT visited.
692  */
693 static ir_node *rawcopy_node(ir_node *node)
694 {
695         int i, arity;
696         ir_node *cp;
697         node_info *cpstate;
698
699         cp = exact_copy(node);
700
701         arity = get_irn_arity(node);
702
703         for (i = 0; i < arity; ++i) {
704                 if (is_backedge(node, i))
705                         set_backedge(cp, i);
706         }
707
708         set_copy(node, cp);
709         cpstate = new_node_info();
710         set_irn_link(cp, cpstate);
711
712
713         if (is_Block(cp)) {
714                 /* TODO
715                  * exact_copy already sets Macroblock.
716                  * Why should we do this anyway? */
717                 set_Block_MacroBlock(cp, cp);
718         }
719
720         return cp;
721 }
722
723 /**
724  * This walker copies all walked nodes.
725  * If the walk_condition is true for a node, it is walked.
726  * All nodes node_info->copy attributes have to be NULL prior to every walk.
727  */
728 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
729 {
730         int i;
731         int arity;
732         ir_node *cp;
733         ir_node **cpin;
734         ir_graph *irg = current_ir_graph;
735         node_info *node_info = get_node_info(node);
736
737         /**
738          * break condition and cycle resolver, creating temporary node copies
739          */
740         if (get_irn_visited(node) >= get_irg_visited(irg)) {
741                 /* Here we rely on nodestate's copy being initialized with NULL */
742                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
743                 if (node_info->copy == NULL) {
744                         cp = rawcopy_node(node);
745                         DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
746                 }
747                 return;
748         }
749
750         /* Walk */
751         mark_irn_visited(node);
752
753         if (!is_Block(node)) {
754                 ir_node *pred = get_nodes_block(node);
755                 if (walk_condition(pred))
756                         DB((dbg, LEVEL_5, "walk block %N\n", pred));
757                         copy_walk(pred, walk_condition, set_loop);
758         }
759
760         arity = get_irn_arity(node);
761
762         NEW_ARR_A(ir_node *, cpin, arity);
763
764         for (i = 0; i < arity; ++i) {
765                 ir_node *pred = get_irn_n(node, i);
766
767                 if (walk_condition(pred)) {
768                         DB((dbg, LEVEL_5, "walk node %N\n", pred));
769                         copy_walk(pred, walk_condition, set_loop);
770                         cpin[i] = get_copy(pred);
771                         DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
772                                 node, get_copy(pred), pred));
773                 } else {
774                         cpin[i] = pred;
775                 }
776         }
777
778         /* copy node / finalize temp node */
779         if (node_info->copy == NULL) {
780                 /* No temporary copy existent */
781                 cp = rawcopy_node(node);
782                 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
783         } else {
784                 /* temporary copy is existent but without correct ins */
785                 cp = get_copy(node);
786                 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
787         }
788
789         if (!is_Block(node)) {
790                 ir_node *cpblock = get_copy(get_nodes_block(node));
791
792                 set_nodes_block(cp, cpblock );
793                 if (is_Phi(cp))
794                         add_Block_phi(cpblock, cp);
795         }
796
797         set_irn_loop(cp, set_loop);
798         set_irn_in(cp, ARR_LEN(cpin), cpin);
799 }
800
801 /**
802  * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
803  */
804 static void peel(out_edge *loop_outs)
805 {
806         int i;
807         ir_node **entry_buffer;
808         int entry_c = 0;
809
810         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
811
812         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
813
814         /* duplicate loop walk */
815         inc_irg_visited(current_ir_graph);
816
817         for (i = 0; i < ARR_LEN(loop_outs); i++) {
818                 out_edge entry = loop_outs[i];
819                 ir_node *node = entry.node;
820                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
821
822                 if (is_Block(node)) {
823                         copy_walk(pred, is_in_loop, NULL);
824                         duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
825                 } else {
826                         copy_walk(pred, is_in_loop, NULL);
827                         /* leave out keepalives */
828                         if (!is_End(node)) {
829                                 /* Node is user of a value defined inside the loop.
830                                  * We'll need a phi since we duplicated the loop. */
831                                 /* Cannot construct_ssa here, because it needs another walker. */
832                                 entry_buffer[entry_c] = pred;
833                                 ++entry_c;
834                         }
835                 }
836         }
837
838         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
839
840         /* Rewires the 2 heads */
841         peel_fix_heads();
842
843         /* Generate phis for values from peeled code and original loop */
844         construct_ssa_foreach(entry_buffer, entry_c);
845         /*for (i = 0; i < entry_c; i++)
846         {
847                 ir_node *cppred, *block, *cpblock, *pred;
848
849                 pred = entry_buffer[i];
850                 cppred = get_copy(pred);
851                 block = get_nodes_block(pred);
852                 cpblock = get_nodes_block(cppred);
853                 construct_ssa(block, pred, cpblock, cppred);
854         }*/
855 }
856
857 /**
858  * Populates head_entries with (node, pred_pos) tuple
859  * whereas the node's pred at pred_pos is in the head but not the node itself.
860  * Head and condition chain blocks must be marked.
861  */
862 static void get_head_outs(ir_node *node, void *env)
863 {
864         int i;
865         int arity = get_irn_arity(node);
866         (void) env;
867
868         DB((dbg, LEVEL_5, "get head entries %N \n", node));
869
870         for (i = 0; i < arity; ++i) {
871                 /* node is not in the head, but the predecessor is.
872                  * (head or loop chain nodes are marked) */
873
874                 DB((dbg, LEVEL_5, "... "));
875                 DB((dbg, LEVEL_5, "node %N  marked %d (0)  pred %d marked %d (1) \n",
876                     node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
877
878                 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
879                         out_edge entry;
880                         entry.node = node;
881                         entry.pred_irn_n = i;
882                         DB((dbg, LEVEL_5,
883                                 "Found head chain entry %N @%d because !inloop %N and inloop %N\n",
884                                 node, i, node, get_irn_n(node, i)));
885                         ARR_APP1(out_edge, cur_head_outs, entry);
886                 }
887         }
888 }
889
890 /**
891  * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
892  * A block belongs to the chain if a condition branches out of the loop.
893  * Returns 1 if the given block belongs to the condition chain.
894  */
895 static unsigned find_condition_chains(ir_node *block)
896 {
897         const ir_edge_t *edge;
898         unsigned mark = 0;
899         int nodes_n = 0;
900
901         DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
902
903         /* Collect all outs, including keeps.
904          * (TODO firm function for number of out edges?) */
905         foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
906                 ++nodes_n;
907         }
908
909         /* We do not want to collect more nodes from condition chains, than the limit allows us to.
910          * Also, leave at least one block as body. */
911         if (head_inversion_node_count + nodes_n > inversion_head_node_limit
912                     || head_inversion_block_count + 1 == loop_info.blocks) {
913                 set_Block_mark(block, 0);
914
915                 return 0;
916         }
917
918         /* First: check our successors, and add all succs that are outside of the loop to the list */
919         foreach_block_succ(block, edge) {
920                 ir_node *src = get_edge_src_irn( edge );
921                 int pos = get_edge_src_pos( edge );
922
923                 if (!is_in_loop(src)) {
924                         out_edge entry;
925
926                         mark = 1;
927                         entry.node = src;
928                         entry.pred_irn_n = pos;
929                         ARR_APP1(out_edge, cond_chain_entries, entry);
930                         mark_irn_visited(src);
931                 }
932         }
933
934         if (mark == 0) {
935                 /* this block is not part of the chain,
936                  * because the chain would become too long or we have no successor outside of the loop */
937
938                 set_Block_mark(block, 0);
939                 return 0;
940         } else {
941                 set_Block_mark(block, 1);
942                 ++head_inversion_block_count;
943                 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
944                 head_inversion_node_count += nodes_n;
945         }
946
947         /* Second: walk all successors, and add them to the list if they are not part of the chain */
948         foreach_block_succ(block, edge) {
949                 unsigned inchain;
950                 ir_node *src = get_edge_src_irn( edge );
951                 int pos = get_edge_src_pos( edge );
952
953                 /* already done cases */
954                 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
955                         continue;
956                 }
957
958                 mark_irn_visited(src);
959                 DB((dbg, LEVEL_5, "condition chain walk %N\n", src));
960                 inchain = find_condition_chains(src);
961
962                 /* if successor is not part of chain we need to collect its outs */
963                 if (!inchain) {
964                         out_edge entry;
965                         entry.node = src;
966                         entry.pred_irn_n = pos;
967                         ARR_APP1(out_edge, cond_chain_entries, entry);
968                 }
969         }
970         return mark;
971 }
972
973 /**
974  * Rewire the loop head and inverted head for loop inversion.
975  */
976 static void inversion_fix_heads(void)
977 {
978         ir_node **loopheadnins, **invheadnins;
979         ir_node *loophead = loop_cf_head;
980         ir_node *invhead =      get_copy(loophead);
981
982         int headarity =         get_irn_arity(loophead);
983         ir_node *phi;
984         int i;
985
986         int lheadin_c = 0;
987         int iheadin_c = 0;
988
989         int backedges_n = get_backedge_n(loophead, 0);
990         int lhead_arity = backedges_n;
991         int ihead_arity = headarity - backedges_n;
992
993         assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations.");
994         assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations.");
995
996         /* new in arrays for all phis in the head blocks */
997         NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
998         NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
999
1000         for_each_phi(loophead, phi) {
1001                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1002         }
1003         for_each_phi(invhead, phi) {
1004                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1005         }
1006
1007         for (i = 0; i < headarity; i++) {
1008                 ir_node *pred = get_irn_n(loophead, i);
1009
1010                 /**
1011                  * Rewire the head blocks ins and their phi ins.
1012                  * Requires phi list per block.
1013                  */
1014                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1015                         /* just copy these edges */
1016                         loopheadnins[lheadin_c] = pred;
1017                         for_each_phi(loophead, phi) {
1018                                 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
1019                         }
1020                         ++lheadin_c;
1021                 } else {
1022                         invheadnins[iheadin_c] = pred;
1023                         for_each_phi(invhead, phi) {
1024                                 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1025                         }
1026                         ++iheadin_c;
1027                 }
1028         }
1029
1030         /* assign the ins to the head blocks */
1031         set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1032         set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1033
1034         /* assign the ins for the phis */
1035         for_each_phi(loophead, phi) {
1036                 ir_node **ins = get_node_info(phi)->ins;
1037                 set_irn_in(phi, lhead_arity, ins);
1038         }
1039
1040         for_each_phi(invhead, phi) {
1041                 ir_node **ins = get_node_info(phi)->ins;
1042                 set_irn_in(phi, ihead_arity, ins);
1043         }
1044 }
1045
1046 static void inversion_walk(out_edge *head_entries)
1047 {
1048         int i;
1049         ir_node *phi;
1050         int entry_c = 0;
1051         ir_node **entry_buffer;
1052         ir_node **head_phi_assign;
1053
1054         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1055
1056         head_phi_assign = NEW_ARR_F(ir_node *, 0);
1057
1058         /* Find assignments in the condition chain,
1059          * to construct_ssa for them after the loop inversion. */
1060         for_each_phi(loop_cf_head , phi) {
1061                 int arity = get_irn_arity(phi);
1062                 for (i = 0; i < arity; ++i) {
1063                         ir_node *def = get_irn_n(phi, i);
1064                         if (is_nodesblock_marked(def)) {
1065                                 ARR_APP1(ir_node *, head_phi_assign, def);
1066                         }
1067                 }
1068         }
1069
1070         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1071
1072         /**
1073          * duplicate condition chain
1074          **/
1075         inc_irg_visited(current_ir_graph);
1076
1077         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1078                 out_edge entry = head_entries[i];
1079                 ir_node *node = entry.node;
1080                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1081
1082                 if (is_Block(node)) {
1083                         DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1084
1085                         copy_walk(pred, is_nodesblock_marked, cur_loop);
1086                         duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1087                 } else {
1088                         DB((dbg, LEVEL_5, "\nInit walk node  %N\n", pred));
1089
1090                         copy_walk(pred, is_nodesblock_marked, cur_loop);
1091
1092                         /* ignore keepalives */
1093                         if (!is_End(node)) {
1094                                 /* Node is user of a value assigned inside the loop.
1095                                  * We will need a phi since we duplicated the head. */
1096                                 entry_buffer[entry_c] = pred;
1097                                 ++entry_c;
1098                         }
1099                 }
1100         }
1101
1102         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1103
1104         inversion_fix_heads();
1105
1106         /* Generate phis for users of values assigned in the condition chain
1107          * and read in the loops body */
1108         construct_ssa_foreach(entry_buffer, entry_c);
1109
1110         /* Generate phis for values that are assigned in the condition chain
1111          * but not read in the loops body. */
1112         construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
1113
1114         loop_cf_head = get_copy(loop_cf_head);
1115 }
1116
1117 /* Loop peeling */
1118 static void loop_peeling(void)
1119 {
1120         cur_loop_outs = NEW_ARR_F(out_edge, 0);
1121         irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1122
1123         peel(cur_loop_outs);
1124
1125         /* clean up */
1126         reset_node_infos();
1127
1128         set_irg_doms_inconsistent(current_ir_graph);
1129         set_irg_loopinfo_inconsistent(current_ir_graph);
1130         set_irg_outs_inconsistent(current_ir_graph);
1131
1132         DEL_ARR_F(cur_loop_outs);
1133 }
1134
1135 /* Loop inversion */
1136 static void loop_inversion(void)
1137 {
1138         unsigned do_inversion = 1;
1139
1140         inversion_head_node_limit = INT_MAX;
1141
1142         /* Search for condition chains. */
1143         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1144
1145         irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1146
1147         loop_info.blocks = get_loop_n_blocks(cur_loop);
1148         cond_chain_entries = NEW_ARR_F(out_edge, 0);
1149
1150         head_inversion_node_count = 0;
1151         head_inversion_block_count = 0;
1152
1153         set_Block_mark(loop_cf_head, 1);
1154         mark_irn_visited(loop_cf_head);
1155         inc_irg_visited(current_ir_graph);
1156
1157         find_condition_chains(loop_cf_head);
1158
1159         DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1160         if (loop_info.blocks < 2) {
1161                 do_inversion = 0;
1162                 DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1163         }
1164
1165         /* We also catch endless loops here,
1166          * because they do not have a condition chain. */
1167         if (head_inversion_block_count < 1) {
1168                 do_inversion = 0;
1169                 DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1170         }
1171
1172         if (do_inversion) {
1173                 cur_head_outs = NEW_ARR_F(out_edge, 0);
1174
1175                 /* Get all edges pointing into the head or condition chain (outs). */
1176                 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1177                 inversion_walk(cur_head_outs);
1178
1179                 DEL_ARR_F(cur_head_outs);
1180
1181                 set_irg_doms_inconsistent(current_ir_graph);
1182                 set_irg_loopinfo_inconsistent(current_ir_graph);
1183                 set_irg_outs_inconsistent(current_ir_graph);
1184         }
1185
1186         /* free */
1187         DEL_ARR_F(cond_chain_entries);
1188         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1189 }
1190
1191 /**
1192  * Returns last element of linked list of copies by
1193  * walking the linked list.
1194  */
1195 static ir_node *get_last_copy(ir_node *node)
1196 {
1197         ir_node *copy, *cur;
1198         cur = node;
1199         while ((copy = get_copy(cur))) {
1200                 cur = copy;
1201         }
1202         return cur;
1203 }
1204
1205 /**
1206  * Rewire floating copies of the current loop.
1207  */
1208 static void unrolling_fix_cf(void)
1209 {
1210         ir_node *loophead = loop_cf_head;
1211         int headarity =         get_irn_arity(loophead);
1212         ir_node *phi, *headnode;
1213         /*ir_node *high, *low;*/
1214         int i;
1215
1216         int uhead_in_n = 0;
1217         int backedges_n = get_backedge_n(loophead, 0);
1218         int unroll_arity = backedges_n;
1219
1220         /* Create ins for all heads and their phis */
1221         headnode = get_copy(loophead);
1222         for_each_copy(headnode) {
1223                 NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
1224                 for_each_phi(headnode, phi) {
1225                         NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
1226                 }
1227         }
1228
1229         /* Append the copies to the existing loop. */
1230         for (i = 0; i < headarity; i++) {
1231                 ir_node *upper_head = loophead;
1232                 ir_node *lower_head = get_copy(loophead);
1233
1234                 ir_node *upper_pred = get_irn_n(loophead, i);
1235                 ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
1236
1237                 ir_node *last_pred;
1238
1239                 /**
1240                  * Build unrolled loop top down
1241                  */
1242                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1243                         for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
1244                                 get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
1245
1246                                 for_each_phi(upper_head, phi) {
1247                                         ir_node *phi_copy = get_copy(phi);
1248                                         get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
1249                                 }
1250                         }
1251
1252                         last_pred = upper_pred;
1253                         ++uhead_in_n;
1254
1255                         /* Fix the topmost loop heads backedges. */
1256                         set_irn_n(loophead, i, last_pred);
1257                         for_each_phi(loophead, phi) {
1258                                 ir_node *last_phi = get_last_copy(phi);
1259                                 ir_node *pred = get_irn_n(last_phi, i);
1260                                 set_irn_n(phi, i, pred);
1261                         }
1262                 }
1263         }
1264
1265         headnode = get_copy(loophead);
1266         for_each_copy(headnode) {
1267                 set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
1268                 for_each_phi(headnode, phi) {
1269                         set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
1270                 }
1271         }
1272 }
1273
1274 #if 0
1275 static ir_node *add_phi(ir_node *node, int phi_pos)
1276 {
1277         ir_mode *mode;
1278         ir_node *phi;
1279         ir_node **in;
1280         mode = get_irn_mode(get_irn_n(node, phi_pos));
1281         ir_node *block = get_nodes_block(node);
1282         int n_cfgpreds = get_irn_arity(block);
1283         ir_node *pred = get_irn_n(node, phi_pos);
1284         int i;
1285
1286         /* create a new Phi */
1287         NEW_ARR_A(ir_node*, in, n_cfgpreds);
1288         for (i = 0; i < n_cfgpreds; ++i)
1289                 in[i] = new_Unknown(mode);  /*pred;*/
1290
1291         phi = new_r_Phi(block, n_cfgpreds, in, mode);
1292
1293         assert(phi && "phi null");
1294         assert(is_Bad(phi) && "phi bad");
1295
1296         /* Important: always keep block phi list up to date. */
1297         add_Block_phi(block, phi);
1298         /* EVERY node is assumed to have a node_info linked. */
1299         alloc_node_info(phi, NULL);
1300
1301         set_irn_n(node, phi_pos, phi);
1302         return phi;
1303 }
1304 #endif
1305
1306
1307 /**
1308  * Loop unrolling
1309  * Could be improved with variable range informations.
1310  */
1311 static void loop_unrolling(void)
1312 {
1313         int i, j;
1314
1315         unroll_times = 7;
1316
1317         cur_loop_outs = NEW_ARR_F(out_edge, 0);
1318         irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1319
1320         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1321
1322         /* duplicate whole loop content */
1323         inc_irg_visited(current_ir_graph);
1324
1325         for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1326                 out_edge entry = cur_loop_outs[i];
1327                 ir_node *node = entry.node;
1328                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1329                 if (!is_Block(node)) {
1330                         for (j = 0; j < unroll_times - 1; ++j) {
1331                                 copy_walk(pred, is_in_loop, cur_loop);
1332                                 pred = get_copy(pred);
1333                         }
1334                 }
1335         }
1336
1337         for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1338                 out_edge entry = cur_loop_outs[i];
1339                 ir_node *node = entry.node;
1340                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1341
1342                 if (is_Block(node)) {
1343                         for (j = 0; j < unroll_times - 1; ++j) {
1344                                 copy_walk(pred, is_in_loop, cur_loop);
1345                                 duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
1346
1347                                 pred = get_copy(pred);
1348                         }
1349                 }
1350         }
1351
1352         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1353
1354         /*dump_ir_graph(current_ir_graph, "-raw");*/
1355
1356         /* LDBG 2 */
1357 #if 1
1358         /* Line up the floating copies. */
1359         unrolling_fix_cf();
1360
1361         /* Generate phis for all loop outs */
1362         for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1363                 out_edge entry = cur_loop_outs[i];
1364                 ir_node *node = entry.node;
1365                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1366
1367                 if (!is_Block(node) && !is_End(node)) {
1368                         DB((dbg, LEVEL_5, "  construct_ssa_n def %N  node %N  pos %d\n",
1369                                         pred, node, entry.pred_irn_n));
1370                         construct_ssa_n(pred, node);
1371                 }
1372         }
1373 #endif
1374
1375         DEL_ARR_F(cur_loop_outs);
1376
1377         set_irg_doms_inconsistent(current_ir_graph);
1378         set_irg_loopinfo_inconsistent(current_ir_graph);
1379         set_irg_outs_inconsistent(current_ir_graph);
1380 }
1381
1382 /* Initialization and */
1383 static void init_analyze(ir_loop *loop)
1384 {
1385         /* Init new for every loop */
1386         cur_loop = loop;
1387
1388         loop_cf_head = NULL;
1389         loop_cf_head_valid = 1;
1390         loop_inv_head = NULL;
1391         loop_peeled_head = NULL;
1392
1393         loop_info.outs = 0;
1394         loop_info.calls = 0;
1395         loop_info.loads = 0;
1396         loop_info.blocks = 0;
1397
1398         DB((dbg, LEVEL_2, "  >>>> current loop includes node %N <<<\n", get_loop_node(loop, 0)));
1399
1400         irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1401
1402         /* RETURN if there is no valid head */
1403         if (!loop_cf_head || !loop_cf_head_valid) {
1404                 DB((dbg, LEVEL_2,   "No valid loop head. Nothing done.\n"));
1405                 return;
1406         }
1407
1408         if (enable_peeling)
1409                 loop_peeling();
1410
1411         if (enable_inversion)
1412                 loop_inversion();
1413         if (enable_unrolling)
1414                 loop_unrolling();
1415
1416 #if 0
1417         /* RETURN if there is a call in the loop */
1418         if (loop_info.calls)
1419                 return;
1420 #endif
1421
1422         DB((dbg, LEVEL_2, "      <<<< end of loop with node %N >>>>\n", get_loop_node(loop, 0)));
1423 }
1424
1425 /* Find most inner loops and send them to analyze_loop */
1426 static void find_most_inner_loop(ir_loop *loop)
1427 {
1428         /* descend into sons */
1429         int sons = get_loop_n_sons(loop);
1430
1431         if (sons == 0) {
1432                 loop_element elem;
1433                 int el_n, i;
1434
1435                 el_n = get_loop_n_elements(loop);
1436
1437                 for (i=0; i < el_n; ++i) {
1438                         elem = get_loop_element(loop, i);
1439                         /* We can only rely on the blocks,
1440                          * as the loop attribute of the nodes seems not to be set. */
1441                         if (is_ir_node(elem.kind) && is_Block(elem.node)) {
1442                                 ARR_APP1(ir_node *, loops, elem.node);
1443                                 DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
1444                                 break;
1445                         }
1446                 }
1447         } else {
1448                 int s;
1449                 for (s=0; s<sons; s++) {
1450                         find_most_inner_loop(get_loop_son(loop, s));
1451                 }
1452         }
1453 }
1454
1455 /**
1456  * Assure preconditions are met and go through all loops.
1457  */
1458 void loop_optimization(ir_graph *irg)
1459 {
1460         ir_loop *loop;
1461         int     i, sons, nr;
1462
1463         /* Init */
1464         link_node_state_list = NULL;
1465         set_current_ir_graph(irg);
1466
1467         /* preconditions */
1468         edges_assure(irg);
1469         assure_irg_outs(irg);
1470
1471         /* NOTE: sets only the loop attribute of blocks, not nodes */
1472         /* NOTE: Kills links */
1473         assure_cf_loop(irg);
1474
1475         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1476         collect_phiprojs(irg);
1477         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1478
1479         /* allocate node_info for additional information on nodes */
1480         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1481         irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1482
1483         loop = get_irg_loop(irg);
1484         sons = get_loop_n_sons(loop);
1485
1486         loops = NEW_ARR_F(ir_node *, 0);
1487
1488         for (nr = 0; nr < sons; ++nr) {
1489                 find_most_inner_loop(get_loop_son(loop, nr));
1490         }
1491
1492 /* TODO Keep backedges during optimization to avoid
1493  * this ugly allocation and deallocation.
1494  * (set_irn_in seems to destroy them)
1495  */
1496 #if 0
1497         for (i = 0; i < ARR_LEN(loops); ++i) {
1498                 ir_loop *loop;
1499
1500                 loop = get_irn_loop(loops[i]);
1501                 init_analyze(loop);
1502         }
1503 #else
1504         /* This part is useful for testing
1505          * or has to be used if the backedge information is destroyed.
1506          * Which is the case at the moment, because the backedge information gets lost
1507          * before inversion_fix_heads/unrolling_fix_cf, which results in bads.
1508          * NOTE!: Testsuite runs successfully nevertheless...
1509          */
1510
1511         /**
1512          * assure_cf_loop() creates a completely new loop tree.
1513          * Thus we cannot optimize a loop, assure_cf_loop() and continue with the next loop,
1514          * as the next loop must be searched because it is not distinguishable from the
1515          * already done loops.
1516          * The links of the loops are also not available anymore (to store a "loop done" flag).
1517          * Therefore we save a block per loop.
1518          * NOTE: We rely on the loop optimizations not to remove any block from the loop.
1519          * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
1520          */
1521         for (i = 0; i < ARR_LEN(loops); ++i) {
1522                 ir_loop *loop;
1523
1524                 free_node_info();
1525                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1526
1527                 edges_assure(current_ir_graph);
1528                 assure_irg_outs(current_ir_graph);
1529
1530                 /* NOTE: sets only the loop attribute of blocks */
1531                 /* NOTE: Kills links */
1532                 assure_cf_loop(current_ir_graph);
1533
1534                 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1535                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1536
1537                 /* Get loop from block */
1538                 loop = get_irn_loop(loops[i]);
1539                 init_analyze(loop);
1540         }
1541 #endif
1542
1543         /* Free */
1544         DEL_ARR_F(loops);
1545
1546         free_node_info();
1547         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1548         ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
1549 }
1550
1551 void do_loop_unrolling(ir_graph *irg)
1552 {
1553         enable_unrolling = 1;
1554         enable_peeling = 0;
1555         enable_inversion = 0;
1556
1557         DB((dbg, LEVEL_2, " >>> unrolling (Startnode %N) <<<\n",
1558                 get_irg_start(irg)));
1559
1560         loop_optimization(irg);
1561
1562         DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n",
1563                 get_irg_start(irg)));
1564 }
1565
1566 void do_loop_inversion(ir_graph *irg)
1567 {
1568         enable_unrolling = 0;
1569         enable_peeling = 0;
1570         enable_inversion = 1;
1571
1572         DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n",
1573                 get_irg_start(irg)));
1574
1575         loop_optimization(irg);
1576
1577         DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n",
1578                 get_irg_start(irg)));
1579 }
1580
1581 void do_loop_peeling(ir_graph *irg)
1582 {
1583         enable_unrolling = 0;
1584         enable_peeling = 1;
1585         enable_inversion = 0;
1586
1587         DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n",
1588                 get_irg_start(irg)));
1589
1590         loop_optimization(irg);
1591
1592         DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n",
1593                 get_irg_start(irg)));
1594
1595 }
1596
1597 ir_graph_pass_t *loop_inversion_pass(const char *name)
1598 {
1599         return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
1600 }
1601
1602 ir_graph_pass_t *loop_unroll_pass(const char *name)
1603 {
1604         return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
1605 }
1606
1607 ir_graph_pass_t *loop_peeling_pass(const char *name)
1608 {
1609         return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
1610 }
1611
1612 void firm_init_loop_opt(void)
1613 {
1614         FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
1615 }