Loop unrolling implemented. Inversion+unrolling fail 3 tests.
[libfirm] / ir / opt / loop.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @author   Christian Helmer
23  * @brief    loop inversion, loop unrolling and loop peeling
24  *
25  * @version  $Id$
26  */
27 #include "config.h"
28
29 #include "irnode.h"
30 #include "debug.h"
31
32 #include "ircons.h"
33 #include "irgopt.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "iredges.h"
38 #include "irtools.h"
39 #include "array_t.h"    /* automatic array */
40 #include "beutil.h"             /* get_block */
41 #include "irloop_t.h"   /* set_irn_loop*/
42
43 #if 0
44         #include "irdump_t.h"
45 #endif
46
47
48 DEBUG_ONLY(static firm_dbg_module_t *dbg);
49
50 /**
51  * Convenience macro for iterating over every phi node of the given block.
52  * Requires phi list per block.
53  */
54 #define for_each_phi(block, phi) \
55         for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
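/*
 * Illustrative usage sketch (kept disabled): iterating over the phis of a
 * block whose phi list has been built, e.g. via collect_phiprojs() or
 * explicit add_Block_phi() calls. 'some_block' is a hypothetical node.
 */
#if 0
        {
                ir_node *phi;
                for_each_phi(some_block, phi) {
                        DB((dbg, LEVEL_5, "phi %ld\n", get_irn_node_nr(phi)));
                }
        }
#endif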
56
57 /* current loop */
58 static ir_loop *cur_loop;
59
60 /* abortable walker function */
61 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
62
63 /* condition for walking a node during a copy_walk */
64 typedef unsigned walker_condition(ir_node *);
65
66 /* node and position of a predecessor */
67 typedef struct out_edges {
68         ir_node *node;
69         int pred_irn_n;
70 } out_edge;
71
72 /* access complex values through the nodes' links */
73 typedef struct node_info {
74         unsigned invariant:1;
75         ir_node *copy;
76         ir_node *link;                                  /* temporary links for ssa creation */
77         ir_node **ins;                                  /* ins for phi nodes, during rewiring of blocks */
78         unsigned done;
79         struct node_info *freelistnext; /* linked list to free all node_infos */
80 } node_info;
81
82 static node_info *link_node_state_list;         /* head of the linked list to free all node_infos */
83
84 static out_edge *cur_loop_outs;                         /* A walker may start visiting the current loop with these nodes. */
85 static out_edge *cur_head_outs;                         /* A walker may start visiting the cur head with these nodes. */
86
87 static ir_node *loop_cf_head = NULL;                            /* Loop head node */
88 static unsigned loop_cf_head_valid = 1;                         /* A loop may have only one head; otherwise we do not touch it. */
89
90 ir_node **loops;
91
92 /* Inverted head */
93 static ir_node *loop_inv_head = NULL;
94 /* Peeled head */
95 static ir_node *loop_peeled_head = NULL;
96
97 /* Loop analysis information */
98 typedef struct loop_info_t {
99         unsigned calls;                         /* number of calls */
100         unsigned loads;                         /* number of load nodes */
101         unsigned invariant_loads;
102         unsigned stores;                        /* number of store nodes */
103         unsigned blocks;                        /* number of blocks in the loop */
104         unsigned opnodes_n;                     /* nodes that probably result in an instruction */
105         unsigned do_invariant_opt;
106         unsigned outs;                          /* outs without keepalives */
107 } loop_info_t;
108
109 /* Information about the current loop */
110 static loop_info_t loop_info;
111
112 /* A walker may start visiting a condition chain with these nodes. */
113 static out_edge *cond_chain_entries;
114
115 /* Unroll factor; the loop body is duplicated unroll_times - 1 times */
116 int unroll_times;
117
118 static unsigned head_inversion_node_count;
119 static unsigned head_inversion_node_limit;
120 static unsigned head_inversion_block_count;
121
122 static unsigned enable_peeling;
123 static unsigned enable_inversion;
124 static unsigned enable_unrolling;
125
126 /**
127  *
128  * ============= AUXILIARY FUNCTIONS =====================================
129  */
130
131 /* Check for number of bads. This optimization should not produce bads. */
132 DEBUG_ONLY(
133         int bads;
134         int previous_bads;
135
136         void check_bad(ir_node *node, void *env)
137         {
138                 (void)env;
139
140                 if (is_Bad(node)) {
141                         DB((dbg, LEVEL_1, "WARNING: BAD %ld\n", get_irn_node_nr(node)));
142                         ++bads;
143                 }
144         }
145
146         void is_bad(char *msg)
147         {
148                 DB((dbg, LEVEL_1, "%s\n", msg ? msg : ""));
149                 irg_walk_graph(current_ir_graph, check_bad, NULL,NULL);
150                 if (msg && bads != previous_bads)
151                 {
152                         /*assert(!bads && "BADS!\n");*/
153                         DB((dbg, LEVEL_1, "####### BAD ########\n%s\n %d  >  %d ##############################\n", msg, previous_bads, bads));
154                 }
155                 if (!msg) previous_bads=bads;
156                 bads = 0;
157                 /* dump_ir_block_graph(current_ir_graph, "-LASTGOOD"); */
158         }
159 )
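/*
 * Illustrative usage sketch (kept disabled): how the Bad-counting helpers
 * above might be wrapped around a transformation. is_bad(NULL) records the
 * current Bad count as baseline; a later call with a message reports if the
 * count increased. This only shows the intended pattern, not actual call sites.
 */
#if 0
        DEBUG_ONLY(is_bad(NULL);)                 /* remember baseline count */
        /* ... run peeling / inversion / unrolling ... */
        DEBUG_ONLY(is_bad("after transformation");)
#endif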
160
161 /**
162  * Creates object on the heap, and adds it to a linked list to free it later.
163  */
164 static node_info *new_node_info(void) {
165         node_info *l = XMALLOCZ(node_info);
166         l->freelistnext = link_node_state_list;
167         link_node_state_list = l;
168         l->copy = NULL;
169         l->invariant = 0;
170         return l;
171 }
172
173 static node_info *get_node_info(ir_node *n)
174 {
175         return ((node_info *)get_irn_link(n));
176 }
177
178 /* Allocates a node_info struct for the given node. For use with a walker. */
179 static void alloc_node_info(ir_node *node, void *env)
180 {
181         node_info *state;
182         (void) env;
183         state = new_node_info();
184         set_irn_link(node, (void *)state);
185 }
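/*
 * Illustrative setup/teardown sketch (kept disabled): every node is expected
 * to carry a node_info in its link field before the transformations run.
 * The actual wiring is expected to happen in the driver code (not shown in
 * this excerpt); 'irg' is a hypothetical graph.
 */
#if 0
        ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
        irg_walk_graph(irg, alloc_node_info, NULL, NULL);
        /* ... run the loop optimizations ... */
        free_node_info();
        ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
#endif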
186
187 static void free_node_info(void)
188 {
189         int a = 0;
190         node_info *n;
191         n = link_node_state_list;
192         while(n) {
193                 node_info *next = n->freelistnext;
194                 ++a;
195                 xfree(n);
196                 n = next;
197         }
198         link_node_state_list = NULL;
199 }
200
201 /**
202  * Uses the linked list to reset the reused values of all node_infos.
203  * In particular, resets the copy attribute, as copy_walk uses it to detect an existing copy.
204  */
205 static void reset_node_infos(void)
206 {
207         node_info *next;
208         next = link_node_state_list;
209         while (next) {
210                 node_info *cur = next;
211                 next = cur->freelistnext;
212                 cur->copy = NULL;
213                 cur->ins = NULL;
214                 cur->link = NULL;
215         }
216 }
217
218
219
220 /* Returns the copy of the given node, stored in its node_info. */
221 static ir_node *get_copy(ir_node *n)
222 {
223         return ((node_info *)get_irn_link(n))->copy;
224 }
225
226 /**
227  * Convenience macros for iterating over every copy in a linked list
228  * of copies.
229  */
230 #define for_each_copy(node) \
231         for ( ; (node) ; (node) = get_copy(node))
232
233 #define for_each_copy2(high1, low1, high2, low2) \
234         for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
235                                                         (high2) = (low2), (low2) = get_copy(low2))
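/*
 * Illustrative usage sketch (kept disabled): walking the chain of copies
 * that repeated copy_walk() invocations link together via set_copy(), e.g.
 * after unrolling a loop several times. 'node' is a hypothetical original node.
 */
#if 0
        {
                ir_node *cp = get_copy(node);
                for_each_copy(cp) {
                        DB((dbg, LEVEL_5, "copy %ld\n", get_irn_node_nr(cp)));
                }
        }
#endif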
236
237 /* Links the node to its copy */
238 static void set_copy(ir_node *n, ir_node *copy)
239 {
240         ((node_info *)get_irn_link(n) )->copy = copy;
241 }
242
243 /* Returns 0 if the node or block is not in cur_loop.
244  * NOTE: get_irn_loop returns the ir_node loop attribute.
245  * But it seems to be set correctly only for blocks!
246  * Thus, we need to use get_block to get the loop.
247  */
248 static unsigned is_in_loop(ir_node *node)
249 {
250         return (get_irn_loop(get_block(node)) == cur_loop);
251 }
252
253 /* Returns non-zero if pred i of n is an alien edge, i.e. the predecessor is not in the loop. */
254 static unsigned is_alien_edge(ir_node *n, int i)
255 {
256         return(!is_in_loop(get_irn_n(n, i)));
257 }
258
259 /* used for block walker */
260 static void reset_block_mark(ir_node *node, void * env)
261 {
262         (void) env;
263
264         if (is_Block(node))
265                 set_Block_mark(node, 0);
266 }
267
268 static unsigned is_nodesblock_marked(ir_node* node)
269 {
270         return (get_Block_mark(get_block(node)));
271 }
272
273 /* Returns the number of blocks in a loop. */
274 int get_loop_n_blocks(ir_loop *loop) {
275         int elements, e;
276         int blocks = 0;
277         elements = get_loop_n_elements(loop);
278
279         for(e=0; e<elements; e++) {
280                 loop_element elem = get_loop_element(loop, e);
281                 if  (is_ir_node(elem.kind) && is_Block(elem.node))
282                         ++blocks;
283         }
284         return blocks;
285 }
286
287 /**
288  * Appends newpred to node's predecessors; every phi duplicates its value at position pos for the new input. Returns the new position.
289  * Requires block phi list.
290  */
291 static int duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
292 {
293         ir_node **ins;
294         /*int *is_be;*/
295         ir_node *phi;
296         int block_arity;
297         int i;
298
299         assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
300
301         DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n",
302             get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
303
304         block_arity = get_irn_arity(node);
305
306         NEW_ARR_A(ir_node*, ins, block_arity + 1);
307         /*NEW_ARR_A(int, is_be, block_arity + 1);*/
308
309         for (i = 0; i < block_arity; ++i) {
310                 ins[i] = get_irn_n(node, i);
311                 /*is_be[i] = is_backedge(node, i);*/
312         }
313         ins[block_arity] = newpred;
314
315         set_irn_in(node, block_arity + 1, ins);
316 /*
317         for (i = 0; i < block_arity; ++i) {
318                 set_backedge(node, is_be[i]);
319         }
320 */
321
322         for_each_phi(node, phi) {
323                 int phi_arity = get_irn_arity(phi);
324                 DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
325
326                 NEW_ARR_A(ir_node *, ins, block_arity + 1);
327                 for (i=0; i < phi_arity; ++i) {
328                         DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
329                         ins[i] = get_irn_n(phi, i);
330                 }
331                 ins[block_arity] = get_irn_n(phi, pos);
332                 set_irn_in(phi, block_arity + 1, ins);
333         }
334         /* return new position */
335         return block_arity;
336 }
337
338 /**
339  * Finds the loop head and collects loop_info.
340  * TODO Remove unused counters.
341  */
342 static void get_loop_info(ir_node *node, void *env)
343 {
344         unsigned node_in_loop, pred_in_loop;
345         int i, arity;
346         (void) env;
347
348         node_in_loop = is_in_loop(node);
349
350         /* collect some loop information */
351         if (node_in_loop) {
352                 if (is_Store(node))
353                         ++loop_info.stores;
354                 if (is_Load(node))
355                         ++loop_info.loads;
356                 if (is_Call(node))
357                         ++loop_info.calls;
358                 if (!is_Block(node) && !is_Proj(node) && !is_Phi(node))
359                         ++loop_info.opnodes_n;
360         }
361
362         arity = get_irn_arity(node);
363         for (i = 0; i < arity; i++) {
364                 ir_node *pred = get_irn_n(node, i);
365                 pred_in_loop = is_in_loop(pred);
366
367                 /* Find the loops head/the blocks with cfpred outside of the loop */
368                 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
369                         ir_node *cfgpred = get_Block_cfgpred(node, i);
370
371                         if (!is_in_loop(cfgpred)) {
372                                 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
373                                 /* another head? We do not touch this. */
374                                 if (loop_cf_head && loop_cf_head != node) {
375                                         loop_cf_head_valid = 0;
376                                 } else {
377                                         loop_cf_head = node;
378                                 }
379                         }
380                 }
381         }
382 }
383
384 /* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
385 static void get_loop_outs(ir_node *node, void *env)
386 {
387         unsigned node_in_loop, pred_in_loop;
388         int i, arity;
389         (void) env;
390
391         arity = get_irn_arity(node);
392         for (i = 0; i < arity; i++) {
393                 ir_node *pred = get_irn_n(node, i);
394
395                 pred_in_loop = is_in_loop(pred);
396                 node_in_loop = is_in_loop(node);
397
398                 if (pred_in_loop && !node_in_loop) {
399                         out_edge entry;
400                         entry.node = node;
401                         entry.pred_irn_n = i;
402                         ARR_APP1(out_edge, cur_loop_outs, entry);
403                         if (node != get_irg_end(current_ir_graph))
404                                 ++loop_info.outs;
405                 }
406         }
407 }
408
409 /**
410  * Finds invariant loads and marks them as invariant.
411  * (has to be post walk)
412  */
413 static unsigned get_invariants(ir_node *node)
414 {
415         unsigned invar = 1;
416         int arity = get_irn_arity(node);
417
418         /* RETURN, no predecessors to visit */
419         if (arity == 0)
420                 return 0;
421
422         if (is_Load(node)) {
423                 ir_node *pred = get_Load_ptr(node);
424                 if ((get_Load_volatility(node) == volatility_non_volatile) &&
425                                 (!is_in_loop(pred)
426                                 || is_Const(pred)
427                                 || is_SymConst(pred)
428                                 || get_node_info(node)->invariant )) {
429                         get_node_info(node)->invariant = 1;
430                         DB((dbg, LEVEL_2, "Invariant node found: %ld", get_irn_node_nr(node)));
431                         ++loop_info.invariant_loads;
432
433                 } else {
434                         get_node_info(node)->invariant = 0;
435                 }
436         } else {
437                 int i;
438                 invar = 1;
439                 /* find loop variant preds */
440                 for(i = 0; i < arity; ++i) {
441                         ir_node *pred = get_irn_n(node, i);
442
443                         if ( is_in_loop(pred)                                                   /* assignments outside of the loop are loop invariant */
444                                         && !is_Const(pred)                                              /* constants */
445                                         && !is_SymConst(pred)                                   /* SymConst */
446                                         && !get_node_info(pred)->invariant ) {  /* pred is marked as invariant */
447                                 invar = 0;
448                                 break;
449                         }
450                 }
451
452                 get_node_info(node)->invariant = invar;
453         }
454         return 0;
455 }
456
457 static ir_node *ssa_second_def;
458 static ir_node *ssa_second_def_block;
459
460 /**
461  * Walks the graph bottom up, searching for definitions and creating phis.
462  * (Does not handle the special case where the second definition is in the block of the user
463  * of the original definition because it is not necessary here.)
464  */
465 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
466 {
467         int i;
468         int n_cfgpreds;
469         ir_graph *irg;
470         ir_node *phi;
471         ir_node **in;
472
473         DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %ld\n", get_irn_node_nr(block)));
474
475         /* Prevent the creation of phis that would be Bad anyway
476          * (dead and bad blocks). */
477         if (get_irn_arity(block) < 1 || is_Bad(block))
478                 return new_Bad();
479
480         if (block == ssa_second_def_block) {
481                 DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
482                 return ssa_second_def;
483         }
484
485         /* already processed this block? */
486         if (irn_visited(block)) {
487                 ir_node *value = get_node_info(block)->link;
488                 DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
489                 return value;
490         }
491
492         irg = get_irn_irg(block);
493         assert(block != get_irg_start_block(irg));
494
495         /* a Block with only 1 predecessor needs no Phi */
496         n_cfgpreds = get_Block_n_cfgpreds(block);
497         if (n_cfgpreds == 1) {
498                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
499                 ir_node *value;
500
501                 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
502
503                 value = search_def_and_create_phis(pred_block, mode);
504                 get_node_info(block)->link = value;
505                 mark_irn_visited(block);
506
507                 return value;
508         }
509
510         /* create a new Phi */
511         NEW_ARR_A(ir_node*, in, n_cfgpreds);
512         for(i = 0; i < n_cfgpreds; ++i)
513                 in[i] = new_Unknown(mode);
514
515         phi = new_r_Phi(block, n_cfgpreds, in, mode);
516
517         /* Important: always keep block phi list up to date. */
518         add_Block_phi(block, phi);
519         /* EVERY node is assumed to have a node_info linked. */
520         alloc_node_info(phi, NULL);
521
522         DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
523
524         get_node_info(block)->link = phi;
525         mark_irn_visited(block);
526
527         /* set Phi predecessors */
528         for(i = 0; i < n_cfgpreds; ++i) {
529                 ir_node *pred_val;
530                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
531                 assert(pred_block != NULL);
532                 pred_val = search_def_and_create_phis(pred_block, mode);
533                 assert(pred_val != NULL);
534
535                 DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
536                 set_irn_n(phi, i, pred_val);
537         }
538         return phi;
539 }
540
541 /**
542  * Given a set of values this function constructs SSA-form for the users of the
543  * first value (the users are determined through the out-edges of the value).
544  * Works without using the dominance tree.
545  */
546 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
547                           ir_node *second_block, ir_node *second_val)
548 {
549         ir_graph *irg;
550         ir_mode *mode;
551         const ir_edge_t *edge;
552         const ir_edge_t *next;
553
554         assert(orig_block && orig_val && second_block && second_val &&
555                         "no parameter of construct_ssa may be NULL");
556
557         /* no need to do anything */
558         if (orig_val == second_val)
559                 return;
560
561         irg = get_irn_irg(orig_val);
562
563         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
564         inc_irg_visited(irg);
565
566         mode = get_irn_mode(orig_val);
567         get_node_info(orig_block)->link = orig_val;
568         mark_irn_visited(orig_block);
569
570         ssa_second_def_block = second_block;
571         ssa_second_def       = second_val;
572
573         /* Only fix the users of the first, i.e. the original node */
574         foreach_out_edge_safe(orig_val, edge, next) {
575                 ir_node *user = get_edge_src_irn(edge);
576                 int j = get_edge_src_pos(edge);
577                 ir_node *user_block = get_nodes_block(user);
578                 ir_node *newval;
579
580                 /* ignore keeps */
581                 if (is_End(user))
582                         continue;
583
584                 DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user)));
585
586                 if (is_Phi(user)) {
587                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
588                         newval = search_def_and_create_phis(pred_block, mode);
589                 } else {
590                         newval = search_def_and_create_phis(user_block, mode);
591                 }
592
593                 /* If we get a Bad node, the user keeps its original input. No second definition needed. */
594                 if (newval != user && !is_Bad(newval))
595                         set_irn_n(user, j, newval);
596         }
597
598         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
599 }
600
601 /**
602  * Construct SSA for all definitions in arr.
603  */
604 void construct_ssa_foreach(ir_node **arr, int arr_n)
605 {
606         int i;
607         for(i = 0; i < arr_n; i++) {
608                 ir_node *cppred, *block, *cpblock, *pred;
609
610                  /* It is not possible to use
611                   * pred = get_irn_n(entry.node, entry.pred_irn_n);
612                   * because we might have changed the node's predecessors in construct_ssa
613                   */
614                 pred = arr[i];
615                 cppred = get_copy(pred);
616                 block = get_nodes_block(pred);
617                 cpblock = get_nodes_block(cppred);
618                 construct_ssa(block, pred, cpblock, cppred);
619         }
620 }
621
622 /* Returns the number of backedges of loophead; alien backedges are counted only if with_alien is set. */
623 static int get_backedge_n(ir_node *loophead, unsigned with_alien)
624 {
625         int i;
626         int be_n = 0;
627         int arity = get_irn_arity(loophead);
628         for (i = 0; i < arity; ++i) {
629                 ir_node *pred = get_irn_n(loophead, i);
630                 if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
631                         ++be_n;
632         }
633         return be_n;
634 }
635
636 /**
637  * Sets the node's backedges according to its predecessors' links.
638  */
639 static void fix_backedge_info(ir_node *node)
640 {
641         int i;
642         for (i = 0; i < get_irn_arity(node); ++i) {
643                 ir_node *pred = get_irn_n(node, i);
644                 if (get_node_info(pred)->link != NULL) {
645                         set_backedge(node, i);
646                         get_node_info(pred)->link = NULL;
647                         DB((dbg, LEVEL_5, "fix backedge: node %ld  pred %ld  is backedge\n",
648                                         get_irn_node_nr(node), get_irn_node_nr(pred)));
649                 } else {
650                         set_not_backedge(node, i);
651                         DB((dbg, LEVEL_5, "fix backedge: node %ld  pred %ld  is not backedge\n",
652                                         get_irn_node_nr(node), get_irn_node_nr(pred)));
653                 }
654         }
655 }
656
657
658 /**
659  * Rewires the heads after peeling.
660  */
661 static void peel_fix_heads(void)
662 {
663         ir_node **loopheadnins, **peelheadnins;
664         ir_node *loophead = loop_cf_head;
665         ir_node *peelhead = get_copy(loophead);
666
667         int headarity = get_irn_arity(loophead);
668         ir_node *phi;
669         int i;
670
671         int lheadin_c = 0;
672         int pheadin_c = 0;
673
674         int backedges_n = get_backedge_n(loophead, 0);
675
676         int lhead_arity = 2 * backedges_n;
677         int phead_arity = headarity - backedges_n;
678
679         /* new in arrays */
680         NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
681         NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
682
683         for_each_phi(loophead, phi) {
684                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
685         }
686         for_each_phi(peelhead, phi) {
687                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
688         }
689
690         for (i = 0; i < headarity; i++)
691         {
692                 ir_node *orgjmp = get_irn_n(loophead, i);
693                 ir_node *copyjmp = get_copy(orgjmp);
694
695                 /**
696                  * Rewire the head blocks ins and their phi ins.
697                  * Requires phi list per block.
698                  */
699                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
700                         loopheadnins[lheadin_c] = orgjmp;
701                         for_each_phi(loophead, phi) {
702                                 get_node_info( phi )->ins[lheadin_c] =  get_irn_n( phi, i) ;
703                         }
704                         ++lheadin_c;
705
706                         /* former backedges of the peeled code now originate from the loophead */
707                         loopheadnins[lheadin_c] = copyjmp;
708
709                         /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
710                          * Order is crucial! Predecessors outside of the loop do not exist.
711                          * The copy (cloned with its ins!) has a pred at position i,
712                          * but the phi's pred i might not have a copy of itself.
713                          */
714                         for_each_phi(loophead, phi) {
715                                 get_node_info( phi )->ins[lheadin_c] =  get_irn_n( get_copy(phi), i) ;
716                         }
717                         ++lheadin_c;
718                 } else {
719                         peelheadnins[pheadin_c] = orgjmp;
720                         for_each_phi(peelhead, phi) {
721                                 get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
722                         }
723                         ++pheadin_c;
724                 }
725         }/* for */
726
727         assert(pheadin_c == ARR_LEN(peelheadnins) &&
728                         lheadin_c == ARR_LEN(loopheadnins) &&
729                         "the constructed head arities do not match the predefined arities");
730
731         /* assign the ins to the nodes */
732         set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
733         set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
734
735 #if 0
736         fix_backedge_info(loophead);
737         fix_backedge_info(peelhead);
738 #else
739         (void)fix_backedge_info;
740 #endif
741
742         for_each_phi(loophead, phi) {
743                 ir_node **ins = get_node_info( phi )->ins;
744                 set_irn_in(phi, lhead_arity, ins);
745         }
746
747         for_each_phi(peelhead, phi) {
748                 ir_node **ins = get_node_info( phi )->ins;
749                 set_irn_in(phi, phead_arity, ins);
750         }
751 }
752
753 /**
754  * Create a raw copy (ins are still the old ones) of the given node.
755  */
756 static ir_node *rawcopy_node(ir_node *node)
757 {
758         int i, arity;
759         ir_node *cp;
760         node_info *cpstate;
761
762         cp = exact_copy(node);
763
764         arity = get_irn_arity(node);
765
766         for (i = 0; i < arity; ++i) {
767                 if (is_backedge(node, i))
768                         set_backedge(cp, i);
769         }
770
771         set_copy(node, cp);
772         cpstate = new_node_info();
773         set_irn_link(cp, cpstate);
774         mark_irn_visited(cp);
775         return cp;
776 }
777
778 /**
779  * This walker copies all walked nodes.
780  * If the walk_condition is true for a node, it is walked.
781  * Every node's node_info->copy attribute has to be NULL prior to every walk.
782  */
783 static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
784 {
785         int i;
786         int arity;
787         ir_node *cp;
788         ir_node **cpin;
789         ir_graph *irg = current_ir_graph;
790         node_info *node_info = get_node_info(node);
791
792         /**
793          * break condition and cycle resolver, creating temporary node copies
794          */
795         if (get_irn_visited(node) >= get_irg_visited(irg)) {
796                 /* Here we rely on nodestate's copy being initialized with NULL */
797                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
798                 if (node_info->copy == NULL) {
799                         cp = rawcopy_node(node);
800                         DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
801                 }
802                 return;
803         }
804
805         /* Walk */
806         mark_irn_visited(node);
807
808         if (!is_Block(node)) {
809                 ir_node *pred = get_nodes_block(node);
810                 if (walk_condition(pred)) {
811                         DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred)));
812                         copy_walk(pred, walk_condition, set_loop);
813                 }
814         }
815         arity = get_irn_arity(node);
816
817         NEW_ARR_A(ir_node *, cpin, arity);
818
819         for (i = get_irn_arity(node) - 1; i >= 0; --i) {
820                 ir_node *pred = get_irn_n(node, i);
821
822                 if (walk_condition(pred)) {
823                         DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred)));
824                         copy_walk(pred, walk_condition, set_loop);
825                         cpin[i] = get_copy(pred);
826                         DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n",
827                                 get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
828                 } else {
829                         cpin[i] = pred;
830                 }
831         }
832
833         /* copy node / finalize temp node */
834         if (node_info->copy == NULL) {
835                 /* No temporary copy existent */
836                 cp = rawcopy_node(node);
837                 DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
838         } else {
839                 /* temporary copy is existent but without correct ins */
840                 cp = get_copy(node);
841                 DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
842         }
843
844         if (!is_Block(node)) {
845                 ir_node *cpblock = get_copy(get_nodes_block(node));
846
847                 set_nodes_block(cp, cpblock );
848                 /* fix the phi information in attr.phis */
849                 if( is_Phi(cp) )
850                         add_Block_phi(cpblock, cp);
851         } else {
852                 /* macroblock info has not been copied */
853                 set_Block_MacroBlock(cp, cp);
854         }
855
856         set_irn_loop(cp, set_loop);
857         set_irn_in(cp, ARR_LEN(cpin), cpin);
858 }
859
860 /**
861  * Walks the given node, its block and all of its predecessors bottom up
862  * and calls func on every visited node (post order).
863  * Requires inc_irg_visited prior to every walk.
864  */
865 static void loop_walker(ir_node *node, walker_condition *func)
866 {
867         int i;
868         int arity;
869         ir_graph *irg = current_ir_graph;
870
871         if (get_irn_visited(node) >= get_irg_visited(irg))
872                 return;
873
874         /* Walk */
875         mark_irn_visited(node);
876
877         if (!is_Block(node)) {
878                 ir_node *pred = get_nodes_block(node);
879                 loop_walker(pred, func);
880         }
881
882         arity = get_irn_arity(node);
883
884         for (i = get_irn_arity(node) - 1; i >= 0; --i) {
885                 ir_node *pred = get_irn_n(node, i);
886                 loop_walker(pred, func);
887         }
888
889         func(node);
890 }
891
892
893 /**
894  * Performs loop peeling and fixes the control flow for the loop entry nodes, which now have more predecessors.
895  */
896 static void peel(out_edge *loop_outs)
897 {
898         int i;
899         ir_node **entry_buffer;
900         int entry_c = 0;
901
902         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
903
904         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
905
906         /* duplicate loop walk */
907         inc_irg_visited(current_ir_graph);
908
909         for(i = 0; i < ARR_LEN(loop_outs); i++) {
910                 out_edge entry = loop_outs[i];
911                 ir_node *node = entry.node;
912                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
913
914                 if (is_Block(node)) {
915                         copy_walk(pred, is_in_loop, NULL);
916                         duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
917                 } else {
918                         copy_walk(pred, is_in_loop, NULL);
919                         /* leave out keepalives */
920                         if (!is_End(node))
921                                 /* Node is user of a value defined inside the loop.
922                                  * We'll need a phi since we duplicated the loop. */
923                                 /* Cannot construct_ssa here, because it needs another walker. */
924                                 entry_buffer[entry_c++] = pred;
925                 }
926         }
927
928         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
929
930         /* Rewires the 2 heads */
931         peel_fix_heads();
932
933         /* Generate phis for values from peeled code and original loop */
934         construct_ssa_foreach(entry_buffer, entry_c);
935         /*for(i = 0; i < entry_c; i++)
936         {
937                 ir_node *cppred, *block, *cpblock, *pred;
938
939                 pred = entry_buffer[i];
940                 cppred = get_copy(pred);
941                 block = get_nodes_block(pred);
942                 cpblock = get_nodes_block(cppred);
943                 construct_ssa(block, pred, cpblock, cppred);
944         }*/
945 }
946
947 /**
948  * Populates cur_head_outs with (node, pred_pos) tuples
949  * where the node's pred at pred_pos is in the head or condition chain, but the node itself is not.
950  * Head and condition chain blocks must be marked.
951  */
952 static void get_head_outs(ir_node *node, void *env)
953 {
954         int i;
955         int arity = get_irn_arity(node);
956         (void) env;
957
958         DB((dbg, LEVEL_5, "get head entries %ld \n", get_irn_node_nr(node)));
959
960         for(i = 0; i < arity; ++i) {
961                 /* node is not in the head, but the predecessor is.
962                  * (head or loop chain nodes are marked) */
963
964                 DB((dbg, LEVEL_5, "... "));
965                 DB((dbg, LEVEL_5, "node %ld  marked %d (0)  pred %d marked %d (1) \n",
966                     node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
967
968                 if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
969                         out_edge entry;
970                         entry.node = node;
971                         entry.pred_irn_n = i;
972                         DB((dbg, LEVEL_5,
973                                 "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
974                                 get_irn_node_nr(node), i, get_irn_node_nr(node), get_irn_node_nr(get_irn_n(node, i))));
975                         ARR_APP1(out_edge, cur_head_outs, entry);
976                 }
977         }
978 }
979
980 /**
981  * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
982  * A block belongs to the chain if a condition branches out of the loop.
983  * Returns 1 if the given block belongs to the condition chain.
984  */
985 static unsigned find_condition_chains(ir_node *block) {
986         const ir_edge_t *edge;
987         unsigned mark = 0;
988         int nodes_n = 0;
989
990         DB((dbg, LEVEL_5, "condition_chains for block %ld\n", get_irn_node_nr(block)));
991
992         /* Collect all outs, including keeps.
993          * (TODO firm function for number of out edges?) */
994         foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
995                 ++nodes_n;
996         }
997
998
999         /* We do not want to collect more nodes from condition chains than the limit allows.
1000          * Also, leave at least one block as loop body. */
1001         if (head_inversion_node_count + nodes_n > head_inversion_node_limit
1002                     || head_inversion_block_count + 1 == loop_info.blocks) {
1003                 set_Block_mark(block, 0);
1004
1005                 return 0;
1006         }
1007
1008         /* First: check our successors, and add all succs that are outside of the loop to the list */
1009         foreach_block_succ(block, edge) {
1010                 ir_node *src = get_edge_src_irn( edge );
1011                 int pos = get_edge_src_pos( edge );
1012
1013                 if (!is_in_loop(src)) {
1014                         out_edge entry;
1015
1016                         mark = 1;
1017                         entry.node = src;
1018                         entry.pred_irn_n = pos;
1019                         ARR_APP1(out_edge, cond_chain_entries, entry);
1020                         mark_irn_visited(src);
1021                 }
1022         }
1023
1024         /* this block is not part of the chain,
1025          * because the chain would become too long or we have no successor outside of the loop */
1026         if (mark == 0) {
1027                 set_Block_mark(block, 0);
1028                 return 0;
1029         } else {
1030                 set_Block_mark(block, 1);
1031                 ++head_inversion_block_count;
1032                 DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
1033                 head_inversion_node_count += nodes_n;
1034         }
1035
1036         /* Second: walk all successors, and add them to the list if they are not part of the chain */
1037         foreach_block_succ(block, edge) {
1038                 unsigned inchain;
1039                 ir_node *src = get_edge_src_irn( edge );
1040                 int pos = get_edge_src_pos( edge );
1041
1042                 /* already done cases */
1043                 if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
1044                         continue;
1045                 }
1046
1047                 mark_irn_visited(src);
1048                 DB((dbg, LEVEL_5, "condition chain walk %ld\n", get_irn_node_nr(src)));
1049                 inchain = find_condition_chains(src);
1050
1051                 /* if successor is not part of chain we need to collect its outs */
1052                 if (!inchain) {
1053                         out_edge entry;
1054                         entry.node = src;
1055                         entry.pred_irn_n = pos;
1056                         ARR_APP1(out_edge, cond_chain_entries, entry);
1057                 }
1058         }
1059         return mark;
1060 }
1061
1062 /**
1063  * Rewire the loop head and inverted head for loop inversion.
1064  */
1065 static void inversion_fix_heads(void)
1066 {
1067         ir_node **loopheadnins, **invheadnins;
1068         ir_node *loophead = loop_cf_head;
1069         ir_node *invhead =      get_copy(loophead);
1070
1071         int headarity =         get_irn_arity(loophead);
1072         ir_node *phi;
1073         int i;
1074
1075         int lheadin_c = 0;
1076         int iheadin_c = 0;
1077
1078         int backedges_n = get_backedge_n(loophead, 0);
1079         int lhead_arity = backedges_n;
1080         int ihead_arity = headarity - backedges_n;
1081
1082         assert(lhead_arity != 0 && "loophead has arity 0. Probably wrong backedge information.");
1083         assert(ihead_arity != 0 && "inversionhead has arity 0. Probably wrong backedge information.");
1084
1085         /* new in arrays for all phis in the head blocks */
1086         NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
1087         NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
1088
1089         for_each_phi(loophead, phi) {
1090                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
1091         }
1092         for_each_phi(invhead, phi) {
1093                 NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
1094         }
1095
1096         for (i = 0; i < headarity; i++) {
1097                 ir_node *pred = get_irn_n(loophead, i);
1098
1099                 /**
1100                  * Rewire the head blocks ins and their phi ins.
1101                  * Requires phi list per block.
1102                  */
1103                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1104                         /* just copy these edges */
1105                         loopheadnins[lheadin_c] = pred;
1106                         /*get_node_info(pred)->link = NULL;*/
1107                         for_each_phi(loophead, phi) {
1108                                 get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
1109                         }
1110                         ++lheadin_c;
1111                 } else {
1112                         invheadnins[iheadin_c] = pred;
1113                         /*get_node_info(pred)->link = (void *)1;*/
1114                         for_each_phi(invhead, phi) {
1115                                 get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
1116                         }
1117                         ++iheadin_c;
1118                 }
1119         }
1120
1121         /* assign the ins to the head blocks */
1122         set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
1123         set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
1124
1125
1126 #if 0
1127         /* FIX and set former be to normal edges */
1128         fix_backedge_info(loophead);
1129         fix_backedge_info(invhead);
1130 #else
1131         (void)fix_backedge_info;
1132 #endif
1133
1134         /* assign the ins for the phis */
1135         for_each_phi(loophead, phi) {
1136                 ir_node **ins = get_node_info(phi)->ins;
1137                 set_irn_in(phi, lhead_arity, ins);
1138         }
1139
1140         for_each_phi(invhead, phi) {
1141                 ir_node **ins = get_node_info(phi)->ins;
1142                 set_irn_in(phi, ihead_arity, ins);
1143         }
1144 }
1145
1146 static void inversion_walk(out_edge *head_entries)
1147 {
1148         int i;
1149         ir_node *phi;
1150         int entry_c = 0;
1151         ir_node **entry_buffer;
1152         ir_node **head_phi_assign;
1153
1154         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
1155
1156         head_phi_assign = NEW_ARR_F(ir_node *, 0);
1157
1158         /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
1159         for_each_phi(loop_cf_head , phi) {
1160                 for(i=0; i<get_irn_arity(phi); ++i) {
1161                         ir_node *def = get_irn_n(phi, i);
1162                         if (is_nodesblock_marked(def)) {
1163                                 ARR_APP1(ir_node *, head_phi_assign, def);
1164                         }
1165                 }
1166         }
1167
1168         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1169
1170         /**
1171          * duplicate condition chain
1172          **/
1173         inc_irg_visited(current_ir_graph);
1174
1175         for(i = 0; i < ARR_LEN(head_entries); ++i) {
1176                 out_edge entry = head_entries[i];
1177                 ir_node *node = entry.node;
1178                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1179
1180                 if (is_Block(node)) {
1181                         DB((dbg, LEVEL_5, "\nINIT walk block %ld\n", get_irn_node_nr(pred)));
1182                         copy_walk(pred, is_nodesblock_marked, cur_loop);
1183                         duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
1184                 } else {
1185                         DB((dbg, LEVEL_5, "\nInit walk node  %ld\n", get_irn_node_nr(pred)));
1186                         copy_walk(pred, is_nodesblock_marked, cur_loop);
1187
1188                         /* ignore keepalives */
1189                         if (!is_End(node))
1190                                 /* Node is user of a value assigned inside the loop.
1191                                  * We will need a phi since we duplicated the head. */
1192                                 entry_buffer[entry_c++] = pred;
1193                 }
1194         }
1195
1196         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1197
1198         inversion_fix_heads();
1199
1200         /* Generate phis for users of values assigned in the condition chain
1201          * and read in the loops body */
1202         construct_ssa_foreach(entry_buffer, entry_c);
1203
1204         /* Generate phis for values that are assigned in the condition chain
1205          * but not read in the loops body. */
1206         construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
1207
1208         loop_cf_head = get_copy(loop_cf_head);
1209 }
1210
1211 /* Loop peeling */
1212 void loop_peeling(void)
1213 {
1214         cur_loop_outs = NEW_ARR_F(out_edge, 0);
1215         irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1216
1217         DB((dbg, LEVEL_3, "is endless loop: %d (no outs but keepalives)\n", loop_info.outs == 0));
1218
1219 #if 0
1220         /*inc_irg_visited(current_ir_graph);*/
1221         loop_walker( loop_outs, NULL, get_invariants, NULL );
1222
1223         /* This could be improved with knowledge about variable range. */
1224         if (loop_info.stores == 0 && loop_info.invariant_loads > 0)
1225                 do_peel = 1;
1226 #endif
1227
1228         peel(cur_loop_outs);
1229
1230         /* clean up */
1231         reset_node_infos();
1232
1233         set_irg_doms_inconsistent(current_ir_graph);
1234         set_irg_loopinfo_inconsistent(current_ir_graph);
1235         set_irg_outs_inconsistent(current_ir_graph);
1236
1237         DEL_ARR_F(cur_loop_outs);
1238 }
1239
1240 /* Loop inversion */
1241 void loop_inversion(void)
1242 {
1243         unsigned do_inversion = 1;
1244         int     i;
1245
1246
1247 #if 0
1248         /* Loop complexity too high */
1249         if (loop_info.opnodes_n > max_opnodes)
1250                 return;
1251 #endif
1252
1253         head_inversion_node_limit = INT_MAX;
1254
1255 #if 0
1256         /* Search for invariant loads. */
1257         /* Did not apply on any of the testsuite cases. */
1258
1259         cur_loop_outs = NEW_ARR_F(out_edge, 0);
1260         irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1261
1262         for(i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1263                 out_edge entry = cur_loop_outs[i];
1264                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1265                 loop_walker(pred, get_invariants);
1266         }
1267
1268         DEL_ARR_F(cur_loop_outs);
1269
1270         /* This could be improved with knowledge about variable range.*/
1271         if (loop_info.stores == 0 && loop_info.invariant_loads > 0) {
1272                 /* Does not happen for any of the testsuite cases. */
1273                 loop_info.do_invariant_opt = 1;
1274         }
1275
1276 #else
1277         (void)loop_walker;
1278         (void)get_invariants;
1279         (void)i;
1280 #endif
1281
1282         /* Search for condition chains. */
1283         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1284
1285         irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1286
1287         loop_info.blocks = get_loop_n_blocks(cur_loop);
1288         cond_chain_entries = NEW_ARR_F(out_edge, 0);
1289
1290         head_inversion_node_count = 0;
1291         head_inversion_block_count = 0;
1292
1293         set_Block_mark(loop_cf_head, 1);
1294         mark_irn_visited(loop_cf_head);
1295         inc_irg_visited(current_ir_graph);
1296
1297         find_condition_chains(loop_cf_head);
1298
1299         DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
1300         if (loop_info.blocks < 2) {
1301                 do_inversion = 0;
1302                 DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
1303         }
1304
1305         /* We also catch endless loops here,
1306          * because they do not have a condition chain. */
1307         if (head_inversion_block_count < 1) {
1308                 do_inversion = 0;
1309                 DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
1310         }
1311
1312         if (do_inversion) {
1313                 cur_head_outs = NEW_ARR_F(out_edge, 0);
1314
1315                 /* Get all edges pointing into the head or condition chain (outs). */
1316                 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1317                 inversion_walk(cur_head_outs);
1318
1319                 DEL_ARR_F(cur_head_outs);
1320
1321                 set_irg_doms_inconsistent(current_ir_graph);
1322                 set_irg_loopinfo_inconsistent(current_ir_graph);
1323                 set_irg_outs_inconsistent(current_ir_graph);
1324         }
1325
1326         /* free */
1327         DEL_ARR_F(cond_chain_entries);
1328         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1329 }
1330
1331 /**
1332  * Returns last element of linked list of copies by
1333  * walking the linked list.
1334  */
1335 ir_node *get_last_copy(ir_node *node)
1336 {
1337         ir_node *copy, *cur;
1338         cur = node;
1339         while ((copy = get_copy(cur))) {
1340                 cur = copy;
1341         }
1342         return cur;
1343 }
1344
1345 /**
1346  * Rewire floating copies of the current loop.
1347  */
1348 void unrolling_fix_cf(void)
1349 {
1350         ir_node *loophead = loop_cf_head;
1351         int headarity =         get_irn_arity(loophead);
1352         ir_node *phi, *headnode;
1353         /*ir_node *high, *low;*/
1354         int i;
1355
1356
1357         int uhead_in_n = 0;
1358
1359         int backedges_n = get_backedge_n(loophead, 0);
1360         int unroll_arity = backedges_n;
1361
1362         /* Create ins for all heads and their phis */
1363         headnode = get_copy(loophead);
1364         for_each_copy(headnode) {
1365                 NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
1366                 for_each_phi(headnode, phi) {
1367                         NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
1368                 }
1369         }
1370
1371         /* Append the copies to the existing loop. */
1372         for (i = 0; i < headarity; i++) {
1373                 ir_node *upper_head = loophead;
1374                 ir_node *lower_head = get_copy(loophead);
1375
1376                 ir_node *upper_pred = get_irn_n(loophead, i);
1377                 ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
1378
1379                 ir_node *last_pred;
1380
1381                 /**
1382                  * Build unrolled loop top down
1383                  */
1384                 if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
1385                         for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
1386                                 get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
1387
1388                                 for_each_phi(upper_head, phi) {
1389                                         ir_node *phi_copy = get_copy(phi);
1390                                         get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
1391                                 }
1392                         }
1393                         /*last_head = upper_head;*/
1394                         last_pred = upper_pred;
1395                         ++uhead_in_n;
1396
1397                         /* Fix the topmost loop heads backedges. */
1398                         set_irn_n(loophead, i, last_pred);
1399                         for_each_phi(loophead, phi) {
1400                                 ir_node *last_phi = get_last_copy(phi);
1401                                 ir_node *pred = get_irn_n(last_phi, i);
1402                                 set_irn_n(phi, i, pred);
1403                         }
1404                 }
1405         }
1406
1407         headnode = get_copy(loophead);
1408         for_each_copy(headnode) {
1409                 set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
1410                 for_each_phi(headnode, phi) {
1411                         set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
1412                 }
1413         }
1414 }
1415
1416 /**
1417  * Loop unrolling
1418  * Could be improved with variable range information.
1419  */
1420 void loop_unrolling(void)
1421 {
1422         int i, j;
1423         ir_node **entry_buffer;
1424         int entry_c = 0;
1425
1426         unroll_times = 2;
1427
1428         cur_loop_outs = NEW_ARR_F(out_edge, 0);
1429         irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
1430
1431         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1432         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(cur_loop_outs) * (unroll_times - 1));
1433
1434         /* duplicate whole loop content */
1435         inc_irg_visited(current_ir_graph);
1436
1437         for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
1438                 out_edge entry = cur_loop_outs[i];
1439                 ir_node *node = entry.node;
1440                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
1441
1442                 /* build linked list of copied nodes/blocks */
1443                 if (is_Block(node)) {
1444                         for (j = 0; j < unroll_times-1; ++j) {
1445                                 copy_walk(pred, is_in_loop, cur_loop);
1446                                 duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
1447
1448                                 pred = get_copy(pred);
1449                         }
1450                 } else {
1451                         for (j = 0; j < unroll_times-1; ++j) {
1452                                 copy_walk(pred, is_in_loop, cur_loop);
1453                                 if (!is_End(node))              /* leave out keepalives */
1454                                 /* This node is a user of a value defined inside the loop.
1455                                          * We need a phi since we duplicated the loop. */
1456                                         /* Cannot construct_ssa here, because that would need another walker. */
1457                                         entry_buffer[entry_c++] = pred;
1458
1459                                 pred = get_copy(pred);
1460                         }
1461                 }
1462         }
1463
1464         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1465         DEL_ARR_F(cur_loop_outs);
1466
1467         /* Line up the floating copies. */
1468         unrolling_fix_cf();
1469
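	/* For illustration: a value defined inside the loop and used behind it,
	 * e.g.
	 *
	 *   do { x = ...; } while (c);
	 *   use(x);
	 *
	 * now has one definition per copy of the loop content, so every use
	 * recorded in entry_buffer gets a Phi constructed for it below. */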
1470         /* Generate phis for all loop outs */
1471         construct_ssa_foreach(entry_buffer, entry_c);
1472 }
1473
1474 /* Per-loop initialization and analysis; runs the enabled transformations. */
1475 static void init_analyze(ir_loop *loop)
1476 {
1477         /* Init new for every loop */
1478         cur_loop = loop;
1479
1480         loop_cf_head = NULL;
1481         loop_cf_head_valid = 1;
1482         loop_inv_head = NULL;
1483         loop_peeled_head = NULL;
1484
1485         loop_info.calls = 0;
1486         loop_info.invariant_loads = 0;
1487         loop_info.loads = 0;
1488         loop_info.stores = 0;
1489         loop_info.opnodes_n = 0;
1490         loop_info.blocks = 0;
1491         loop_info.outs = 0;
1492
1493         DB((dbg, LEVEL_2, "  >>>> current loop includes node %ld <<<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
1494
1495         irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
1496
1497         /* RETURN if there is no valid head */
1498         if (!loop_cf_head || !loop_cf_head_valid) {
1499                 DB((dbg, LEVEL_2,   "No valid loop head. Nothing done.\n"));
1500                 return;
1501         }
1502
1503         if (enable_peeling)
1504                 loop_peeling();
1505
1506         if (enable_inversion)
1507                 loop_inversion();
1508         if (enable_unrolling)
1509                 loop_unrolling();
1510
1511 #if 0
1512         /* RETURN if there is a call in the loop */
1513         if (loop_info.calls)
1514                 return;
1515 #endif
1516
1517         DB((dbg, LEVEL_2, "      <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
1518 }
1519
1520 /* Find the innermost loops and remember one block of each for later analysis. */
1521 static void find_most_inner_loop(ir_loop *loop)
1522 {
1523         /* descend into sons */
1524         int sons = get_loop_n_sons(loop);
1525
1526         if (sons == 0) {
1527                 loop_element elem;
1528                 int el_n, i;
1529
1530                 el_n = get_loop_n_elements(loop);
1531
1532                 for (i=0; i < el_n; ++i) {
1533                         elem = get_loop_element(loop, i);
1534                         /* We can only rely on the blocks,
1535                          * as the loop attribute of the nodes seems not to be set. */
1536                         if (is_ir_node(elem.kind) && is_Block(elem.node)) {
1537                                 ARR_APP1(ir_node *, loops, elem.node);
1538                                 DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
1539                                 break;
1540                         }
1541                 }
1542         } else {
1543                 int s;
1544                 for (s = 0; s < sons; ++s) {
1545                         find_most_inner_loop(get_loop_son(loop, s));
1546                 }
1547         }
1548 }
1549
1550 /**
1551  * Assure preconditions are met and go through all loops.
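 *
 * Rough flow: establish edges, outs and the control flow loop tree, collect
 * the per-block Phi lists and per-node node_info, record one block of every
 * innermost loop, then analyze and transform each recorded loop while
 * re-establishing the invalidated analyses in between.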
1552  */
1553 void loop_optimization(ir_graph *irg)
1554 {
1555         ir_loop *loop;
1556         int     i, sons, nr;
1557
1558         /* Init */
1559         link_node_state_list = NULL;
1560         set_current_ir_graph(irg);
1561
1562         /* preconditions */
1563         edges_assure(irg);
1564         assure_irg_outs(irg);
1565         /* NOTE: sets only the loop attribute of blocks, not nodes */
1566         /* NOTE: Kills links */
1567         assure_cf_loop(irg);
1568
1569         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1570         collect_phiprojs(irg);
1571         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1572
1573         /* allocate node_info for additional information on nodes */
1574         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1575         irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1576
1577         loop = get_irg_loop(irg);
1578         sons = get_loop_n_sons(loop);
1579
1580         loops = NEW_ARR_F(ir_node *, 0);
1581
1582         for (nr = 0; nr < sons; ++nr) {
1583                 find_most_inner_loop(get_loop_son(loop, nr));
1584         }
1585
1586 /* TODO Keep backedges during optimization to avoid this unnecessary dealloc/alloc.
1587  * (set_irn_in seems to destroy them)
1588  */
1589 #if 0
1590         for (i = 0; i < ARR_LEN(loops); ++i) {
1591                 ir_loop *loop;
1592
1593                 loop = get_irn_loop(loops[i]);
1594                 init_analyze(loop);
1595         }
1596 #else
1597         /* This part is useful for testing,
1598          * or it has to be used if the backedge information is destroyed,
1599          * which is currently the case: the backedge information gets lost
1600          * before inversion_fix_heads/unrolling_fix_cf, which results in Bad nodes.
1601          * NOTE: The testsuite runs successfully nevertheless...
1602          */
1603
1604         /**
1605          * assure_cf_loop() creates a completely new loop tree.
1606          * Thus we cannot optimize a loop, call assure_cf_loop() and continue with the
1607          * next loop, as the next loop would have to be searched again and cannot be
1608          * distinguished from the loops that are already done.
1609          * The links of the loops are not available anymore either (to store a "loop done" flag).
1610          * Therefore we save one block per loop.
1611          * NOTE: We rely on the loop optimizations not to remove any block from its loop.
1612          * Later, we fetch the block's loop attribute, as it is updated by assure_cf_loop().
1613          */
1614         for (i = 0; i < ARR_LEN(loops); ++i) {
1615                 ir_loop *loop;
1616
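		/* Free the node_info of the previous round and re-establish edges,
		 * outs and the loop tree, since the transformations invalidate them. */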
1617                 free_node_info();
1618                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1619
1620                 edges_assure(current_ir_graph);
1621                 assure_irg_outs(current_ir_graph);
1622
1623                 /* NOTE: sets only the loop attribute of blocks */
1624                 /* NOTE: Kills links */
1625                 assure_cf_loop(current_ir_graph);
1626
1627                 irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
1628                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1629
1630                 /* Get loop from block */
1631                 loop = get_irn_loop(loops[i]);
1632                 init_analyze(loop);
1633         }
1634 #endif
1635
1636         /* Free */
1637         DEL_ARR_F(loops);
1638
1639         free_node_info();
1640         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1641         ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
1642 }
1643
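/* Public entry points: each enables exactly one transformation and runs
 * loop_optimization() on the given graph. */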
1644 void do_loop_unrolling(ir_graph *irg)
1645 {
1646         enable_unrolling = 1;
1647         enable_peeling = 0;
1648         enable_inversion = 0;
1649
1650         DB((dbg, LEVEL_2, " >>> unrolling (Startnode %ld) <<<\n",
1651                 get_irn_node_nr(get_irg_start(irg))));
1652
1653         loop_optimization(irg);
1654
1655         DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %ld) <<<\n",
1656                 get_irn_node_nr(get_irg_start(irg))));
1657 }
1658
1659 void do_loop_inversion(ir_graph *irg)
1660 {
1661         enable_unrolling = 0;
1662         enable_peeling = 0;
1663         enable_inversion = 1;
1664
1665         DB((dbg, LEVEL_2, " >>> inversion (Startnode %ld) <<<\n",
1666                 get_irn_node_nr(get_irg_start(irg))));
1667
1668         loop_optimization(irg);
1669
1670         DB((dbg, LEVEL_2, " >>> inversion done (Startnode %ld) <<<\n",
1671                 get_irn_node_nr(get_irg_start(irg))));
1672 }
1673
1674 void do_loop_peeling(ir_graph *irg)
1675 {
1676         enable_unrolling = 0;
1677         enable_peeling = 1;
1678         enable_inversion = 0;
1679
1680         DB((dbg, LEVEL_2, " >>> peeling (Startnode %ld) <<<\n",
1681                 get_irn_node_nr(get_irg_start(irg))));
1682
1683         loop_optimization(irg);
1684
1685         DB((dbg, LEVEL_2, " >>> peeling done (Startnode %ld) <<<\n",
1686                 get_irn_node_nr(get_irg_start(irg))));
1687
1688 }
1689
1690 void firm_init_loop_opt(void)
1691 {
1692         FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
1693 }