Initial version of loop peeling
[libfirm] / ir / opt / loop.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Loop peeling and unrolling
23  * @author   Christian Helmer
24  * @version  $Id$
25  */
26
27 //#include "config.h"
28
29 //#include <limits.h>
30 #include <assert.h>
31
32 #include "irnode.h"
33 #include "irnode_t.h"
34 #include "irgraph_t.h"
35 //#include "irprog_t.h"
36
37 //#include "iroptimize.h"
38 #include "ircons_t.h"
39 #include "iropt_t.h"
40 #include "irgopt.h"
41 //#include "irgmod.h"
42 #include "irgwalk.h"
43
44 //#include "array_t.h"
45 #include "list.h"
46 //#include "pset.h"
47 //#include "pmap.h"
48 //#include "pdeq.h"
49 //#include "xmalloc.h"
50 //#include "pqueue.h"
51
52 #include "irouts.h"
53 #include "irloop_t.h"
54 #include "irbackedge_t.h"
55 //#include "opt_inline_t.h"
56 //#include "cgana.h"
57 //#include "trouts.h"
58 //#include "error.h"
59
60 //#include "analyze_irg_args.h"
61 #include "iredges_t.h"
62 //#include "irflag_t.h"
63 //#include "irhooks.h"
64 #include "irtools.h"
65 //#include "iropt_dbg.h"
66 #include "irpass_t.h"
67 #include "irloop.h"
68
69 #include "array_t.h"
70 #include "irdump.h"
71
72
73 /* convenience macro iterating over every phi node of the block */
74 #define for_each_phi(block, phi) \
75         for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
76
77 ir_loop *cur_loop;
78
79 /* The loop walker should be possible to abort if nothing can be done anymore */
80 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
81
82 /* stores pair of node and number for nodes predecessor */
83 typedef struct loop_entry_t {
84         ir_node *node;                  /* node outside of the loop */
85         int pred_irn_n;                 /* with pred_irn_n pointing inside loop */
86 } loop_entry_t;
87
88 /* Store complex values in the nodes link */
89 //TODO optimize. Every node has these values and seldom many otm are used.
90 typedef struct link_node_state_t {
91         unsigned cloned:1;
92         unsigned temp:1;
93         unsigned invariant:1;
94         ir_node *link;
95         ir_node *ssalink;       /* we will have to keep the link to the copies, as well as have temporary links for ssa creation */
96         ir_node **ins;          /* ins for phi nodes, during rewiring of blocks */
97         // TODO omit ins. can be replaced by new ins and newunknown ins for each
98 } link_node_state_t;
99
100
101 loop_entry_t *loop_entries;             /* loop entries (from below) in the node graph */
102 loop_entry_t *backedges;                /* backedges exclusively from the current loop */
103 loop_entry_t *alien_backedges;  /* The head can be head of several loops. */
104 loop_entry_t *head_edges;       /* The head can be head of several loops. */
105
106 ir_node *loop_cf_head = NULL;           /* loop exit in the node graph */
107 unsigned loop_cf_head_valid = 1;        /* a loop may/must have one head, otherwise invalid */
108
109 unsigned has_sto = 0;                           /* If we store inside the loop we might
110                                                                          * have disambiguation problems */
111 //DBG
112 //void arrdump(ir_node **arr)
113 //{
114 //      int i;
115 //      for (i=0; i<ARR_LEN(arr); i++)
116 //      {
117 //              printf("inarr: %ld          is block %d \n", (arr[i]->node_nr), is_Block(arr[i]));
118 //      }
119 //}
120
121 /**
122  * Returns the state of the given node.
123  */
124 link_node_state_t *get_lstate(ir_node *n)
125 {
126         return ((link_node_state_t *)n->link);
127 }
128
129 /**
130  * Returns the link inside of the nodes state which is pointing to its copy
131  * most of the time during loop peeling.
132  */
133 ir_node *get_copy_of(ir_node *n)
134 {
135         return ((link_node_state_t *)n->link)->link;
136 }
137
138 /**
139  * Returns true if the node or block is in cur_loop.
140  */
141 unsigned is_in_loop(ir_node *node)
142 {
143 //      if (is_Block(node)) {
144 //              if (node->loop == cur_loop) {
145 //                      printf("                                   INLOOP %ld \n", node->node_nr);
146 //              }
147 //              return (node->loop == cur_loop);
148 //      } else {
149 //              if ( get_nodes_block(node)->loop == cur_loop ) {
150 //                      printf("                                   INLOOP %ld \n", node->node_nr);
151 //              }
152 //              return ( get_nodes_block(node)->loop == cur_loop );
153 //      }
154         if (is_Block(node)) {
155                 return (node->loop == cur_loop);
156         } else {
157                 return ( get_nodes_block(node)->loop == cur_loop );
158         }
159 }
160
161 /**
162  * Returns if the given be is an alien edge
163  */
164 unsigned is_alien_edge(ir_node *n, int i)
165 {
166         return( !is_in_loop( get_irn_n( n, i ) ) );
167 }
168
169 static void add_pred(ir_node* node, ir_node* x)
170 {
171         ir_node** ins;
172         int n;
173         int i;
174
175 //      if(!node)
176 //              printf("NONODE\n");
177
178         //printf("addpred %ld   pred %ld \n", node->node_nr, x->node_nr);
179
180         // WHY limit it to blocks and phi?
181         //assert(is_Block(node) || is_Phi(node));
182
183         n = get_irn_arity(node);
184         NEW_ARR_A(ir_node*, ins, n + 1);
185         for (i = 0; i < n; i++)
186                 ins[i] = get_irn_n(node, i);
187         ins[n] = x;
188         set_irn_in(node, n + 1, ins);
189 }
190
191 void block_phi_walker(ir_node *n, void *env)
192 {
193         const ir_edge_t *edge;
194         (void) env;
195
196         /* RETURN */
197         if (!is_Block(n))
198                 return;
199
200         /* generate phi list for every block */
201         n->attr.block.phis = NULL;
202
203         foreach_out_edge(n, edge) {
204                 ir_node *src = get_edge_src_irn(edge);
205                 if (is_Phi(src))
206                 {
207                         //printf("%ld has phi %ld \n", block->node_nr, src->node_nr);
208                         add_Block_phi(n, src);
209                 }
210         }
211 }
212
213 /**
214  * Calls func() for every block in the given loop.
215  */
216 void for_each_loop_block(ir_loop *loop, irg_walk_func *func, void *env)
217 {
218         int elements, e;
219         elements = get_loop_n_elements(loop);
220
221         for(e=0; e<elements; e++)
222         {
223                 loop_element elem = get_loop_element(loop, e);
224                 if  (is_ir_node(elem.kind) && is_Block(elem.node) )
225                 {
226                         //printf(" foreach LOOPBLOCK %ld \n", elem.node->node_nr);
227                         func(elem.node, env);
228                 }
229         }
230 }
231
232 /**
233  * collects the blocks backedges and creates the phi list for every block
234  */
235 void collect_backedges(ir_node *block, void *env)
236 {
237         (void) env;
238
239         printf("LOOP BLOCK %ld\n", block->node_nr);
240
241         /* collect backedges */
242         if (has_backedges(block))
243         {
244                 int i;
245                 int arity = get_irn_arity(block);
246
247                 for(i = 0; i < arity; ++i) {
248                         ir_node *pred = get_irn_n(block, i);
249
250                         loop_entry_t be;
251                         be.node = block;
252                         be.pred_irn_n = i;
253
254                         ARR_APP1(loop_entry_t, head_edges, be);
255
256                         if (is_backedge(block, i) )
257                         {
258                                 if ( is_in_loop(pred) ) {
259                                         //printf("be: %ld --> %ld \n", block->node_nr, pred->node_nr);
260                                         ARR_APP1(loop_entry_t, backedges, be);
261                                 } else {
262                                         //printf("alien be: %ld --> %ld \n", block->node_nr, pred->node_nr);
263                                         ARR_APP1(loop_entry_t, alien_backedges, be);
264                                 }
265                         }
266 //                      else {
267 //                              if ( !is_in_loop(pred) ) {
268 //                                      ARR_APP1(loop_entry_t, head_edges, be);
269 //                      }
270
271                 }
272         }
273 }
274
275 /**
276  *      Walks through all loop nodes.
277  */
278 unsigned loop_walker_rec(ir_node *node,
279                 irg_walk_func_abortable *pre,
280                 irg_walk_func_abortable *post, void * env)
281 {
282         int i;
283         unsigned stop = 0;
284
285         ir_graph *irg = current_ir_graph;
286
287         /* RETURN if we walked out of the loop*/
288         if (!is_in_loop(node))
289                 return 0;
290
291         if (pre)
292         {
293                 unsigned stop = pre(node, env);
294                 if (stop)
295                         return stop;
296         }
297
298         set_irn_visited(node, irg->visited);
299
300         if (node->op != op_Block) {
301                 ir_node *pred = get_irn_n(node, -1);
302                 if (pred->visited < irg->visited)
303                 {
304                         stop = loop_walker_rec(pred, pre, post, env);
305                         if (stop)
306                                 return stop;
307                 }
308         }
309
310         for (i = get_irn_arity(node) - 1; i >= 0; --i) {
311                 ir_node *pred = get_irn_n(node, i);
312                 if (pred->visited < irg->visited)
313                 {
314                         stop = loop_walker_rec(pred, pre, post, env);
315                         if (stop)
316                                 return stop;
317                 }
318         }
319
320         if (post)
321                 return post(node, env);
322         return 0;
323 }
324
325 /**
326  * Walks through loop nodes.
327  * The entries of the loop (all edges pointing into the loop) have to be given.
328  */
329 unsigned loop_walker(loop_entry_t *entries,
330                 irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env)
331 {
332         int i;
333         int stop = 0;
334
335         for (i=0; !stop && i<ARR_LEN(entries); i++)
336         {
337                 ir_node *pred;
338                 loop_entry_t entry;
339                 entry = entries[i];
340
341                 pred = get_irn_n( entry.node , entry.pred_irn_n);
342
343                 stop = loop_walker_rec( pred, pre, post, env);
344         }
345         return stop;
346 }
347
348 /**
349  *
350  */
351 void find_loop_entries_walk(ir_node *node, void *env)
352 {
353         unsigned node_in_loop, pred_in_loop;
354         (void) env;
355
356         link_node_state_t *state = XMALLOCZ(link_node_state_t);
357         node->link = (void *)state;
358
359         int i, arity;
360         arity = get_irn_arity(node);
361         for (i = 0; i < arity; i++) {
362                 ir_node *pred = get_irn_n(node, i);
363
364                 pred_in_loop = is_in_loop(pred);
365                 node_in_loop = is_in_loop(node);
366
367                 //Find the loops head/the blocks with cfpred outside of the loop
368                 if (is_Block(node) && node_in_loop
369                                 && !pred_in_loop && loop_cf_head_valid)
370                 {
371                         ir_node *cfgpred = get_Block_cfgpred(node, i);
372
373                         if ( !is_in_loop(cfgpred) )
374                         {
375                                 //another head? We do not touch this.
376                                 if (loop_cf_head && loop_cf_head != node)
377                                 {
378                                         loop_cf_head_valid = 0;
379                                 }
380                                 else
381                                 {
382                                         loop_cf_head = node;
383                                 }
384                         }
385                 }
386
387                 if ( pred_in_loop && !node_in_loop )
388                 {
389                         /* we walked right into the loop. */
390                         loop_entry_t entry;
391                         entry.node = node;
392                         entry.pred_irn_n = i;
393
394                         //DBG
395 //                      printf("inloop: %ld --> inloop %ld (@ %d)  \n",
396 //                                      node->node_nr, pred->node_nr, i);
397
398                         ARR_APP1(loop_entry_t, loop_entries, entry);
399                 }
400         }
401 }
402
403 // TODO needed?
404 ///**
405 // * Finds invariant nodes and marks them as invariant.
406 // * (Post walk)
407 // */
408 //unsigned get_invariants(ir_node *node, void *env)
409 //{
410 //      unsigned invar = 1;
411 //      (void) env;
412 //
413 //      if (is_Store(node))
414 //      {
415 //              has_sto = 1;
416 //              /* RETURN and abort walker */
417 //              return 1;
418 //      }
419 //
420 //      int arity = get_irn_arity(node);
421 //
422 //      /* RETURN, no preds to visit */
423 //      if (arity == 0) return 0;
424 //
425 //      if (is_Load(node))
426 //      {
427 //              assert(arity>=2 && "expected load to have edge nr 1 (address)");
428 //
429 //              ir_node *pred = get_irn_n(node, 1);
430 //              if (!is_in_loop(pred)                   /* Everything outside the loop is considered invariant */
431 //                              || is_Const(pred)               /* This is not true, but we also want the quasi-invariants. */
432 //                              || is_SymConst(pred)
433 //                              || get_lstate(node)->invariant)
434 //              {
435 //                      //printf("## CONSTLOAD: %ld \n", node->node_nr);
436 //                      get_lstate(node)->invariant = 1;
437 //              } else
438 //              {
439 //                      get_lstate(node)->invariant = 0;
440 //              }
441 //      }
442 //      else
443 //      {
444 //              int i;
445 //              invar = 1;
446 //              /* find loop variant preds */
447 //              for(i = 0; i < arity; ++i)
448 //              {
449 //                      ir_node *pred = get_irn_n(node, i);
450 //
451 //                      if ( !(!is_in_loop(pred)        /* outside loop is loop invariant */
452 //                                      || is_Const(pred)                       /* constants */
453 //                                      || is_SymConst(pred)            /* SymConst, if no Store */
454 //                                      || get_lstate(node)->invariant /* pred is marked as invariant */
455 //                                      ) )
456 //                      {
457 //                              invar = 0;
458 //                      }
459 //              }
460 //
461 //              if (invar) {
462 //                      printf("const: %ld \n", node->node_nr);
463 //                      get_lstate(node)->invariant = 1;
464 //              } else {
465 //                      get_lstate(node)->invariant = 0;
466 //              }
467 //// DBG
468 ////                            if (!is_nodes_block_marked(pred)) {
469 ////                                    //printf("pred outloop: %ld, pred %ld (const)\n", node->node_nr, pred->node_nr);
470 ////                            } else if (is_Const(pred) || is_SymConst(pred)) // || is_Phi(pred)) {
471 ////                                    //printf("predconst: %ld, pred %ld CONST\n", node->node_nr, pred->node_nr);
472 ////                            } else if (pred->link == MARKED_CONST) {
473 ////                                    //printf("predmarked: %ld, pred %ld const\n", node->node_nr, pred->node_nr);
474 ////                            } else {
475 ////                                    mark=0;
476 ////                            }
477 //      }
478 //      return 0;
479 //}
480
481 ////TODO DBG Remove
482 void phifix(ir_node *node, ir_node *newpred)
483 {
484         ir_node *phi=get_Block_phis(node);
485         while(phi)
486         {
487                 int pa = get_irn_arity(phi);
488                 int ba = get_irn_arity(node);
489
490
491
492                 while(ba>pa)
493                 {
494                         printf("!!!!!!!!!! block has %d, phi had %d\n", ba, pa );
495                         add_pred(phi, newpred);
496                         pa++;
497                         printf("!!!!!!!!!! block has %d, phi has now %d\n", ba, pa );
498                 }
499                 phi=get_Phi_next(phi);
500         }
501 }
502
503 static ir_node *ssa_second_def;
504 static ir_node *ssa_second_def_block;
505
506 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
507                                            int first)
508 {
509         int i;
510         int n_cfgpreds;
511         ir_graph *irg;
512         ir_node *phi;
513         ir_node **in;
514
515         /* This is needed because we create bads sometimes */
516         if (is_Bad(block))
517                 return new_Bad();
518
519         /* the other defs can't be marked for cases where a user of the original
520          * value is in the same block as the alternative definition.
521          * In this case we mustn't use the alternative definition.
522          * So we keep a flag that indicated wether we walked at least 1 block
523          * away and may use the alternative definition */
524         if (block == ssa_second_def_block && !first) {
525                 return ssa_second_def;
526         }
527
528         /* already processed this block? */
529         if (irn_visited(block)) {
530                 ir_node *value = get_lstate(block)->ssalink;
531                 return value;
532         }
533
534         irg = get_irn_irg(block);
535         assert(block != get_irg_start_block(irg));
536
537         /* a Block with only 1 predecessor needs no Phi */
538         n_cfgpreds = get_Block_n_cfgpreds(block);
539         if (n_cfgpreds == 1) {
540                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
541                 ir_node *value      = search_def_and_create_phis(pred_block, mode, 0);
542
543                 get_lstate(block)->ssalink = value;
544                 //set_irn_link(block, value);
545                 mark_irn_visited(block);
546                 return value;
547         }
548
549         /* create a new Phi */
550         NEW_ARR_A(ir_node*, in, n_cfgpreds);
551         for(i = 0; i < n_cfgpreds; ++i)
552                 in[i] = new_Unknown(mode);
553
554         phi = new_r_Phi(block, n_cfgpreds, in, mode);
555         //set_irn_link(block, phi);
556         get_lstate(block)->ssalink = phi;
557         mark_irn_visited(block);
558
559         /* set Phi predecessors */
560         for(i = 0; i < n_cfgpreds; ++i) {
561                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
562                 ir_node *pred_val   = search_def_and_create_phis(pred_block, mode, 0);
563
564                 set_irn_n(phi, i, pred_val);
565         }
566         return phi;
567 }
568
569 /**
570  * Given a set of values this function constructs SSA-form for the users of the
571  * first value (the users are determined through the out-edges of the value).
572  * Uses the irn_visited flags. Works without using the dominance tree.
573  */
574 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
575                           ir_node *second_block, ir_node *second_val)
576 {
577         ir_graph *irg;
578         ir_mode *mode;
579         const ir_edge_t *edge;
580         const ir_edge_t *next;
581
582         /* no need to do anything */
583         if (orig_val == second_val)
584                 return;
585
586         irg = get_irn_irg(orig_val);
587         inc_irg_visited(irg);
588
589         mode = get_irn_mode(orig_val);
590         get_lstate(orig_block)->ssalink = orig_val;
591         //set_irn_link(orig_block, orig_val);
592         mark_irn_visited(orig_block);
593
594         ssa_second_def_block = second_block;
595         ssa_second_def       = second_val;
596
597         /* Only fix the users of the first, i.e. the original node */
598         foreach_out_edge_safe(orig_val, edge, next) {
599                 ir_node *user = get_edge_src_irn(edge);
600                 int j = get_edge_src_pos(edge);
601                 ir_node *user_block = get_nodes_block(user);
602                 ir_node *newval;
603
604                 /* ignore keeps */
605                 if (is_End(user))
606                         continue;
607
608                 //DB((dbg, LEVEL_3, ">>> Fixing user %+F (pred %d == %+F)\n", user, j, get_irn_n(user, j)));
609
610                 if (is_Phi(user)) {
611                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
612                         newval = search_def_and_create_phis(pred_block, mode, 1);
613                 } else {
614                         newval = search_def_and_create_phis(user_block, mode, 1);
615                 }
616
617                 /* don't fix newly created Phis from the SSA construction */
618                 if (newval != user) {
619                         //DB((dbg, LEVEL_4, ">>>> Setting input %d of %+F to %+F\n", j, user, newval));
620                         set_irn_n(user, j, newval);
621                 }
622         }
623 }
624
625
626
627 /**
628  * Rewires the heads after peeling
629  */
630 void fix_head(ir_node *loophead)
631 {
632         int headarity = get_irn_arity(loophead);
633         int i;
634         ir_node **loopheadnins;
635         ir_node **peelheadnins;
636         ir_node *phi;
637         ir_node *peelhead = get_copy_of(loophead);
638         int lheadin_c = 0;
639         int pheadin_c = 0;
640
641         /**
642          * the loopheads new preds are:
643          * its own backedge(s) and the former backedge(s) of the peeled code
644          */
645         int lhead_arity = 2 * ARR_LEN(backedges);
646         int phead_arity = headarity - ARR_LEN(backedges);
647
648         NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
649         NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
650
651         phi = get_Block_phis(loophead);
652         while(phi) {
653                 NEW_ARR_A(ir_node *, get_lstate(phi)->ins, lhead_arity);
654                 phi=get_Phi_next(phi);
655         }
656
657         phi = get_Block_phis(peelhead);
658         while(phi)
659         {
660                 NEW_ARR_A(ir_node *, get_lstate(phi)->ins, phead_arity);
661                 phi=get_Phi_next(phi);
662         }
663
664         for (i = 0; i < headarity; i++)
665         {
666                 ir_node *phi;
667                 ir_node *orgjmp = get_irn_n(loophead, i);
668                 ir_node *copyjmp = get_copy_of(orgjmp);
669
670                 /**
671                  * Rewire the head blocks ins and their phi ins.
672                  * Requires blocks phi list.
673                  *
674                  * 1. Alien bes origin from the peeled head (new head of the whole loop)
675                  * 2. Loops own bes must be kept/copied to the loophead.
676                  * 3. All other edges origin from the peeled head (new head of the loop)
677                  */
678
679
680                 //printf("head i %d\n", i);
681
682                 if (is_backedge(loophead, i))
683                 {
684                         if (is_alien_edge(loophead, i)) {
685                                 peelheadnins[pheadin_c] = orgjmp;       /* alien bes go to the peeled head */
686                                 //set_backedge(peelhead, pheadin_c);
687
688                                 // alien bes origin at the peeled head
689                                 for_each_phi(peelhead, phi)
690                                 {
691                                         //printf("alienbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr);
692                                         get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i);
693                                 }
694                                 //printf("alienbe %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr);
695                                 ++pheadin_c;
696                         } else {
697                                 loopheadnins[lheadin_c] = orgjmp;       /* keep/copy the loops own bes */
698                                 //set_backedge(loophead, lheadin_c);
699
700                                 for_each_phi(loophead, phi) {
701                                         //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr);
702                                         get_lstate( phi )->ins[lheadin_c] = get_irn_n(phi, i);
703                                 }
704                                 //printf("normalbe %ld @ %d -> add to loophead orgjump %ld\n", loophead->node_nr, i, orgjmp->node_nr);
705                                 ++lheadin_c;
706
707                                 loopheadnins[lheadin_c] = copyjmp;      /* former bes of the peeled code origin now from the loophead */
708                                 //set_not_backedge(loophead, lheadin_c);
709
710                                 /* get_irn_n( get_copy_of(phi), i) <!=> get_copy_of(get_irn_n( phi, i))
711                                  * Order is crucial! Preds outside of the loop are non existent, like Const.
712                                  */
713                                 for_each_phi(loophead, phi) {
714                                         //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i,  get_irn_n( get_copy_of(phi), i)->node_nr);
715                                         get_lstate( phi )->ins[lheadin_c] =     get_irn_n( get_copy_of(phi), i) ;
716                                 }
717                                 //printf("normalbe %ld @ %d -> add to loophead copyjump %ld\n", loophead->node_nr, i, copyjmp->node_nr);
718                                 ++lheadin_c;
719                         }
720                 } else {
721                         peelheadnins[pheadin_c] = orgjmp;
722                         //set_not_backedge(peelhead, pheadin_c);
723
724                         for_each_phi(peelhead, phi) {
725                                 //printf("edge phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( phi, i)->node_nr);
726                                 get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i);
727                         }
728                         //printf("edge %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr);
729                         ++pheadin_c;
730                 }
731         }/* for */
732
733 //      printf("pheadin %d  arr %d            lheadin %d arr %d \n",
734 //                      pheadin_c, ARR_LEN(peelheadnins),
735 //                      lheadin_c, ARR_LEN(loopheadnins));
736
737         assert(pheadin_c == ARR_LEN(peelheadnins) &&
738                         lheadin_c == ARR_LEN(loopheadnins) &&
739                         "the number of head elements does not match the predefined one");
740
741         set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
742         set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
743
744         for_each_phi(loophead, phi) {
745                 ir_node **ins = get_lstate( phi )->ins;
746                 set_irn_in(phi, lhead_arity, ins);
747         }
748
749         for_each_phi(peelhead, phi) {
750                 ir_node **ins = get_lstate( phi )->ins;
751                 set_irn_in(phi, phead_arity, ins);
752         }
753 }
754
755
756 /**
757  * Peels the loop by copying the contents. Graph needs some rewiring after that.
758  */
759 void peel_walk(ir_node *node, void *env)
760 {
761         int i;
762         int arity;
763         ir_node *cp;
764         ir_node **cpin;
765         ir_graph *irg = current_ir_graph;
766         link_node_state_t *cpstate;
767         (void) env;
768
769         link_node_state_t *nodestate = get_lstate(node);
770
771         /**
772          * break condition and cycle resolver, creating temporary node copies
773          */
774         if (node->visited >= irg->visited)
775         {
776                 if (!nodestate->cloned && !nodestate->temp)
777                 {
778                         /** temporary clone this node
779                          * because we were here before and would walk into a cycle
780                          */
781                         cp = exact_copy(node);
782                         //DBG
783                         //printf("COPY TEMP : %ld -T> %ld \n", node->node_nr, cp->node_nr);
784                         nodestate->link = cp;
785                         if (is_Block(cp))
786                                 cp->loop = NULL;
787                         cpstate = XMALLOCZ(link_node_state_t);
788                         cp->link = cpstate;
789                         nodestate->temp=1;
790                         set_irn_visited(cp, irg->visited);
791                 }
792                 return;
793         }
794         //printf("  -----  WALK %ld -----  \n", node->node_nr);
795
796         /**
797          * WALK
798          */
799         set_irn_visited(node, irg->visited);
800
801         if ( !is_Block(node) ) {
802                 ir_node *pred = get_irn_n(node, -1);
803                 if (is_in_loop(pred))
804                         peel_walk(pred, NULL);
805         }
806
807         arity = get_irn_arity(node);
808
809         NEW_ARR_A(ir_node *, cpin, arity);
810
811         for (i = get_irn_arity(node) - 1; i >= 0; --i) {
812                 ir_node *pred = get_irn_n(node, i);
813
814                 if (is_in_loop(pred))
815                 {
816                         peel_walk(pred, NULL);
817                         cpin[i] = get_lstate(pred)->link;
818                         //printf("copy of %ld gets in %ld", node->node_nr, cpin[i]->node_nr);
819                 } else {
820                         cpin[i] = pred;
821
822                 }
823                 //printf("copy of %ld gets in %ld \n", node->node_nr, cpin[i]->node_nr);
824         }
825
826         /**
827          *  copy node / finalize temp node
828          */
829         if (!nodestate->temp)
830         {
831 //              if (!is_Const(node) && !is_SymConst(node)) {
832                         cp = exact_copy(node);
833                         //DBG
834                         //printf("COPY FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr);
835                         nodestate->link = cp;
836                         cpstate = XMALLOCZ(link_node_state_t);
837                         cp->link = cpstate;
838                         if (is_Block(cp))
839                                 cp->loop = NULL;
840                         set_irn_visited(cp, irg->visited);
841 //              } else {
842 //                      cp = node;
843 //                      //DBG
844 //                      printf("CONST FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr);
845 //                      nodestate->link = cp;
846 //              }
847         } else {
848                 /* temporary copy is existent but without correct ins */
849                 cp = nodestate->link;
850                 //printf("FINALIZE: %ld \n", cp->node_nr);
851         }
852
853         //TODO REM
854         //add_End_keepalive(get_irg_end(current_ir_graph), cp );
855
856         if (!is_Block(node))
857         {
858                 ir_node *cpblock = get_copy_of(get_nodes_block(node));
859
860                 /* set the block of the copy to the copied block */
861                 //printf("    PRE  NODE %ld   BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr);
862                 set_nodes_block(cp, cpblock );
863                 //printf("    POST NODE %ld   BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr);
864
865                 /* fix the phi information in attr.phis (does not add the phi node to the block) */
866                 if( is_Phi(cp) )
867                 {
868                         add_Block_phi(cpblock, cp);
869                         //printf("PHI-BLOCK block %ld got its phi %ld\n", cpblock->node_nr, cp->node_nr);
870                 }
871         }
872         else {
873                 /* macroblock info is not copied */
874                 set_Block_MacroBlock(cp, cp);
875         }
876
877         //dbg valid ins?
878 //      for(i=0; i<ARR_LEN(cpin); i++)
879 //              printf(" cpin of %ld (cp %ld):  %ld \n ", node->node_nr, cp->node_nr, cpin[i]->node_nr);
880
881         set_irn_in(cp, ARR_LEN(cpin), cpin);
882
883 //      for(i=0; i< ARR_LEN(cpin); i++)
884 //      {
885 //              printf("ins %ld: %ld \n", cp->node_nr, cpin[i]->node_nr);
886 //      }
887
888 //TODO REM
889 //      if (!nodestate->temp)
890 //      {
891 //              nodestate->link = cp;
892 //              cpstate = XMALLOCZ(link_node_state_t);
893 //              cp->link = cpstate;
894 //      } else {
895 //              /* temporary copy is existent but without correct ins */
896 //              cp = nodestate->link;
897 //      }
898
899
900         nodestate->temp = 0;
901         nodestate->cloned = 1;
902 }
903
904 //void chklink (ir_node *n, void * e)
905 //{
906 //      ir_node *link = n->link;
907 //      link_node_state_t *l = (link_node_state_t *)link;
908 //
909 //      printf("n %ld\n", n->node_nr);
910 //      printf("l p %ld\n", l->link);
911 //      if (l->link)
912 //              printf("l %ld\n", l->link->node_nr);
913 //
914 //}
915
916 /**
917  * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
918  */
919 void peel(void)
920 {
921         int i;
922         ir_node **entry_buffer;
923         int entry_c = 0;
924         int entry_i;
925
926         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_entries));
927
928         for(i = 0; i < ARR_LEN(loop_entries); i++)
929         {
930                 loop_entry_t entry = loop_entries[i];
931                 ir_node *node = entry.node;
932                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
933
934                 if (is_Block(node)) {
935                         /* node is block and the given pred points inside the loop  */
936                         ir_node *cppred;
937
938                         peel_walk( pred, 0);
939
940                         // leave keepalives out
941                         if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) {
942                                 //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
943                         } else {
944                                 cppred = get_copy_of(pred);
945                                 //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr);
946                                 add_pred( node, cppred );
947                                 //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr);
948                         }
949
950                         //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
951
952                         //DBG
953                         //phifix(node, cppred);
954                 } else {
955                         /* node is somewhere in the graph, outside of the loop */
956                         //ir_node *cppred;
957                         //ir_node *block;
958                         //ir_node *cpblock;
959                         peel_walk( pred, 0);
960
961                         // no ssa for keepalives
962                         if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) {
963                                 //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
964                         } else {
965                                 //printf("fix entry %ld to %ld\n", node->node_nr, pred->node_nr);
966                                 entry_buffer[entry_c++] = pred;
967                         }
968
969                         //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
970
971                         // cannot construct_ssa here, because it needs another walker
972
973                 } /* is block */
974         } /* for */
975
976         //irg_walk_graph(current_ir_graph, chklink, NULL, NULL);
977
978         fix_head(loop_cf_head);
979
980         //printf (" FIXHEAD DONE :D \n");
981
982         entry_i = 0;
983
984         /* Generate phis for values from peeled code and original loop */
985         for(i = 0; entry_i < entry_c; i++)
986         {
987                 loop_entry_t entry = loop_entries[i];
988                 ir_node *node = entry.node;
989
990                 if (is_Block(node))
991                 {
992                         /* block */
993                         ir_node *phi=get_Block_phis(node);
994
995                         while(phi)
996                         {
997                                 add_pred(phi, entry_buffer[entry_i++]);
998                                 phi=get_Phi_next(phi);
999                         }
1000                 } else {
1001                         /* not block */
1002
1003                         ir_node *cppred, *block, *cpblock, *pred;
1004
1005                         /**
1006                          * pred = get_irn_n(entry.node, entry.pred_irn_n);
1007                          * does not work, because we could have changed the nodes preds in construct_ssa
1008                          */
1009
1010                         pred = entry_buffer[entry_i++];
1011
1012                         //printf("pred %ld\n", pred->node_nr);
1013                         cppred = get_copy_of(pred);
1014                         //printf("cppred %ld\n", cppred->node_nr);
1015                         block = get_nodes_block(pred);
1016                         //printf("block %ld\n", block->node_nr);
1017                         cpblock = get_nodes_block(cppred);
1018                         //printf("cpblock %ld\n", cpblock->node_nr);
1019
1020
1021                         //dump_ir_block_graph(current_ir_graph, "vorher");
1022                         construct_ssa(block, pred, cpblock, cppred);
1023                         //add_End_keepalive(get_irg_end(current_ir_graph), cppred);
1024
1025
1026                         //add_pred(get_irg_end(current_ir_graph), cppred);
1027                         //dump_ir_block_graph(current_ir_graph, "nachher");
1028
1029
1030                 }
1031         }
1032 }
1033
1034 void decision_maker(void)
1035 {
1036         //inc_irg_visited(current_ir_graph);
1037         //loop_walker( loop_entries, NULL, get_invariants, NULL );
1038
1039         inc_irg_visited(current_ir_graph);
1040         peel();
1041
1042 }
1043
1044
1045 /**
1046  * TODO use list , not arr_F
1047  */
1048 void analyze_loop(ir_loop *loop)
1049 {
1050         /* Init new for every loop */
1051         loop_cf_head = NULL;
1052         loop_cf_head_valid = 1;
1053         has_sto = 0;
1054
1055         cur_loop = loop;
1056
1057         /* arrays */
1058         backedges = NEW_ARR_F(loop_entry_t, 0);
1059         alien_backedges = NEW_ARR_F(loop_entry_t, 0);
1060         loop_entries = NEW_ARR_F(loop_entry_t, 0);
1061         head_edges = NEW_ARR_F(loop_entry_t, 0);
1062
1063         inc_irg_visited( current_ir_graph );
1064         irg_walk_graph( current_ir_graph, block_phi_walker, NULL, NULL );
1065
1066         /* Collect all backedges */
1067         for_each_loop_block(loop, collect_backedges, NULL );
1068
1069         /* Find loop entries walk, find head */
1070         inc_irg_visited( current_ir_graph );
1071         irg_walk_graph( current_ir_graph, find_loop_entries_walk, NULL, NULL );
1072
1073         /* RETURN if there is no valid head */
1074         if (!loop_cf_head || !loop_cf_head_valid)
1075         {
1076                 //DBG printf("NOTE: There is no valid loop head. Nothing done.\n");
1077                 return;
1078         }
1079
1080         decision_maker();
1081
1082         // TODO free all link states... or better put them on functionstack
1083
1084         /* FREE */
1085         DEL_ARR_F(loop_entries);
1086         DEL_ARR_F(backedges);
1087         DEL_ARR_F(alien_backedges);
1088         DEL_ARR_F(head_edges);
1089
1090         //dump_ir_block_graph(current_ir_graph, "-lu1");
1091 }
1092
1093 /**
1094  *      Find most inner loops and send them to analyze_loop
1095  */
1096 void analyze_inner_loop(ir_loop *loop)
1097 {
1098         /* descend into sons */
1099         int sons = get_loop_n_sons(loop);
1100
1101         //printf("found %d loops \n", sons);
1102
1103         if (sons==0)
1104         {
1105                 //printf("analyze loop %ld\n", loop->loop_nr);
1106                 analyze_loop(loop);
1107         }
1108         else
1109         {
1110                 int s;
1111                 for(s=0; s<sons; s++)
1112                 {
1113                         //printf("descend into loop son %ld\n", get_loop_son(loop, s)->loop_nr);
1114                         analyze_inner_loop( get_loop_son(loop, s) );
1115                 }
1116         }
1117 }
1118
1119
1120
1121 //
1122 //void phicheck(ir_node *node, void * env)
1123 //{
1124 //      if (!is_Block(node)) return;
1125 //
1126 //      ir_node *phi=get_Block_phis(node);
1127 //      while(phi)
1128 //      {
1129 //              if (!is_Phi(phi))
1130 //              {
1131 //                      printf("NOT PHI %ld\n", phi->node_nr);
1132 //                      phi = NULL;
1133 //              } else {
1134 //                      phi=get_Phi_next(phi);
1135 //              }
1136 //      }
1137 //}
1138
1139 void loop_unroll(ir_graph *irg)
1140 {
1141         //printf(" --- loop unroll start --- \n");
1142
1143         //irg_walk_graph(irg, phicheck, NULL, NULL);
1144
1145         ir_loop *loop;
1146
1147         assure_cf_loop(irg);
1148
1149         loop = get_irg_loop(irg);
1150         int sons = get_loop_n_sons(loop);
1151         //printf("FOUND %d LOOPS \n", sons);
1152         int nr;
1153         for (nr=0; nr<sons; nr++)
1154         {
1155                 analyze_inner_loop( get_loop_son(loop, nr) );
1156         }
1157
1158         //DBG
1159         //printf(" --- loop unroll done --- \n");
1160 }
1161
1162 struct loop_unroll_pass_t {
1163         ir_graph_pass_t pass;
1164 };
1165
1166 /**
1167  * Wrapper to run ...() as a ir_prog pass.
1168  */
1169 static int loop_unroll_wrapper(ir_graph *irg, void *context) {
1170
1171         (void)context;
1172         loop_unroll(irg);
1173         return 0;
1174 }
1175
1176 //TODO ??
1177 ir_graph_pass_t *loop_unroll_pass(const char *name)
1178 {
1179         struct loop_unroll_pass_t *pass =
1180                 XMALLOCZ(struct loop_unroll_pass_t);
1181
1182         return def_graph_pass_constructor(
1183                 &pass->pass, name ? name : "loop_unroll",
1184                 loop_unroll_wrapper);
1185 }
1186
1187 /*
1188 void firm_init_loopunroll(void) {
1189         FIRM_DBG_REGISTER(dbg, "firm.opt.loopunroll");
1190 }*/