- removed C99 features
[libfirm] / ir / opt / loop.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Loop peeling and unrolling
23  * @author   Christian Helmer
24  * @version  $Id$
25  */
26
27 //#include "config.h"
28
29 //#include <limits.h>
30 #include <assert.h>
31
32 #include "irnode.h"
33 #include "irnode_t.h"
34 #include "irgraph_t.h"
35 //#include "irprog_t.h"
36
37 //#include "iroptimize.h"
38 #include "ircons_t.h"
39 #include "iropt_t.h"
40 #include "irgopt.h"
41 //#include "irgmod.h"
42 #include "irgwalk.h"
43
44 //#include "array_t.h"
45 #include "list.h"
46 //#include "pset.h"
47 //#include "pmap.h"
48 //#include "pdeq.h"
49 //#include "xmalloc.h"
50 //#include "pqueue.h"
51
52 #include "irouts.h"
53 #include "irloop_t.h"
54 #include "irbackedge_t.h"
55 //#include "opt_inline_t.h"
56 //#include "cgana.h"
57 //#include "trouts.h"
58 //#include "error.h"
59
60 //#include "analyze_irg_args.h"
61 #include "iredges_t.h"
62 //#include "irflag_t.h"
63 //#include "irhooks.h"
64 #include "irtools.h"
65 //#include "iropt_dbg.h"
66 #include "irpass_t.h"
67 #include "irloop.h"
68
69 #include "array_t.h"
70 #include "irdump.h"
71
72 /* convenience macro for iterating over every phi node of the given block */
73 #define for_each_phi(block, phi) \
74         for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
75
76 /* current loop */
77 ir_loop *cur_loop;
78
79 /* The loop walker should be possible to abort if nothing can be done anymore */
80 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
81
82 /* stores pair of node and number for the nodes predecessor */
83 typedef struct loop_entry_t {
84         ir_node *node;                  /* node outside of the loop */
85         int pred_irn_n;                 /* with pred_irn_n pointing inside loop */
86         //loop_entry_t *next;
87 } loop_entry_t;
88
89 //loop_entry_t loop_entry_list;
90
91 /* Store complex values in the nodes link */
92 typedef struct link_node_state_t {
93         unsigned cloned:1;
94         unsigned temp:1;                /* < Node is temporarily copied, to resolve cycles */
95         unsigned invariant:1;
96         ir_node *copy;
97         ir_node *link;                  /*< temporary links for ssa creation */
98         ir_node **ins;                  /* ins for phi nodes, during rewiring of blocks */
99 } link_node_state_t;
100
101
102 loop_entry_t *loop_entries;             /* loop entries (from below) in the node graph */
103 //int loop_entries_n;
104 loop_entry_t *head_entries;             /* loop entries (from below) in the node graph */
105 int backedges_n;
106 //loop_entry_t *backedges;              /* backedges exclusively from the current loop */
107 //loop_entry_t *alien_backedges;        /* The head can be head of several loops. */
108 //loop_entry_t *head_edges;     /* The head can be head of several loops. */
109
110 ir_node *loop_cf_head = NULL;           /* loop exit in the node graph */
111 unsigned loop_cf_head_valid = 1;        /* a loop may/must have one head, otherwise invalid */
112
113 unsigned has_sto;                               /* If we store inside the loop we might
114                                                                          * have disambiguation problems */
115 //DBG
116 //void arrdump(ir_node **arr)
117 //{
118 //      int i;
119 //      for (i=0; i<ARR_LEN(arr); i++)
120 //      {
121 //              printf("inarr: %ld          is block %d \n", (arr[i]->node_nr), is_Block(arr[i]));
122 //      }
123 //}
124
125 /**
126  * Returns the state of the given node.
127  */
128 link_node_state_t *get_lstate(ir_node *n)
129 {
130         return ((link_node_state_t *)n->link);
131 }
132
133 /**
134  * Returns the link inside of the nodes state which is pointing to its copy
135  * most of the time during loop peeling.
136  */
137 ir_node *get_copy(ir_node *n)
138 {
139         return ((link_node_state_t *)n->link)->copy;
140 }
141
142 /**
143  * Sets the nodes copy information
144  */
145 void set_copy(ir_node *n, ir_node *copy)
146 {
147         ((link_node_state_t *)n->link)->copy = copy;
148 }
149
150 /**
151  * Returns true if the node or block is in cur_loop.
152  */
153 unsigned is_in_loop(ir_node *node)
154 {
155 //      if (is_Block(node)) {
156 //              if (node->loop == cur_loop) {
157 //                      printf("                                   INLOOP %ld \n", node->node_nr);
158 //              }
159 //              return (node->loop == cur_loop);
160 //      } else {
161 //              if ( get_nodes_block(node)->loop == cur_loop ) {
162 //                      printf("                                   INLOOP %ld \n", node->node_nr);
163 //              }
164 //              return ( get_nodes_block(node)->loop == cur_loop );
165 //      }
166         if (is_Block(node)) {
167                 return (node->loop == cur_loop);
168         } else {
169                 return ( get_nodes_block(node)->loop == cur_loop );
170         }
171 }
172
173 unsigned is_in_head(ir_node *node)
174 {
175         if (is_Block(node)) {
176                 return (node == loop_cf_head);
177         } else {
178                 return ( get_nodes_block(node) == loop_cf_head );
179         }
180 }
181
182 /**
183  * Returns if the given be is an alien edge
184  */
185 unsigned is_alien_edge(ir_node *n, int i)
186 {
187         return( !is_in_loop( get_irn_n( n, i ) ) );
188 }
189
190 static void add_pred(ir_node* node, ir_node* x)
191 {
192         ir_node** ins;
193         int n;
194         int i;
195
196 //      if(!node)
197 //              printf("NONODE\n");
198
199         //printf("addpred %ld   pred %ld \n", node->node_nr, x->node_nr);
200
201         // WHY limit it to blocks and phi?
202         assert(is_Block(node) || is_Phi(node));
203
204         n = get_irn_arity(node);
205         NEW_ARR_A(ir_node*, ins, n + 1);
206         for (i = 0; i < n; i++)
207                 ins[i] = get_irn_n(node, i);
208         ins[n] = x;
209         set_irn_in(node, n + 1, ins);
210 }
211
212 void block_phi_walker(ir_node *n, void *env)
213 {
214         const ir_edge_t *edge;
215         (void) env;
216
217         /* RETURN */
218         if (!is_Block(n))
219                 return;
220
221         /* generate phi list for every block */
222         n->attr.block.phis = NULL;
223
224         foreach_out_edge(n, edge) {
225                 ir_node *src = get_edge_src_irn(edge);
226                 if (is_Phi(src))
227                 {
228                         //printf("%ld has phi %ld \n", block->node_nr, src->node_nr);
229                         add_Block_phi(n, src);
230                 }
231         }
232 }
233
234 /**
235  * Calls func() for every block in the given loop.
236  */
237 void for_each_loop_block(ir_loop *loop, irg_walk_func *func, void *env)
238 {
239         int elements, e;
240         elements = get_loop_n_elements(loop);
241
242         for(e=0; e<elements; e++)
243         {
244                 loop_element elem = get_loop_element(loop, e);
245                 if  (is_ir_node(elem.kind) && is_Block(elem.node) )
246                 {
247                         //printf(" foreach LOOPBLOCK %ld \n", elem.node->node_nr);
248                         func(elem.node, env);
249                 }
250         }
251 }
252
253 /**
254  * collects the blocks backedges and creates the phi list for every block
255  */
256 void collect_backedges(ir_node *block, void *env)
257 {
258         (void) env;
259
260         //printf("LOOP BLOCK %ld\n", block->node_nr);
261
262         /* collect backedges */
263         if (has_backedges(block))
264         {
265                 int i;
266                 int arity = get_irn_arity(block);
267
268                 for(i = 0; i < arity; ++i) {
269                         ir_node *pred = get_irn_n(block, i);
270
271                         loop_entry_t be;
272                         be.node = block;
273                         be.pred_irn_n = i;
274
275                         //ARR_APP1(loop_entry_t, head_edges, be);
276
277                         if (is_backedge(block, i) )
278                         {
279                                 if ( is_in_loop(pred) ) {
280                                         //printf("be: %ld --> %ld \n", block->node_nr, pred->node_nr);
281                                         //ARR_APP1(loop_entry_t, backedges, be);
282                                         ++backedges_n;
283                                 }
284 //                              else {
285 //                                      //printf("alien be: %ld --> %ld \n", block->node_nr, pred->node_nr);
286 //                                      ARR_APP1(loop_entry_t, alien_backedges, be);
287 //                              }
288                         }
289 //                      else {
290 //                              if ( !is_in_loop(pred) ) {
291 //                                      ARR_APP1(loop_entry_t, head_edges, be);
292 //                      }
293
294                 }
295         }
296 }
297
298 /**
299  *      Walks through all loop nodes.
300  */
301 unsigned loop_walker_rec(ir_node *node,
302                 irg_walk_func_abortable *pre,
303                 irg_walk_func_abortable *post, void * env)
304 {
305         int i;
306         unsigned stop = 0;
307
308         ir_graph *irg = current_ir_graph;
309
310         /* RETURN if we walked out of the loop*/
311         if (!is_in_loop(node))
312                 return 0;
313
314         if (pre)
315         {
316                 unsigned stop = pre(node, env);
317                 if (stop)
318                         return stop;
319         }
320
321         set_irn_visited(node, irg->visited);
322
323         if (node->op != op_Block) {
324                 ir_node *pred = get_irn_n(node, -1);
325                 if (pred->visited < irg->visited)
326                 {
327                         stop = loop_walker_rec(pred, pre, post, env);
328                         if (stop)
329                                 return stop;
330                 }
331         }
332
333         for (i = get_irn_arity(node) - 1; i >= 0; --i) {
334                 ir_node *pred = get_irn_n(node, i);
335                 if (pred->visited < irg->visited)
336                 {
337                         stop = loop_walker_rec(pred, pre, post, env);
338                         if (stop)
339                                 return stop;
340                 }
341         }
342
343         if (post)
344                 return post(node, env);
345         return 0;
346 }
347
348 /**
349  * Walks through loop nodes.
350  * The entries of the loop (all edges pointing into the loop) have to be given.
351  */
352 unsigned loop_walker(loop_entry_t *entries,
353                 irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env)
354 {
355         int i;
356         int stop = 0;
357
358         for (i=0; !stop && i<ARR_LEN(entries); i++)
359         {
360                 ir_node *pred;
361                 loop_entry_t entry;
362                 entry = entries[i];
363
364                 pred = get_irn_n( entry.node , entry.pred_irn_n);
365
366                 stop = loop_walker_rec( pred, pre, post, env);
367         }
368         return stop;
369 }
370
371 /**
372  *
373  */
374 void find_loop_entries_walk(ir_node *node, void *env)
375 {
376         unsigned node_in_loop, pred_in_loop;
377         int i, arity;
378         (void) env;
379
380         arity = get_irn_arity(node);
381         for (i = 0; i < arity; i++) {
382                 ir_node *pred = get_irn_n(node, i);
383
384                 pred_in_loop = is_in_loop(pred);
385                 node_in_loop = is_in_loop(node);
386
387                 //Find the loops head/the blocks with cfpred outside of the loop
388                 if (is_Block(node) && node_in_loop
389                                 && !pred_in_loop && loop_cf_head_valid)
390                 {
391                         ir_node *cfgpred = get_Block_cfgpred(node, i);
392
393                         if ( !is_in_loop(cfgpred) )
394                         {
395                                 // another head? We do not touch this.
396                                 // is this possible?
397                                 if (loop_cf_head && loop_cf_head != node)
398                                 {
399                                         loop_cf_head_valid = 0;
400                                 }
401                                 else
402                                 {
403                                         loop_cf_head = node;
404                                 }
405                         }
406                 }
407
408                 if ( pred_in_loop && !node_in_loop )
409                 {
410                         /* we walked right into the loop. */
411                         loop_entry_t entry;
412                         entry.node = node;
413                         entry.pred_irn_n = i;
414
415                         //DBG
416 //                      printf("inloop: %ld --> inloop %ld (@ %d)  \n",
417 //                                      node->node_nr, pred->node_nr, i);
418
419                         ARR_APP1(loop_entry_t, loop_entries, entry);
420                 }
421         }
422 }
423
424 ///**
425 // * Finds invariant nodes and marks them as invariant.
426 // * (Post walk)
427 // */
428 //unsigned get_invariants(ir_node *node, void *env)
429 //{
430 //      unsigned invar = 1;
431 //      (void) env;
432 //
433 //      if (is_Store(node))
434 //      {
435 //              has_sto = 1;
436 //              /* RETURN and abort walker */
437 //              return 1;
438 //      }
439 //
440 //      int arity = get_irn_arity(node);
441 //
442 //      /* RETURN, no preds to visit */
443 //      if (arity == 0) return 0;
444 //
445 //      if (is_Load(node))
446 //      {
447 //              assert(arity>=2 && "expected load to have edge nr 1 (address)");
448 //
449 //              ir_node *pred = get_irn_n(node, 1);
450 //              if (!is_in_loop(pred)                   /* Everything outside the loop is considered invariant */
451 //                              || is_Const(pred)               /* This is not true, but we also want the quasi-invariants. */
452 //                              || is_SymConst(pred)
453 //                              || get_lstate(node)->invariant)
454 //              {
455 //                      //printf("## CONSTLOAD: %ld \n", node->node_nr);
456 //                      get_lstate(node)->invariant = 1;
457 //              } else
458 //              {
459 //                      get_lstate(node)->invariant = 0;
460 //              }
461 //      }
462 //      else
463 //      {
464 //              int i;
465 //              invar = 1;
466 //              /* find loop variant preds */
467 //              for(i = 0; i < arity; ++i)
468 //              {
469 //                      ir_node *pred = get_irn_n(node, i);
470 //
471 //                      if ( !(!is_in_loop(pred)        /* outside loop is loop invariant */
472 //                                      || is_Const(pred)                       /* constants */
473 //                                      || is_SymConst(pred)            /* SymConst, if no Store */
474 //                                      || get_lstate(node)->invariant /* pred is marked as invariant */
475 //                                      ) )
476 //                      {
477 //                              invar = 0;
478 //                      }
479 //              }
480 //
481 //              if (invar) {
482 //                      printf("const: %ld \n", node->node_nr);
483 //                      get_lstate(node)->invariant = 1;
484 //              } else {
485 //                      get_lstate(node)->invariant = 0;
486 //              }
487 //// DBG
488 ////                            if (!is_nodes_block_marked(pred)) {
489 ////                                    //printf("pred outloop: %ld, pred %ld (const)\n", node->node_nr, pred->node_nr);
490 ////                            } else if (is_Const(pred) || is_SymConst(pred)) // || is_Phi(pred)) {
491 ////                                    //printf("predconst: %ld, pred %ld CONST\n", node->node_nr, pred->node_nr);
492 ////                            } else if (pred->link == MARKED_CONST) {
493 ////                                    //printf("predmarked: %ld, pred %ld const\n", node->node_nr, pred->node_nr);
494 ////                            } else {
495 ////                                    mark=0;
496 ////                            }
497 //      }
498 //      return 0;
499 //}
500
501 ////TODO DBG Remove
502 //void phifix(ir_node *node, ir_node *newpred)
503 //{
504 //      ir_node *phi=get_Block_phis(node);
505 //      while(phi)
506 //      {
507 //              int pa = get_irn_arity(phi);
508 //              int ba = get_irn_arity(node);
509 //
510 //
511 //
512 //              while(ba>pa)
513 //              {
514 //                      printf("!!!!!!!!!! block has %d, phi had %d\n", ba, pa );
515 //                      add_pred(phi, newpred);
516 //                      pa++;
517 //                      printf("!!!!!!!!!! block has %d, phi has now %d\n", ba, pa );
518 //              }
519 //              phi=get_Phi_next(phi);
520 //      }
521 //}
522
523 static ir_node *ssa_second_def;
524 static ir_node *ssa_second_def_block;
525
526 /**
527  *
528  */
529 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
530                                            int first)
531 {
532         int i;
533         int n_cfgpreds;
534         ir_graph *irg;
535         ir_node *phi;
536         ir_node **in;
537
538         /* This is needed because we create bads sometimes */
539         if (is_Bad(block))
540                 return new_Bad();
541
542         /* the other defs can't be marked for cases where a user of the original
543          * value is in the same block as the alternative definition.
544          * In this case we mustn't use the alternative definition.
545          * So we keep a flag that indicated wether we walked at least 1 block
546          * away and may use the alternative definition */
547         if (block == ssa_second_def_block && !first) {
548                 return ssa_second_def;
549         }
550
551         /* already processed this block? */
552         if (irn_visited(block)) {
553                 ir_node *value = get_lstate(block)->link;
554                 return value;
555         }
556
557         irg = get_irn_irg(block);
558         assert(block != get_irg_start_block(irg));
559
560         /* a Block with only 1 predecessor needs no Phi */
561         n_cfgpreds = get_Block_n_cfgpreds(block);
562         if (n_cfgpreds == 1) {
563                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
564                 ir_node *value      = search_def_and_create_phis(pred_block, mode, 0);
565
566                 get_lstate(block)->link = value;
567                 //set_irn_link(block, value);
568                 mark_irn_visited(block);
569                 return value;
570         }
571
572         /* create a new Phi */
573         NEW_ARR_A(ir_node*, in, n_cfgpreds);
574         for(i = 0; i < n_cfgpreds; ++i)
575                 in[i] = new_Unknown(mode);
576
577         phi = new_r_Phi(block, n_cfgpreds, in, mode);
578         //set_irn_link(block, phi);
579         get_lstate(block)->link = phi;
580         mark_irn_visited(block);
581
582         /* set Phi predecessors */
583         for(i = 0; i < n_cfgpreds; ++i) {
584                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
585                 ir_node *pred_val   = search_def_and_create_phis(pred_block, mode, 0);
586
587                 set_irn_n(phi, i, pred_val);
588         }
589         return phi;
590 }
591
592 /**
593  * Given a set of values this function constructs SSA-form for the users of the
594  * first value (the users are determined through the out-edges of the value).
595  * Uses the irn_visited flags. Works without using the dominance tree.
596  */
597 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
598                           ir_node *second_block, ir_node *second_val)
599 {
600         ir_graph *irg;
601         ir_mode *mode;
602         const ir_edge_t *edge;
603         const ir_edge_t *next;
604
605         /* no need to do anything */
606         if (orig_val == second_val)
607                 return;
608
609         irg = get_irn_irg(orig_val);
610         inc_irg_visited(irg);
611
612         mode = get_irn_mode(orig_val);
613         get_lstate(orig_block)->link = orig_val;
614         //set_irn_link(orig_block, orig_val);
615         mark_irn_visited(orig_block);
616
617         ssa_second_def_block = second_block;
618         ssa_second_def       = second_val;
619
620         /* Only fix the users of the first, i.e. the original node */
621         foreach_out_edge_safe(orig_val, edge, next) {
622                 ir_node *user = get_edge_src_irn(edge);
623                 int j = get_edge_src_pos(edge);
624                 ir_node *user_block = get_nodes_block(user);
625                 ir_node *newval;
626
627                 /* ignore keeps */
628                 if (is_End(user))
629                         continue;
630
631                 //DB((dbg, LEVEL_3, ">>> Fixing user %+F (pred %d == %+F)\n", user, j, get_irn_n(user, j)));
632
633                 if (is_Phi(user)) {
634                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
635                         newval = search_def_and_create_phis(pred_block, mode, 1);
636                 } else {
637                         newval = search_def_and_create_phis(user_block, mode, 1);
638                 }
639
640                 /* don't fix newly created Phis from the SSA construction */
641                 if (newval != user) {
642                         //DB((dbg, LEVEL_4, ">>>> Setting input %d of %+F to %+F\n", j, user, newval));
643                         set_irn_n(user, j, newval);
644                 }
645         }
646 }
647
648
649
650 /**
651  * Rewires the heads after peeling. This results in a tail-controlled loop.
652  */
653 void fix_head(ir_node *loophead)
654 {
655         int headarity = get_irn_arity(loophead);
656         int i;
657         ir_node **loopheadnins;
658         ir_node **peelheadnins;
659         ir_node *phi;
660         ir_node *peelhead = get_copy(loophead);
661         int lheadin_c = 0;
662         int pheadin_c = 0;
663
664         /**
665          * the loopheads new preds are:
666          * its own backedge(s) and the former backedge(s) of the peeled code
667          */
668         int lhead_arity = 2 * backedges_n; //ARR_LEN(backedges);
669         int phead_arity = headarity - backedges_n;  //ARR_LEN(backedges);
670
671         /** We assume the worst case, in which every head entry
672          * origins from the same node. +1 for a null terminated list.
673          */
674         //int tchead_arity = ARR_LEN(head_entries) + ( headarity - backedges_n) + 1  ;
675
676         NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
677         NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
678
679         phi = get_Block_phis(loophead);
680         while(phi) {
681                 NEW_ARR_A(ir_node *, get_lstate(phi)->ins, lhead_arity);
682                 phi=get_Phi_next(phi);
683         }
684
685         phi = get_Block_phis(peelhead);
686         while(phi)
687         {
688                 NEW_ARR_A(ir_node *, get_lstate(phi)->ins, phead_arity);
689                 phi=get_Phi_next(phi);
690         }
691
692         for (i = 0; i < headarity; i++)
693         {
694                 ir_node *phi;
695                 ir_node *orgjmp = get_irn_n(loophead, i);
696                 ir_node *copyjmp = get_copy(orgjmp);
697
698                 /**
699                  * Rewire the head blocks ins and their phi ins.
700                  * Requires blocks phi list.
701                  *
702                  * 1. Alien bes origin from the peeled head (new head of the whole loop)
703                  * 2. Loops own bes must be kept/copied to the loophead.
704                  * 3. All other edges origin from the peeled head (new head of the loop)
705                  */
706
707
708                 //printf("head i %d\n", i);
709
710                 if (is_backedge(loophead, i))
711                 {
712                         if (is_alien_edge(loophead, i)) {
713                                 peelheadnins[pheadin_c] = orgjmp;       /* alien bes go to the peeled head */
714                                 //set_backedge(peelhead, pheadin_c);
715
716                                 // alien bes origin at the peeled head
717                                 for_each_phi(peelhead, phi)
718                                 {
719                                         //printf("alienbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr);
720                                         get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i);
721                                 }
722                                 //printf("alienbe %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr);
723                                 ++pheadin_c;
724                         } else {
725                                 loopheadnins[lheadin_c] = orgjmp;       /* keep/copy the loops own bes */
726                                 //set_backedge(loophead, lheadin_c);
727
728                                 for_each_phi(loophead, phi) {
729                                         //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr);
730                                         get_lstate( phi )->ins[lheadin_c] = get_irn_n(phi, i);
731                                 }
732                                 //printf("normalbe %ld @ %d -> add to loophead orgjump %ld\n", loophead->node_nr, i, orgjmp->node_nr);
733                                 ++lheadin_c;
734
735                                 loopheadnins[lheadin_c] = copyjmp;      /* former bes of the peeled code origin now from the loophead */
736                                 //set_not_backedge(loophead, lheadin_c);
737
738                                 /* get_irn_n( get_copy_of(phi), i) <!=> get_copy_of(get_irn_n( phi, i))
739                                  * Order is crucial! Preds outside of the loop are non existent, like Const.
740                                  */
741                                 for_each_phi(loophead, phi) {
742                                         //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i,  get_irn_n( get_copy_of(phi), i)->node_nr);
743                                         get_lstate( phi )->ins[lheadin_c] =     get_irn_n( get_copy(phi), i) ;
744                                 }
745                                 //printf("normalbe %ld @ %d -> add to loophead copyjump %ld\n", loophead->node_nr, i, copyjmp->node_nr);
746                                 ++lheadin_c;
747                         }
748                 } else {
749                         peelheadnins[pheadin_c] = orgjmp;
750                         //set_not_backedge(peelhead, pheadin_c);
751
752                         for_each_phi(peelhead, phi) {
753                                 //printf("edge phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( phi, i)->node_nr);
754                                 get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i);
755                         }
756                         //printf("edge %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr);
757                         ++pheadin_c;
758                 }
759         }/* for */
760
761 //      printf("pheadin %d  arr %d            lheadin %d arr %d \n",
762 //                      pheadin_c, ARR_LEN(peelheadnins),
763 //                      lheadin_c, ARR_LEN(loopheadnins));
764
765         assert(pheadin_c == ARR_LEN(peelheadnins) &&
766                         lheadin_c == ARR_LEN(loopheadnins) &&
767                         "the number of head elements does not match the predefined one");
768
769         set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
770         set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
771
772         for_each_phi(loophead, phi) {
773                 ir_node **ins = get_lstate( phi )->ins;
774                 set_irn_in(phi, lhead_arity, ins);
775         }
776
777         for_each_phi(peelhead, phi) {
778                 ir_node **ins = get_lstate( phi )->ins;
779                 set_irn_in(phi, phead_arity, ins);
780         }
781 }
782
783 ir_node *rawcopy_node(ir_node *node)
784 {
785         ir_node *cp;
786         link_node_state_t *cpstate;
787
788         cp = exact_copy(node);
789         set_copy(node, cp);
790         cpstate = XMALLOCZ(link_node_state_t);
791         cp->link = cpstate;
792         if (is_Block(cp))
793                 cp->loop = NULL;                /* the copy does not belong to the loop */
794         set_irn_visited(cp, current_ir_graph->visited);
795         return cp;
796 }
797
798 /**
799  * Peels the loop by copying the contents. Graph needs some rewiring after that.
800  */
801 void peel_walk(ir_node *node)
802 {
803         int i;
804         int arity;
805         ir_node *cp;
806         ir_node **cpin;
807         ir_graph *irg = current_ir_graph;
808
809         //(void) env;
810
811         link_node_state_t *nodestate = get_lstate(node);
812
813         /**
814          * break condition and cycle resolver, creating temporary node copies
815          */
816         if (node->visited >= irg->visited)
817         {
818                 if (!nodestate->cloned && !nodestate->temp)
819                 {
820                         /** temporary clone this node
821                          * because we were here before and would walk into a cycle
822                          */
823                         rawcopy_node(node);
824                         nodestate->temp=1;
825                 }
826                 return;
827         }
828         //printf("  -----  WALK %ld -----  \n", node->node_nr);
829
830         /**
831          * WALK
832          */
833         set_irn_visited(node, irg->visited);
834
835         if ( !is_Block(node) ) {
836                 ir_node *pred = get_irn_n(node, -1);
837                 if (is_in_loop(pred))
838                         peel_walk(pred);
839         }
840
841         arity = get_irn_arity(node);
842
843         NEW_ARR_A(ir_node *, cpin, arity);
844
845
846         for (i = get_irn_arity(node) - 1; i >= 0; --i) {
847                 ir_node *pred = get_irn_n(node, i);
848
849                 /* collect head entries */
850                 if ( is_in_head(pred) && !is_in_head(node) )
851                 {
852                         loop_entry_t entry;
853                         entry.node = node;
854                         entry.pred_irn_n = i;
855                         ARR_APP1(loop_entry_t, head_entries, entry);
856                 }
857
858                 if (is_in_loop(pred))
859                 {
860                         peel_walk(pred);
861                         cpin[i] = get_copy(pred);  //get_lstate(pred)->link;
862                         //printf("copy of %ld gets in %ld", node->node_nr, cpin[i]->node_nr);
863                 } else {
864                         cpin[i] = pred;
865                 }
866                 //printf("copy of %ld gets in %ld \n", node->node_nr, cpin[i]->node_nr);
867         }
868
869         /**
870          *  copy node / finalize temp node
871          */
872         if (!nodestate->temp) {
873 //              if (!is_Const(node) && !is_SymConst(node)) {
874                         cp = rawcopy_node(node);
875 //              } else {
876 //                      cp = node;
877 //                      //DBG
878 //                      printf("CONST FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr);
879 //                      nodestate->link = cp;
880 //              }
881         } else {
882                 /* temporary copy is existent but without correct ins */
883                 cp = get_copy(node);  // nodestate->link;
884                 //printf("FINALIZE: %ld \n", cp->node_nr);
885         }
886
887         // special treatment for the head/condition: we need 3 heads for a tail-controlled and peeled loop
888         if (is_in_head(node)) {
889                 // head/condition for the tail-controlled loop
890                 // These copies are linked to the copies
891                 rawcopy_node(cp);
892         }
893
894         if (!is_Block(node))
895         {
896                 ir_node *cpblock = get_copy(get_nodes_block(node));
897
898                 /* set the block of the copy to the copied block */
899                 //printf("    PRE  NODE %ld   BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr);
900                 set_nodes_block(cp, cpblock );
901                 //printf("    POST NODE %ld   BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr);
902
903                 /* fix the phi information in attr.phis (does not add the phi node to the block) */
904                 if( is_Phi(cp) )
905                 {
906                         add_Block_phi(cpblock, cp);
907                         //printf("PHI-BLOCK block %ld got its phi %ld\n", cpblock->node_nr, cp->node_nr);
908                 }
909         }
910         else {
911                 /* macroblock info is not copied */
912                 set_Block_MacroBlock(cp, cp);
913         }
914
915         //dbg valid ins?
916 //      for(i=0; i<ARR_LEN(cpin); i++)
917 //              printf(" cpin of %ld (cp %ld):  %ld \n ", node->node_nr, cp->node_nr, cpin[i]->node_nr);
918
919         set_irn_in(cp, ARR_LEN(cpin), cpin);
920
921 //      for(i=0; i< ARR_LEN(cpin); i++)
922 //      {
923 //              printf("ins %ld: %ld \n", cp->node_nr, cpin[i]->node_nr);
924 //      }
925
926 //TODO REM
927 //      if (!nodestate->temp)
928 //      {
929 //              nodestate->link = cp;
930 //              cpstate = XMALLOCZ(link_node_state_t);
931 //              cp->link = cpstate;
932 //      } else {
933 //              /* temporary copy is existent but without correct ins */
934 //              cp = nodestate->link;
935 //      }
936
937
938         nodestate->temp = 0;
939         nodestate->cloned = 1;
940 }
941
942 //void chklink (ir_node *n, void * e)
943 //{
944 //      ir_node *link = n->link;
945 //      link_node_state_t *l = (link_node_state_t *)link;
946 //
947 //      printf("n %ld\n", n->node_nr);
948 //      printf("l p %ld\n", l->link);
949 //      if (l->link)
950 //              printf("l %ld\n", l->link->node_nr);
951 //
952 //}
953
954 /**
955  * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
956  */
957 void peel(void)
958 {
959         int i;
960         ir_node **entry_buffer;
961         int entry_c = 0;
962         int entry_i;
963
964         NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_entries));
965
966         for(i = 0; i < ARR_LEN(loop_entries); i++)
967         {
968                 loop_entry_t entry = loop_entries[i];
969                 ir_node *node = entry.node;
970                 ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
971
972                 if (is_Block(node)) {
973                         /* node is block and the given pred points inside the loop  */
974                         ir_node *cppred;
975
976                         peel_walk( pred );
977
978                         // leave keepalives out
979                         if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) {
980                                 //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
981                         } else {
982                                 cppred = get_copy(pred);
983                                 //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr);
984                                 add_pred( node, cppred );
985                                 //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr);
986                         }
987
988                         //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
989
990                         //DBG
991                         //phifix(node, cppred);
992                 } else {
993                         /* node is somewhere in the graph, outside of the loop */
994                         //ir_node *cppred;
995                         //ir_node *block;
996                         //ir_node *cpblock;
997                         peel_walk( pred );
998
999                         // no ssa for keepalives
1000                         if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) {
1001                                 //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
1002                         } else {
1003                                 //printf("fix entry %ld to %ld\n", node->node_nr, pred->node_nr);
1004                                 entry_buffer[entry_c++] = pred;
1005                         }
1006
1007                         //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
1008
1009                         // cannot construct_ssa here, because it needs another walker
1010
1011                 } /* is block */
1012         } /* for */
1013
1014         //irg_walk_graph(current_ir_graph, chklink, NULL, NULL);
1015
1016         fix_head(loop_cf_head);
1017
1018         //printf (" FIXHEAD DONE :D \n");
1019
1020         entry_i = 0;
1021
1022         /* Generate phis for values from peeled code and original loop */
1023         for(i = 0; entry_i < entry_c; i++)
1024         {
1025                 loop_entry_t entry = loop_entries[i];
1026                 ir_node *node = entry.node;
1027
1028                 if (is_Block(node))
1029                 {
1030                         /* block */
1031                         ir_node *phi=get_Block_phis(node);
1032
1033                         while(phi)
1034                         {
1035                                 add_pred(phi, entry_buffer[entry_i++]);
1036                                 phi=get_Phi_next(phi);
1037                         }
1038                 } else {
1039                         /* not block */
1040
1041                         ir_node *cppred, *block, *cpblock, *pred;
1042
1043                         /**
1044                          * pred = get_irn_n(entry.node, entry.pred_irn_n);
1045                          * does not work, because we could have changed the nodes preds in construct_ssa
1046                          */
1047
1048                         pred = entry_buffer[entry_i++];
1049
1050                         //printf("pred %ld\n", pred->node_nr);
1051                         cppred = get_copy(pred);
1052                         //printf("cppred %ld\n", cppred->node_nr);
1053                         block = get_nodes_block(pred);
1054                         //printf("block %ld\n", block->node_nr);
1055                         cpblock = get_nodes_block(cppred);
1056                         //printf("cpblock %ld\n", cpblock->node_nr);
1057
1058
1059                         //dump_ir_block_graph(current_ir_graph, "vorher");
1060                         construct_ssa(block, pred, cpblock, cppred);
1061                         //add_End_keepalive(get_irg_end(current_ir_graph), cppred);
1062
1063
1064                         //add_pred(get_irg_end(current_ir_graph), cppred);
1065                         //dump_ir_block_graph(current_ir_graph, "nachher");
1066
1067                 }
1068         }
1069 }
1070
1071 void alloc_linkstructs(ir_node *node, void *env)
1072 {
1073         link_node_state_t *state = XMALLOCZ(link_node_state_t);
1074         (void) env;
1075         node->link = (void *)state;
1076 }
1077
1078 void free_linkstructs(ir_node *node, void *env)
1079 {
1080         (void) env;
1081         xfree( (link_node_state_t*) node->link);
1082 }
1083
1084 void decision_maker(void)
1085 {
1086         //inc_irg_visited(current_ir_graph);
1087         //loop_walker( loop_entries, NULL, get_invariants, NULL );
1088
1089
1090         inc_irg_visited(current_ir_graph);
1091         irg_walk_graph(current_ir_graph, alloc_linkstructs, NULL, NULL);
1092
1093         inc_irg_visited(current_ir_graph);
1094         peel();
1095
1096         inc_irg_visited(current_ir_graph);
1097         irg_walk_graph(current_ir_graph, free_linkstructs, NULL, NULL);
1098 }
1099
1100
1101 /**
1102  * TODO use list , not arr_F
1103  */
1104 void analyze_loop(ir_loop *loop)
1105 {
1106         /* Init new for every loop */
1107         loop_cf_head = NULL;
1108         loop_cf_head_valid = 1;
1109         //loop_entries_n = 0;
1110         backedges_n = 0;
1111         has_sto = 0;
1112
1113         cur_loop = loop;
1114
1115         /* arrays */
1116         //backedges = NEW_ARR_F(loop_entry_t, 0);
1117         //alien_backedges = NEW_ARR_F(loop_entry_t, 0);
1118         //head_edges = NEW_ARR_F(loop_entry_t, 0);
1119
1120         loop_entries = NEW_ARR_F(loop_entry_t, 0);
1121         head_entries = NEW_ARR_F(loop_entry_t, 0);
1122
1123         inc_irg_visited( current_ir_graph );
1124         irg_walk_graph( current_ir_graph, block_phi_walker, NULL, NULL );
1125
1126         /* Collect all backedges */
1127         for_each_loop_block(loop, collect_backedges, NULL );
1128
1129         /* Find loop entries walk, find head */
1130         inc_irg_visited( current_ir_graph );
1131         irg_walk_graph( current_ir_graph, find_loop_entries_walk, NULL, NULL );
1132
1133         /* RETURN if there is no valid head */
1134         if (!loop_cf_head || !loop_cf_head_valid)
1135         {
1136                 //DBG printf("NOTE: There is no valid loop head. Nothing done.\n");
1137                 return;
1138         }
1139
1140         decision_maker();
1141
1142         // TODO free all link states... or better put them on functionstack
1143
1144         /* FREE */
1145         DEL_ARR_F(loop_entries);
1146         DEL_ARR_F(head_entries);
1147         //DEL_ARR_F(backedges);
1148         //DEL_ARR_F(alien_backedges);
1149         //DEL_ARR_F(head_edges);
1150
1151         //dump_ir_block_graph(current_ir_graph, "-lu1");
1152 }
1153
1154 /**
1155  *      Find most inner loops and send them to analyze_loop
1156  */
1157 void analyze_inner_loop(ir_loop *loop)
1158 {
1159         /* descend into sons */
1160         int sons = get_loop_n_sons(loop);
1161
1162         //printf("found %d loops \n", sons);
1163
1164         if (sons==0)
1165         {
1166                 //printf("analyze loop %ld\n", loop->loop_nr);
1167                 analyze_loop(loop);
1168         }
1169         else
1170         {
1171                 int s;
1172                 for(s=0; s<sons; s++)
1173                 {
1174                         //printf("descend into loop son %ld\n", get_loop_son(loop, s)->loop_nr);
1175                         analyze_inner_loop( get_loop_son(loop, s) );
1176                 }
1177         }
1178 }
1179
1180 void loop_optimization(ir_graph *irg)
1181 {
1182         //printf(" --- loop unroll start --- \n");
1183
1184         //irg_walk_graph(irg, phicheck, NULL, NULL);
1185
1186         ir_loop *loop;
1187         int     sons, nr;
1188
1189         assure_cf_loop(irg);
1190
1191         loop = get_irg_loop(irg);
1192         sons = get_loop_n_sons(loop);
1193         //printf("FOUND %d LOOPS \n", sons);
1194
1195         for (nr=0; nr<sons; nr++)
1196         {
1197                 analyze_inner_loop( get_loop_son(loop, nr) );
1198         }
1199
1200         //DBG
1201         //printf(" --- loop unroll done --- \n");
1202 }
1203
1204 //struct loop_unroll_pass_t {
1205 //      ir_graph_pass_t pass;
1206 //};
1207
1208 /**
1209  * Wrapper to run loop_unroll() as a ir_prog pass.
1210  */
1211 //static int loop_unroll_wrapper(ir_graph *irg, void *context) {
1212 //
1213 //      (void)context;
1214 //      loop_unroll(irg);
1215 //      return 0;
1216 //}
1217
1218
1219 //ir_graph_pass_t *loop_unroll_pass(const char *name)
1220 //{
1221 //      struct loop_unroll_pass_t *pass =
1222 //              XMALLOCZ(struct loop_unroll_pass_t);
1223 //
1224 //      return def_graph_pass_constructor(
1225 //              &pass->pass, name ? name : "loop_unroll",
1226 //              loop_unroll_wrapper);
1227 //}
1228
1229 /*
1230 void firm_init_loopunroll(void) {
1231         FIRM_DBG_REGISTER(dbg, "firm.opt.loopunroll");
1232 }*/