cparser now knows 'unroll-loops'.
[libfirm] / ir / opt / loop.c
index b0d0f8f..7aceb2d 100644 (file)
 
 /**
  * @file
- * @brief    Loop peeling and unrolling
  * @author   Christian Helmer
+ * @brief    loop inversion and loop unrolling, loop peeling
+ *
  * @version  $Id$
  */
-
-//#include "config.h"
-
-//#include <limits.h>
-#include <assert.h>
+#include "config.h"
 
 #include "irnode.h"
-#include "irnode_t.h"
-#include "irgraph_t.h"
-//#include "irprog_t.h"
+#include "debug.h"
 
-//#include "iroptimize.h"
-#include "ircons_t.h"
-#include "iropt_t.h"
+#include "ircons.h"
 #include "irgopt.h"
-//#include "irgmod.h"
+#include "irgmod.h"
 #include "irgwalk.h"
-
-//#include "array_t.h"
-#include "list.h"
-//#include "pset.h"
-//#include "pmap.h"
-//#include "pdeq.h"
-//#include "xmalloc.h"
-//#include "pqueue.h"
-
 #include "irouts.h"
-#include "irloop_t.h"
-#include "irbackedge_t.h"
-//#include "opt_inline_t.h"
-//#include "cgana.h"
-//#include "trouts.h"
-//#include "error.h"
-
-//#include "analyze_irg_args.h"
-#include "iredges_t.h"
-//#include "irflag_t.h"
-//#include "irhooks.h"
+#include "iredges.h"
 #include "irtools.h"
-//#include "iropt_dbg.h"
-#include "irpass_t.h"
-#include "irloop.h"
+#include "array_t.h"   /* automatic array */
+#include "beutil.h"            /* get_block */
+#include "irloop_t.h"  /* set_irn_loop*/
+
+#if 0
+       #include "irdump_t.h"
+#endif
 
-#include "array_t.h"
-#include "irdump.h"
 
+DEBUG_ONLY(static firm_dbg_module_t *dbg);
 
-/* convenience macro iterating over every phi node of the block */
+/**
+ * Convenience macro for iterating over every phi node of the given block.
+ * Requires phi list per block.
+ */
 #define for_each_phi(block, phi) \
        for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) )
 
-ir_loop *cur_loop;
+/* current loop */
+static ir_loop *cur_loop;
 
-/* The loop walker should be possible to abort if nothing can be done anymore */
+/* abortable walker function */
 typedef unsigned irg_walk_func_abortable(ir_node *, void *);
 
-/* stores pair of node and number for nodes predecessor */
-typedef struct loop_entry_t {
-       ir_node *node;                  /* node outside of the loop */
-       int pred_irn_n;                 /* with pred_irn_n pointing inside loop */
-} loop_entry_t;
-
-/* Store complex values in the nodes link */
-//TODO optimize. Every node has these values and seldom many otm are used.
-typedef struct link_node_state_t {
-       unsigned cloned:1;
-       unsigned temp:1;
+/* condition for walking a node during a copy_walk */
+typedef unsigned walker_condition(ir_node *);
+
+/* node and position of a predecessor */
+typedef struct out_edges {
+       ir_node *node;
+       int pred_irn_n;
+} out_edge;
+
+/* access complex values through the nodes links */
+typedef struct node_info {
        unsigned invariant:1;
-       ir_node *link;
-       ir_node *ssalink;       /* we will have to keep the link to the copies, as well as have temporary links for ssa creation */
-       ir_node **ins;          /* ins for phi nodes, during rewiring of blocks */
-       // TODO omit ins. can be replaced by new ins and newunknown ins for each
-} link_node_state_t;
-
-
-loop_entry_t *loop_entries;            /* loop entries (from below) in the node graph */
-loop_entry_t *backedges;               /* backedges exclusively from the current loop */
-loop_entry_t *alien_backedges; /* The head can be head of several loops. */
-loop_entry_t *head_edges;      /* The head can be head of several loops. */
-
-ir_node *loop_cf_head = NULL;          /* loop exit in the node graph */
-unsigned loop_cf_head_valid = 1;       /* a loop may/must have one head, otherwise invalid */
-
-unsigned has_sto = 0;                          /* If we store inside the loop we might
-                                                                        * have disambiguation problems */
-//DBG
-//void arrdump(ir_node **arr)
-//{
-//     int i;
-//     for (i=0; i<ARR_LEN(arr); i++)
-//     {
-//             printf("inarr: %ld          is block %d \n", (arr[i]->node_nr), is_Block(arr[i]));
-//     }
-//}
+       ir_node *copy;
+       ir_node *link;                                  /* temporary links for ssa creation */
+       ir_node **ins;                                  /* ins for phi nodes, during rewiring of blocks */
+       unsigned done;
+       struct node_info *freelistnext; /* linked list to free all node_infos */
+} node_info;
+
+static node_info *link_node_state_list;                /* head of the linked list to free all node_infos */
+
+static out_edge *cur_loop_outs;                                /* A walker may start visiting the current loop with these nodes. */
+static out_edge *cur_head_outs;                                /* A walker may start visiting the cur head with these nodes. */
+
+static ir_node *loop_cf_head = NULL;                           /* Loop head node */
+static unsigned loop_cf_head_valid = 1;                                /* A loop may have one head, otherwise we do not touch it. */
+
+ir_node **loops;
+
+/* Inverted head */
+static ir_node *loop_inv_head = NULL;
+/* Peeled head */
+static ir_node *loop_peeled_head = NULL;
+
+/* Loop analysis informations */
+typedef struct loop_info_t {
+       unsigned blocks;                        /* number of blocks in the loop */
+       unsigned calls;                         /* number of calls */
+       unsigned loads;                         /* number of load nodes */
+       unsigned outs;                          /* outs without keepalives */
+#if 0
+       unsigned invariant_loads;
+       unsigned stores;                        /* number of store nodes */
+       unsigned opnodes_n;                     /* nodes that probably result in an instruction */
+       unsigned do_invariant_opt;
+#endif
+} loop_info_t;
+
+/* Information about the current loop */
+static loop_info_t loop_info;
+
+/* A walker may start visiting a condition chain with these nodes. */
+static out_edge *cond_chain_entries;
+
+/* Number of unrolling */
+int unroll_times;
+
+static unsigned head_inversion_node_count;
+static unsigned inversion_head_node_limit;
+static unsigned head_inversion_block_count;
+
+static unsigned enable_peeling;
+static unsigned enable_inversion;
+static unsigned enable_unrolling;
 
 /**
- * Returns the state of the given node.
+ *
+ * ============= AUXILIARY FUNCTIONS =====================================
  */
-link_node_state_t *get_lstate(ir_node *n)
-{
-       return ((link_node_state_t *)n->link);
-}
+
 
 /**
- * Returns the link inside of the nodes state which is pointing to its copy
- * most of the time during loop peeling.
+ * Creates object on the heap, and adds it to a linked list to free it later.
  */
-ir_node *get_copy_of(ir_node *n)
+static node_info *new_node_info(void) {
+       node_info *l = XMALLOCZ(node_info);
+       l->freelistnext = link_node_state_list;
+       link_node_state_list = l;
+       l->copy = NULL;
+       l->invariant = 0;
+       return l;
+}
+
+static node_info *get_node_info(ir_node *n)
 {
-       return ((link_node_state_t *)n->link)->link;
+       return ((node_info *)get_irn_link(n));
 }
 
-/**
- * Returns true if the node or block is in cur_loop.
- */
-unsigned is_in_loop(ir_node *node)
+/* Allocates a node_info struct for the given node. For use with a walker. */
+static void alloc_node_info(ir_node *node, void *env)
 {
-//     if (is_Block(node)) {
-//             if (node->loop == cur_loop) {
-//                     printf("                                   INLOOP %ld \n", node->node_nr);
-//             }
-//             return (node->loop == cur_loop);
-//     } else {
-//             if ( get_nodes_block(node)->loop == cur_loop ) {
-//                     printf("                                   INLOOP %ld \n", node->node_nr);
-//             }
-//             return ( get_nodes_block(node)->loop == cur_loop );
-//     }
-       if (is_Block(node)) {
-               return (node->loop == cur_loop);
-       } else {
-               return ( get_nodes_block(node)->loop == cur_loop );
+       node_info *state;
+       (void) env;
+       state = new_node_info();
+       set_irn_link(node, (void *)state);
+}
+
+static void free_node_info(void)
+{
+       int a = 0;
+       node_info *n;
+       n = link_node_state_list;
+       while(n) {
+               node_info *next = n->freelistnext;
+               ++a;
+               xfree(n);
+               n = next;
        }
+       link_node_state_list = NULL;
 }
 
 /**
- * Returns if the given be is an alien edge
+ * Use the linked list to reset the reused values of all node_infos
+ * Reset in particular the copy attribute as copy_walk uses it to determine a present copy
  */
-unsigned is_alien_edge(ir_node *n, int i)
+static void reset_node_infos(void)
 {
-       return( !is_in_loop( get_irn_n( n, i ) ) );
+       node_info *next;
+       next = link_node_state_list;
+       while(next->freelistnext) {
+               node_info *cur = next;
+               next = cur->freelistnext;
+               cur->copy = NULL;
+               cur->ins = NULL;
+               cur->link = NULL;
+       }
 }
 
-static void add_pred(ir_node* node, ir_node* x)
+/* Returns the nodes node_info link. */
+static ir_node *get_link(ir_node *n)
 {
-       ir_node** ins;
-       int n;
-       int i;
-
-//     if(!node)
-//             printf("NONODE\n");
-
-       //printf("addpred %ld   pred %ld \n", node->node_nr, x->node_nr);
-
-       // WHY limit it to blocks and phi?
-       //assert(is_Block(node) || is_Phi(node));
-
-       n = get_irn_arity(node);
-       NEW_ARR_A(ir_node*, ins, n + 1);
-       for (i = 0; i < n; i++)
-               ins[i] = get_irn_n(node, i);
-       ins[n] = x;
-       set_irn_in(node, n + 1, ins);
+       return ((node_info *)get_irn_link(n))->link;
 }
 
-void block_phi_walker(ir_node *n, void *env)
+/* Sets the nodes node_info link. */
+static void set_link(ir_node *n, ir_node *link)
 {
-       const ir_edge_t *edge;
-       (void) env;
-
-       /* RETURN */
-       if (!is_Block(n))
-               return;
+       ((node_info *)get_irn_link(n))->link = link;
+}
 
-       /* generate phi list for every block */
-       n->attr.block.phis = NULL;
+/* Returns a nodes copy. */
+static ir_node *get_copy(ir_node *n)
+{
+       return ((node_info *)get_irn_link(n))->copy;
+}
 
-       foreach_out_edge(n, edge) {
-               ir_node *src = get_edge_src_irn(edge);
-               if (is_Phi(src))
-               {
-                       //printf("%ld has phi %ld \n", block->node_nr, src->node_nr);
-                       add_Block_phi(n, src);
-               }
-       }
+/* Sets a nodes copy. */
+static void set_copy(ir_node *n, ir_node *copy)
+{
+       ((node_info *)get_irn_link(n) )->copy = copy;
 }
 
 /**
- * Calls func() for every block in the given loop.
+ * Convenience macro for iterating over every copy in a linked list
+ * of copies.
  */
-void for_each_loop_block(ir_loop *loop, irg_walk_func *func, void *env)
-{
-       int elements, e;
-       elements = get_loop_n_elements(loop);
-
-       for(e=0; e<elements; e++)
-       {
-               loop_element elem = get_loop_element(loop, e);
-               if  (is_ir_node(elem.kind) && is_Block(elem.node) )
-               {
-                       //printf(" foreach LOOPBLOCK %ld \n", elem.node->node_nr);
-                       func(elem.node, env);
-               }
-       }
-}
+#define for_each_copy(node) \
+       for ( ; (node) ; (node) = get_copy(node))
 
 /**
- * collects the blocks backedges and creates the phi list for every block
+ * Convenience macro for iterating over every copy in 2 linked lists
+ * of copies in parallel.
  */
-void collect_backedges(ir_node *block, void *env)
-{
-       (void) env;
+#define for_each_copy2(high1, low1, high2, low2) \
+       for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \
+                                                       (high2) = (low2), (low2) = get_copy(low2))
 
-       printf("LOOP BLOCK %ld\n", block->node_nr);
+/*
+ * Returns 0 if the node or block is not in cur_loop.
+ */
+static unsigned is_in_loop(ir_node *node)
+{
+       return (get_irn_loop(get_block(node)) == cur_loop);
+}
 
-       /* collect backedges */
-       if (has_backedges(block))
-       {
-               int i;
-               int arity = get_irn_arity(block);
+/* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */
+static unsigned is_alien_edge(ir_node *n, int i)
+{
+       return(!is_in_loop(get_irn_n(n, i)));
+}
 
-               for(i = 0; i < arity; ++i) {
-                       ir_node *pred = get_irn_n(block, i);
+/* used for block walker */
+static void reset_block_mark(ir_node *node, void * env)
+{
+       (void) env;
 
-                       loop_entry_t be;
-                       be.node = block;
-                       be.pred_irn_n = i;
+       if (is_Block(node))
+               set_Block_mark(node, 0);
+}
 
-                       ARR_APP1(loop_entry_t, head_edges, be);
+static unsigned is_nodesblock_marked(ir_node* node)
+{
+       return (get_Block_mark(get_block(node)));
+}
 
-                       if (is_backedge(block, i) )
-                       {
-                               if ( is_in_loop(pred) ) {
-                                       //printf("be: %ld --> %ld \n", block->node_nr, pred->node_nr);
-                                       ARR_APP1(loop_entry_t, backedges, be);
-                               } else {
-                                       //printf("alien be: %ld --> %ld \n", block->node_nr, pred->node_nr);
-                                       ARR_APP1(loop_entry_t, alien_backedges, be);
-                               }
-                       }
-//                     else {
-//                             if ( !is_in_loop(pred) ) {
-//                                     ARR_APP1(loop_entry_t, head_edges, be);
-//                     }
+/* Returns the number of blocks in a loop. */
+int get_loop_n_blocks(ir_loop *loop) {
+       int elements, e;
+       int blocks = 0;
+       elements = get_loop_n_elements(loop);
 
-               }
+       for(e=0; e<elements; e++) {
+               loop_element elem = get_loop_element(loop, e);
+               if  (is_ir_node(elem.kind) && is_Block(elem.node))
+                       ++blocks;
        }
+       return blocks;
 }
 
 /**
- *     Walks through all loop nodes.
+ * Add newpred at position pos to node and also add the corresponding value to the phis.
+ * Requires block phi list.
  */
-unsigned loop_walker_rec(ir_node *node,
-               irg_walk_func_abortable *pre,
-               irg_walk_func_abortable *post, void * env)
+static int duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred)
 {
+       ir_node **ins;
+       /*int *is_be;*/
+       ir_node *phi;
+       int block_arity;
        int i;
-       unsigned stop = 0;
-
-       ir_graph *irg = current_ir_graph;
 
-       /* RETURN if we walked out of the loop*/
-       if (!is_in_loop(node))
-               return 0;
+       assert(is_Block(node) && "duplicate_preds is only allowed for blocks");
 
-       if (pre)
-       {
-               unsigned stop = pre(node, env);
-               if (stop)
-                       return stop;
-       }
+       DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n",
+           get_irn_node_nr(node), pos, get_irn_node_nr(newpred)));
 
-       set_irn_visited(node, irg->visited);
+       block_arity = get_irn_arity(node);
 
-       if (node->op != op_Block) {
-               ir_node *pred = get_irn_n(node, -1);
-               if (pred->visited < irg->visited)
-               {
-                       stop = loop_walker_rec(pred, pre, post, env);
-                       if (stop)
-                               return stop;
-               }
-       }
+       NEW_ARR_A(ir_node*, ins, block_arity + 1);
 
-       for (i = get_irn_arity(node) - 1; i >= 0; --i) {
-               ir_node *pred = get_irn_n(node, i);
-               if (pred->visited < irg->visited)
-               {
-                       stop = loop_walker_rec(pred, pre, post, env);
-                       if (stop)
-                               return stop;
-               }
+       for (i = 0; i < block_arity; ++i) {
+               ins[i] = get_irn_n(node, i);
        }
+       ins[block_arity] = newpred;
 
-       if (post)
-               return post(node, env);
-       return 0;
-}
+       set_irn_in(node, block_arity + 1, ins);
 
-/**
- * Walks through loop nodes.
- * The entries of the loop (all edges pointing into the loop) have to be given.
- */
-unsigned loop_walker(loop_entry_t *entries,
-               irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env)
-{
-       int i;
-       int stop = 0;
-
-       for (i=0; !stop && i<ARR_LEN(entries); i++)
-       {
-               ir_node *pred;
-               loop_entry_t entry;
-               entry = entries[i];
-
-               pred = get_irn_n( entry.node , entry.pred_irn_n);
+       for_each_phi(node, phi) {
+               int phi_arity = get_irn_arity(phi);
+               DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi)));
 
-               stop = loop_walker_rec( pred, pre, post, env);
+               NEW_ARR_A(ir_node *, ins, block_arity + 1);
+               for (i=0; i < phi_arity; ++i) {
+                       DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i))));
+                       ins[i] = get_irn_n(phi, i);
+               }
+               ins[block_arity] = get_irn_n(phi, pos);
+               set_irn_in(phi, block_arity + 1, ins);
        }
-       return stop;
+       return block_arity;
 }
 
 /**
- *
+ * Finds loop head and loop_info.
  */
-void find_loop_entries_walk(ir_node *node, void *env)
+static void get_loop_info(ir_node *node, void *env)
 {
        unsigned node_in_loop, pred_in_loop;
+       int i, arity;
        (void) env;
 
-       link_node_state_t *state = XMALLOCZ(link_node_state_t);
-       node->link = (void *)state;
-
-       int i, arity;
        arity = get_irn_arity(node);
        for (i = 0; i < arity; i++) {
                ir_node *pred = get_irn_n(node, i);
@@ -364,147 +328,59 @@ void find_loop_entries_walk(ir_node *node, void *env)
                pred_in_loop = is_in_loop(pred);
                node_in_loop = is_in_loop(node);
 
-               //Find the loops head/the blocks with cfpred outside of the loop
-               if (is_Block(node) && node_in_loop
-                               && !pred_in_loop && loop_cf_head_valid)
-               {
+               /* collect some loop information */
+               if (node_in_loop) {
+                       if (is_Call(node))
+                               ++loop_info.calls;
+               }
+
+               /* Find the loops head/the blocks with cfpred outside of the loop */
+               if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) {
                        ir_node *cfgpred = get_Block_cfgpred(node, i);
 
-                       if ( !is_in_loop(cfgpred) )
-                       {
-                               //another head? We do not touch this.
-                               if (loop_cf_head && loop_cf_head != node)
-                               {
+                       if (!is_in_loop(cfgpred)) {
+                               DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred));
+                               /* another head? We do not touch this. */
+                               if (loop_cf_head && loop_cf_head != node) {
                                        loop_cf_head_valid = 0;
-                               }
-                               else
-                               {
+                               } else {
                                        loop_cf_head = node;
                                }
                        }
                }
-
-               if ( pred_in_loop && !node_in_loop )
-               {
-                       /* we walked right into the loop. */
-                       loop_entry_t entry;
-                       entry.node = node;
-                       entry.pred_irn_n = i;
-
-                       //DBG
-//                     printf("inloop: %ld --> inloop %ld (@ %d)  \n",
-//                                     node->node_nr, pred->node_nr, i);
-
-                       ARR_APP1(loop_entry_t, loop_entries, entry);
-               }
        }
 }
 
-// TODO needed?
-///**
-// * Finds invariant nodes and marks them as invariant.
-// * (Post walk)
-// */
-//unsigned get_invariants(ir_node *node, void *env)
-//{
-//     unsigned invar = 1;
-//     (void) env;
-//
-//     if (is_Store(node))
-//     {
-//             has_sto = 1;
-//             /* RETURN and abort walker */
-//             return 1;
-//     }
-//
-//     int arity = get_irn_arity(node);
-//
-//     /* RETURN, no preds to visit */
-//     if (arity == 0) return 0;
-//
-//     if (is_Load(node))
-//     {
-//             assert(arity>=2 && "expected load to have edge nr 1 (address)");
-//
-//             ir_node *pred = get_irn_n(node, 1);
-//             if (!is_in_loop(pred)                   /* Everything outside the loop is considered invariant */
-//                             || is_Const(pred)               /* This is not true, but we also want the quasi-invariants. */
-//                             || is_SymConst(pred)
-//                             || get_lstate(node)->invariant)
-//             {
-//                     //printf("## CONSTLOAD: %ld \n", node->node_nr);
-//                     get_lstate(node)->invariant = 1;
-//             } else
-//             {
-//                     get_lstate(node)->invariant = 0;
-//             }
-//     }
-//     else
-//     {
-//             int i;
-//             invar = 1;
-//             /* find loop variant preds */
-//             for(i = 0; i < arity; ++i)
-//             {
-//                     ir_node *pred = get_irn_n(node, i);
-//
-//                     if ( !(!is_in_loop(pred)        /* outside loop is loop invariant */
-//                                     || is_Const(pred)                       /* constants */
-//                                     || is_SymConst(pred)            /* SymConst, if no Store */
-//                                     || get_lstate(node)->invariant /* pred is marked as invariant */
-//                                     ) )
-//                     {
-//                             invar = 0;
-//                     }
-//             }
-//
-//             if (invar) {
-//                     printf("const: %ld \n", node->node_nr);
-//                     get_lstate(node)->invariant = 1;
-//             } else {
-//                     get_lstate(node)->invariant = 0;
-//             }
-//// DBG
-////                           if (!is_nodes_block_marked(pred)) {
-////                                   //printf("pred outloop: %ld, pred %ld (const)\n", node->node_nr, pred->node_nr);
-////                           } else if (is_Const(pred) || is_SymConst(pred)) // || is_Phi(pred)) {
-////                                   //printf("predconst: %ld, pred %ld CONST\n", node->node_nr, pred->node_nr);
-////                           } else if (pred->link == MARKED_CONST) {
-////                                   //printf("predmarked: %ld, pred %ld const\n", node->node_nr, pred->node_nr);
-////                           } else {
-////                                   mark=0;
-////                           }
-//     }
-//     return 0;
-//}
-
-////TODO DBG Remove
-void phifix(ir_node *node, ir_node *newpred)
+/* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */
+static void get_loop_outs(ir_node *node, void *env)
 {
-       ir_node *phi=get_Block_phis(node);
-       while(phi)
-       {
-               int pa = get_irn_arity(phi);
-               int ba = get_irn_arity(node);
+       unsigned node_in_loop, pred_in_loop;
+       int i, arity;
+       (void) env;
 
+       arity = get_irn_arity(node);
+       for (i = 0; i < arity; i++) {
+               ir_node *pred = get_irn_n(node, i);
 
+               pred_in_loop = is_in_loop(pred);
+               node_in_loop = is_in_loop(node);
 
-               while(ba>pa)
-               {
-                       printf("!!!!!!!!!! block has %d, phi had %d\n", ba, pa );
-                       add_pred(phi, newpred);
-                       pa++;
-                       printf("!!!!!!!!!! block has %d, phi has now %d\n", ba, pa );
+               if (pred_in_loop && !node_in_loop) {
+                       out_edge entry;
+                       entry.node = node;
+                       entry.pred_irn_n = i;
+                       ARR_APP1(out_edge, cur_loop_outs, entry);
                }
-               phi=get_Phi_next(phi);
        }
 }
 
 static ir_node *ssa_second_def;
 static ir_node *ssa_second_def_block;
 
-static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
-                                           int first)
+/**
+ * Walks the graph bottom up, searching for definitions and creates phis.
+ */
+static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode)
 {
        int i;
        int n_cfgpreds;
@@ -512,22 +388,22 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
        ir_node *phi;
        ir_node **in;
 
-       /* This is needed because we create bads sometimes */
-       if (is_Bad(block))
+       DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %ld\n", get_irn_node_nr(block)));
+
+       /* Prevents creation of phi that would be bad anyway.
+        * Dead and bad blocks. */
+       if (get_irn_arity(block) < 1 || is_Bad(block))
                return new_Bad();
 
-       /* the other defs can't be marked for cases where a user of the original
-        * value is in the same block as the alternative definition.
-        * In this case we mustn't use the alternative definition.
-        * So we keep a flag that indicated wether we walked at least 1 block
-        * away and may use the alternative definition */
-       if (block == ssa_second_def_block && !first) {
+       if (block == ssa_second_def_block) {
+               DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def)));
                return ssa_second_def;
        }
 
        /* already processed this block? */
        if (irn_visited(block)) {
-               ir_node *value = get_lstate(block)->ssalink;
+               ir_node *value = get_link(block);
+               DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value)));
                return value;
        }
 
@@ -538,11 +414,14 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
        n_cfgpreds = get_Block_n_cfgpreds(block);
        if (n_cfgpreds == 1) {
                ir_node *pred_block = get_Block_cfgpred_block(block, 0);
-               ir_node *value      = search_def_and_create_phis(pred_block, mode, 0);
+               ir_node *value;
+
+               DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block)));
 
-               get_lstate(block)->ssalink = value;
-               //set_irn_link(block, value);
+               value = search_def_and_create_phis(pred_block, mode);
+               set_link(block, value);
                mark_irn_visited(block);
+
                return value;
        }
 
@@ -552,15 +431,26 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
                in[i] = new_Unknown(mode);
 
        phi = new_r_Phi(block, n_cfgpreds, in, mode);
-       //set_irn_link(block, phi);
-       get_lstate(block)->ssalink = phi;
+
+       /* Important: always keep block phi list up to date. */
+       add_Block_phi(block, phi);
+       /* EVERY node is assumed to have a node_info linked. */
+       alloc_node_info(phi, NULL);
+
+       DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block)));
+
+       set_link(block, phi);
        mark_irn_visited(block);
 
        /* set Phi predecessors */
        for(i = 0; i < n_cfgpreds; ++i) {
+               ir_node *pred_val;
                ir_node *pred_block = get_Block_cfgpred_block(block, i);
-               ir_node *pred_val   = search_def_and_create_phis(pred_block, mode, 0);
+               assert(pred_block != NULL);
+               pred_val = search_def_and_create_phis(pred_block, mode);
+               assert(pred_val != NULL);
 
+               DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val)));
                set_irn_n(phi, i, pred_val);
        }
        return phi;
@@ -569,7 +459,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode,
 /**
  * Given a set of values this function constructs SSA-form for the users of the
  * first value (the users are determined through the out-edges of the value).
- * Uses the irn_visited flags. Works without using the dominance tree.
+ * Works without using the dominance tree.
  */
 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
                           ir_node *second_block, ir_node *second_val)
@@ -579,16 +469,20 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
        const ir_edge_t *edge;
        const ir_edge_t *next;
 
+       assert(orig_block && orig_val && second_block && second_val &&
+                       "no parameter of construct_ssa may be NULL");
+
        /* no need to do anything */
        if (orig_val == second_val)
                return;
 
        irg = get_irn_irg(orig_val);
+
+       ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
        inc_irg_visited(irg);
 
        mode = get_irn_mode(orig_val);
-       get_lstate(orig_block)->ssalink = orig_val;
-       //set_irn_link(orig_block, orig_val);
+       set_link(orig_block, orig_val);
        mark_irn_visited(orig_block);
 
        ssa_second_def_block = second_block;
@@ -605,203 +499,258 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
                if (is_End(user))
                        continue;
 
-               //DB((dbg, LEVEL_3, ">>> Fixing user %+F (pred %d == %+F)\n", user, j, get_irn_n(user, j)));
+               DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user)));
 
                if (is_Phi(user)) {
                        ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
-                       newval = search_def_and_create_phis(pred_block, mode, 1);
+                       newval = search_def_and_create_phis(pred_block, mode);
                } else {
-                       newval = search_def_and_create_phis(user_block, mode, 1);
+                       newval = search_def_and_create_phis(user_block, mode);
                }
 
-               /* don't fix newly created Phis from the SSA construction */
-               if (newval != user) {
-                       //DB((dbg, LEVEL_4, ">>>> Setting input %d of %+F to %+F\n", j, user, newval));
+               /* If we get a bad node the user keeps the original in. No second definition needed. */
+               if (newval != user && !is_Bad(newval))
                        set_irn_n(user, j, newval);
+       }
+
+       ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
+}
+
+/*
+ * Construct SSA for def and all of its copies.
+ */
+static void construct_ssa_n(ir_node *def, ir_node *user)
+{
+       ir_graph *irg;
+       ir_mode *mode;
+       ir_node *iter = def;
+       const ir_edge_t *edge;
+       const ir_edge_t *next;
+       irg = get_irn_irg(def);
+
+       ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
+       inc_irg_visited(irg);
+
+       mode = get_irn_mode(def);
+
+       for_each_copy(iter) {
+               set_link(get_nodes_block(iter), iter);
+               mark_irn_visited(get_nodes_block(iter));
+
+               DB((dbg, LEVEL_5, "ssa_n:  Link def %ld to block %ld\n",
+                                                                       get_irn_node_nr(iter), get_irn_node_nr(get_nodes_block(iter))));
+       }
+
+       /* Need to search the outs, because we need the in-pos on the user node. */
+       foreach_out_edge_safe(def, edge, next) {
+               ir_node *edge_user = get_edge_src_irn(edge);
+               int edge_src = get_edge_src_pos(edge);
+               ir_node *user_block = get_nodes_block(user);
+               ir_node *newval;
+
+               if (edge_user != user)
+                       continue;
+
+               if (is_Phi(user)) {
+                       ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src);
+                       newval = search_def_and_create_phis(pred_block, mode);
+               } else {
+                       newval = search_def_and_create_phis(user_block, mode);
                }
+
+               if (newval != user && !is_Bad(newval))
+                       set_irn_n(user, edge_src, newval);
        }
+
+       ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
 }
 
+/**
+ * Construct SSA for all definitions in arr.
+ */
+void construct_ssa_foreach(ir_node **arr, int arr_n)
+{
+       int i;
+       for(i = 0; i < arr_n; ++i) {
+               ir_node *cppred, *block, *cpblock, *pred;
+
+               pred = arr[i];
+               cppred = get_copy(pred);
+               block = get_nodes_block(pred);
+               cpblock = get_nodes_block(cppred);
+               construct_ssa(block, pred, cpblock, cppred);
+       }
+}
 
+/* get the number of backedges without alien bes */
+static int get_backedge_n(ir_node *loophead, unsigned with_alien)
+{
+       int i;
+       int be_n = 0;
+       int arity = get_irn_arity(loophead);
+       for (i = 0; i < arity; ++i) {
+               ir_node *pred = get_irn_n(loophead, i);
+               if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred)))
+                       ++be_n;
+       }
+       return be_n;
+}
 
 /**
- * Rewires the heads after peeling
+ * Rewires the heads after peeling.
  */
-void fix_head(ir_node *loophead)
+static void peel_fix_heads(void)
 {
+       ir_node **loopheadnins, **peelheadnins;
+       ir_node *loophead = loop_cf_head;
+       ir_node *peelhead = get_copy(loophead);
+
        int headarity = get_irn_arity(loophead);
-       int i;
-       ir_node **loopheadnins;
-       ir_node **peelheadnins;
        ir_node *phi;
-       ir_node *peelhead = get_copy_of(loophead);
+       int i;
+
        int lheadin_c = 0;
        int pheadin_c = 0;
 
-       /**
-        * the loopheads new preds are:
-        * its own backedge(s) and the former backedge(s) of the peeled code
-        */
-       int lhead_arity = 2 * ARR_LEN(backedges);
-       int phead_arity = headarity - ARR_LEN(backedges);
+       int backedges_n = get_backedge_n(loophead, 0);
+
+       int lhead_arity = 2 * backedges_n;
+       int phead_arity = headarity - backedges_n;
 
+       /* new in arrays */
        NEW_ARR_A(ir_node *, loopheadnins, lhead_arity );
        NEW_ARR_A(ir_node *, peelheadnins, phead_arity );
 
-       phi = get_Block_phis(loophead);
-       while(phi) {
-               NEW_ARR_A(ir_node *, get_lstate(phi)->ins, lhead_arity);
-               phi=get_Phi_next(phi);
+       for_each_phi(loophead, phi) {
+               NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
        }
-
-       phi = get_Block_phis(peelhead);
-       while(phi)
-       {
-               NEW_ARR_A(ir_node *, get_lstate(phi)->ins, phead_arity);
-               phi=get_Phi_next(phi);
+       for_each_phi(peelhead, phi) {
+               NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity);
        }
 
        for (i = 0; i < headarity; i++)
        {
-               ir_node *phi;
                ir_node *orgjmp = get_irn_n(loophead, i);
-               ir_node *copyjmp = get_copy_of(orgjmp);
+               ir_node *copyjmp = get_copy(orgjmp);
 
                /**
                 * Rewire the head blocks ins and their phi ins.
-                * Requires blocks phi list.
-                *
-                * 1. Alien bes origin from the peeled head (new head of the whole loop)
-                * 2. Loops own bes must be kept/copied to the loophead.
-                * 3. All other edges origin from the peeled head (new head of the loop)
+                * Requires phi list per block.
                 */
+               if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
+                       loopheadnins[lheadin_c] = orgjmp;
+                       for_each_phi(loophead, phi) {
+                               get_node_info( phi )->ins[lheadin_c] =  get_irn_n( phi, i) ;
+                       }
+                       ++lheadin_c;
 
+                       /* former bes of the peeled code origin now from the loophead */
+                       loopheadnins[lheadin_c] = copyjmp;
 
-               //printf("head i %d\n", i);
-
-               if (is_backedge(loophead, i))
-               {
-                       if (is_alien_edge(loophead, i)) {
-                               peelheadnins[pheadin_c] = orgjmp;       /* alien bes go to the peeled head */
-                               //set_backedge(peelhead, pheadin_c);
-
-                               // alien bes origin at the peeled head
-                               for_each_phi(peelhead, phi)
-                               {
-                                       //printf("alienbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr);
-                                       get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i);
-                               }
-                               //printf("alienbe %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr);
-                               ++pheadin_c;
-                       } else {
-                               loopheadnins[lheadin_c] = orgjmp;       /* keep/copy the loops own bes */
-                               //set_backedge(loophead, lheadin_c);
-
-                               for_each_phi(loophead, phi) {
-                                       //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr);
-                                       get_lstate( phi )->ins[lheadin_c] = get_irn_n(phi, i);
-                               }
-                               //printf("normalbe %ld @ %d -> add to loophead orgjump %ld\n", loophead->node_nr, i, orgjmp->node_nr);
-                               ++lheadin_c;
-
-                               loopheadnins[lheadin_c] = copyjmp;      /* former bes of the peeled code origin now from the loophead */
-                               //set_not_backedge(loophead, lheadin_c);
-
-                               /* get_irn_n( get_copy_of(phi), i) <!=> get_copy_of(get_irn_n( phi, i))
-                                * Order is crucial! Preds outside of the loop are non existent, like Const.
-                                */
-                               for_each_phi(loophead, phi) {
-                                       //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i,  get_irn_n( get_copy_of(phi), i)->node_nr);
-                                       get_lstate( phi )->ins[lheadin_c] =     get_irn_n( get_copy_of(phi), i) ;
-                               }
-                               //printf("normalbe %ld @ %d -> add to loophead copyjump %ld\n", loophead->node_nr, i, copyjmp->node_nr);
-                               ++lheadin_c;
+                       /* get_irn_n( get_copy_of(phi), i ) <!=> get_copy_of( get_irn_n( phi, i) )
+                        * Order is crucial! Predecessors outside of the loop are non existent.
+                        * The copy (cloned with its ins!) has pred i,
+                        * but phis pred i might not have a copy of itself.
+                        */
+                       for_each_phi(loophead, phi) {
+                               get_node_info( phi )->ins[lheadin_c] =  get_irn_n( get_copy(phi), i) ;
                        }
+                       ++lheadin_c;
                } else {
                        peelheadnins[pheadin_c] = orgjmp;
-                       //set_not_backedge(peelhead, pheadin_c);
-
                        for_each_phi(peelhead, phi) {
-                               //printf("edge phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( phi, i)->node_nr);
-                               get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i);
+                               get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i);
                        }
-                       //printf("edge %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr);
                        ++pheadin_c;
                }
        }/* for */
 
-//     printf("pheadin %d  arr %d            lheadin %d arr %d \n",
-//                     pheadin_c, ARR_LEN(peelheadnins),
-//                     lheadin_c, ARR_LEN(loopheadnins));
-
        assert(pheadin_c == ARR_LEN(peelheadnins) &&
                        lheadin_c == ARR_LEN(loopheadnins) &&
-                       "the number of head elements does not match the predefined one");
+                       "the constructed head arities do not match the predefined arities");
 
+       /* assign the ins to the nodes */
        set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
        set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins);
 
        for_each_phi(loophead, phi) {
-               ir_node **ins = get_lstate( phi )->ins;
+               ir_node **ins = get_node_info( phi )->ins;
                set_irn_in(phi, lhead_arity, ins);
        }
 
        for_each_phi(peelhead, phi) {
-               ir_node **ins = get_lstate( phi )->ins;
+               ir_node **ins = get_node_info( phi )->ins;
                set_irn_in(phi, phead_arity, ins);
        }
 }
 
+/**
+ * Create a raw copy (ins are still the old ones) of the given node.
+ * We rely on copies to be NOT visited.
+ */
+static ir_node *rawcopy_node(ir_node *node)
+{
+       int i, arity;
+       ir_node *cp;
+       node_info *cpstate;
+
+       cp = exact_copy(node);
+
+       arity = get_irn_arity(node);
+
+       for (i = 0; i < arity; ++i) {
+               if (is_backedge(node, i))
+                       set_backedge(cp, i);
+       }
+
+       set_copy(node, cp);
+       cpstate = new_node_info();
+       set_irn_link(cp, cpstate);
+
+       if (is_Block(cp)) {
+               /* TODO
+                * exact_copy already sets Macroblock.
+                * Why do we need to do this anyway? */
+               set_Block_MacroBlock(cp, cp);
+       }
+       return cp;
+}
 
 /**
- * Peels the loop by copying the contents. Graph needs some rewiring after that.
+ * This walker copies all walked nodes.
+ * If the walk_condition is true for a node, it is walked.
+ * All nodes node_info->copy attributes has to be NULL prior to every to every walk.
  */
-void peel_walk(ir_node *node, void *env)
+static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop)
 {
        int i;
        int arity;
        ir_node *cp;
        ir_node **cpin;
        ir_graph *irg = current_ir_graph;
-       link_node_state_t *cpstate;
-       (void) env;
-
-       link_node_state_t *nodestate = get_lstate(node);
+       node_info *node_info = get_node_info(node);
 
        /**
         * break condition and cycle resolver, creating temporary node copies
         */
-       if (node->visited >= irg->visited)
-       {
-               if (!nodestate->cloned && !nodestate->temp)
-               {
-                       /** temporary clone this node
-                        * because we were here before and would walk into a cycle
-                        */
-                       cp = exact_copy(node);
-                       //DBG
-                       //printf("COPY TEMP : %ld -T> %ld \n", node->node_nr, cp->node_nr);
-                       nodestate->link = cp;
-                       if (is_Block(cp))
-                               cp->loop = NULL;
-                       cpstate = XMALLOCZ(link_node_state_t);
-                       cp->link = cpstate;
-                       nodestate->temp=1;
-                       set_irn_visited(cp, irg->visited);
+       if (get_irn_visited(node) >= get_irg_visited(irg)) {
+               /* Here we rely on nodestate's copy being initialized with NULL */
+               DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node)));
+               if (node_info->copy == NULL) {
+                       cp = rawcopy_node(node);
+                       DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
                }
                return;
        }
-       //printf("  -----  WALK %ld -----  \n", node->node_nr);
 
-       /**
-        * WALK
-        */
-       set_irn_visited(node, irg->visited);
+       /* Walk */
+       mark_irn_visited(node);
 
-       if ( !is_Block(node) ) {
-               ir_node *pred = get_irn_n(node, -1);
-               if (is_in_loop(pred))
-                       peel_walk(pred, NULL);
+       if (!is_Block(node)) {
+               ir_node *pred = get_nodes_block(node);
+               if (walk_condition(pred))
+                       DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred)));
+                       copy_walk(pred, walk_condition, set_loop);
        }
 
        arity = get_irn_arity(node);
@@ -811,380 +760,828 @@ void peel_walk(ir_node *node, void *env)
        for (i = get_irn_arity(node) - 1; i >= 0; --i) {
                ir_node *pred = get_irn_n(node, i);
 
-               if (is_in_loop(pred))
-               {
-                       peel_walk(pred, NULL);
-                       cpin[i] = get_lstate(pred)->link;
-                       //printf("copy of %ld gets in %ld", node->node_nr, cpin[i]->node_nr);
+               if (walk_condition(pred)) {
+                       DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred)));
+                       copy_walk(pred, walk_condition, set_loop);
+                       cpin[i] = get_copy(pred);
+                       DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n",
+                               get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred)));
                } else {
                        cpin[i] = pred;
-
                }
-               //printf("copy of %ld gets in %ld \n", node->node_nr, cpin[i]->node_nr);
        }
 
-       /**
-        *  copy node / finalize temp node
-        */
-       if (!nodestate->temp)
-       {
-//             if (!is_Const(node) && !is_SymConst(node)) {
-                       cp = exact_copy(node);
-                       //DBG
-                       //printf("COPY FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr);
-                       nodestate->link = cp;
-                       cpstate = XMALLOCZ(link_node_state_t);
-                       cp->link = cpstate;
-                       if (is_Block(cp))
-                               cp->loop = NULL;
-                       set_irn_visited(cp, irg->visited);
-//             } else {
-//                     cp = node;
-//                     //DBG
-//                     printf("CONST FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr);
-//                     nodestate->link = cp;
-//             }
+       /* copy node / finalize temp node */
+       if (node_info->copy == NULL) {
+               /* No temporary copy existent */
+               cp = rawcopy_node(node);
+               DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
        } else {
                /* temporary copy is existent but without correct ins */
-               cp = nodestate->link;
-               //printf("FINALIZE: %ld \n", cp->node_nr);
+               cp = get_copy(node);
+               DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp)));
        }
 
-       //TODO REM
-       //add_End_keepalive(get_irg_end(current_ir_graph), cp );
+       if (!is_Block(node)) {
+               ir_node *cpblock = get_copy(get_nodes_block(node));
 
-       if (!is_Block(node))
-       {
-               ir_node *cpblock = get_copy_of(get_nodes_block(node));
-
-               /* set the block of the copy to the copied block */
-               //printf("    PRE  NODE %ld   BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr);
                set_nodes_block(cp, cpblock );
-               //printf("    POST NODE %ld   BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr);
-
-               /* fix the phi information in attr.phis (does not add the phi node to the block) */
+               /* fix the phi information in attr.phis */
                if( is_Phi(cp) )
-               {
                        add_Block_phi(cpblock, cp);
-                       //printf("PHI-BLOCK block %ld got its phi %ld\n", cpblock->node_nr, cp->node_nr);
-               }
        }
-       else {
-               /* macroblock info is not copied */
-               set_Block_MacroBlock(cp, cp);
-       }
-
-       //dbg valid ins?
-//     for(i=0; i<ARR_LEN(cpin); i++)
-//             printf(" cpin of %ld (cp %ld):  %ld \n ", node->node_nr, cp->node_nr, cpin[i]->node_nr);
 
+       set_irn_loop(cp, set_loop);
        set_irn_in(cp, ARR_LEN(cpin), cpin);
-
-//     for(i=0; i< ARR_LEN(cpin); i++)
-//     {
-//             printf("ins %ld: %ld \n", cp->node_nr, cpin[i]->node_nr);
-//     }
-
-//TODO REM
-//     if (!nodestate->temp)
-//     {
-//             nodestate->link = cp;
-//             cpstate = XMALLOCZ(link_node_state_t);
-//             cp->link = cpstate;
-//     } else {
-//             /* temporary copy is existent but without correct ins */
-//             cp = nodestate->link;
-//     }
-
-
-       nodestate->temp = 0;
-       nodestate->cloned = 1;
 }
 
-//void chklink (ir_node *n, void * e)
-//{
-//     ir_node *link = n->link;
-//     link_node_state_t *l = (link_node_state_t *)link;
-//
-//     printf("n %ld\n", n->node_nr);
-//     printf("l p %ld\n", l->link);
-//     if (l->link)
-//             printf("l %ld\n", l->link->node_nr);
-//
-//}
-
 /**
  * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds
  */
-void peel(void)
+static void peel(out_edge *loop_outs)
 {
        int i;
        ir_node **entry_buffer;
        int entry_c = 0;
-       int entry_i;
 
-       NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_entries));
+       ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
 
-       for(i = 0; i < ARR_LEN(loop_entries); i++)
-       {
-               loop_entry_t entry = loop_entries[i];
+       NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs));
+
+       /* duplicate loop walk */
+       inc_irg_visited(current_ir_graph);
+
+       for(i = 0; i < ARR_LEN(loop_outs); i++) {
+               out_edge entry = loop_outs[i];
                ir_node *node = entry.node;
                ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
 
                if (is_Block(node)) {
-                       /* node is block and the given pred points inside the loop  */
-                       ir_node *cppred;
-
-                       peel_walk( pred, 0);
-
-                       // leave keepalives out
-                       if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) {
-                               //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
-                       } else {
-                               cppred = get_copy_of(pred);
-                               //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr);
-                               add_pred( node, cppred );
-                               //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr);
-                       }
-
-                       //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
-
-                       //DBG
-                       //phifix(node, cppred);
+                       copy_walk(pred, is_in_loop, NULL);
+                       duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
                } else {
-                       /* node is somewhere in the graph, outside of the loop */
-                       //ir_node *cppred;
-                       //ir_node *block;
-                       //ir_node *cpblock;
-                       peel_walk( pred, 0);
-
-                       // no ssa for keepalives
-                       if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) {
-                               //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
-                       } else {
-                               //printf("fix entry %ld to %ld\n", node->node_nr, pred->node_nr);
+                       copy_walk(pred, is_in_loop, NULL);
+                       /* leave out keepalives */
+                       if (!is_End(node))
+                               /* Node is user of a value defined inside the loop.
+                                * We'll need a phi since we duplicated the loop. */
+                               /* Cannot construct_ssa here, because it needs another walker. */
                                entry_buffer[entry_c++] = pred;
-                       }
+               }
+       }
 
-                       //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) );
+       ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
 
-                       // cannot construct_ssa here, because it needs another walker
+       /* Rewires the 2 heads */
+       peel_fix_heads();
 
-               } /* is block */
-       } /* for */
+       /* Generate phis for values from peeled code and original loop */
+       construct_ssa_foreach(entry_buffer, entry_c);
+       /*for(i = 0; i < entry_c; i++)
+       {
+               ir_node *cppred, *block, *cpblock, *pred;
+
+               pred = entry_buffer[i];
+               cppred = get_copy(pred);
+               block = get_nodes_block(pred);
+               cpblock = get_nodes_block(cppred);
+               construct_ssa(block, pred, cpblock, cppred);
+       }*/
+}
 
-       //irg_walk_graph(current_ir_graph, chklink, NULL, NULL);
+/**
+ * Populates head_entries with (node, pred_pos) tuple
+ * whereas the node's pred at pred_pos is in the head but not the node itself.
+ * Head and condition chain blocks must be marked.
+ */
+static void get_head_outs(ir_node *node, void *env)
+{
+       int i;
+       int arity = get_irn_arity(node);
+       (void) env;
 
-       fix_head(loop_cf_head);
+       DB((dbg, LEVEL_5, "get head entries %ld \n", get_irn_node_nr(node)));
 
-       //printf (" FIXHEAD DONE :D \n");
+       for(i = 0; i < arity; ++i) {
+               /* node is not in the head, but the predecessor is.
+                * (head or loop chain nodes are marked) */
 
-       entry_i = 0;
+               DB((dbg, LEVEL_5, "... "));
+               DB((dbg, LEVEL_5, "node %ld  marked %d (0)  pred %d marked %d (1) \n",
+                   node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i))));
 
-       /* Generate phis for values from peeled code and original loop */
-       for(i = 0; entry_i < entry_c; i++)
-       {
-               loop_entry_t entry = loop_entries[i];
-               ir_node *node = entry.node;
+               if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) {
+                       out_edge entry;
+                       entry.node = node;
+                       entry.pred_irn_n = i;
+                       DB((dbg, LEVEL_5,
+                               "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n",
+                               get_irn_node_nr(node), i, get_irn_node_nr(node), get_irn_node_nr(get_irn_n(node, i))));
+                       ARR_APP1(out_edge, cur_head_outs, entry);
+               }
+       }
+}
 
-               if (is_Block(node))
-               {
-                       /* block */
-                       ir_node *phi=get_Block_phis(node);
+/**
+ * Find condition chains, and add them to be inverted, until the node count exceeds the limit.
+ * A block belongs to the chain if a condition branches out of the loop.
+ * Returns 1 if the given block belongs to the condition chain.
+ */
+static unsigned find_condition_chains(ir_node *block) {
+       const ir_edge_t *edge;
+       unsigned mark = 0;
+       int nodes_n = 0;
 
-                       while(phi)
-                       {
-                               add_pred(phi, entry_buffer[entry_i++]);
-                               phi=get_Phi_next(phi);
+       DB((dbg, LEVEL_5, "condition_chains for block %ld\n", get_irn_node_nr(block)));
+
+       /* Collect all outs, including keeps.
+        * (TODO firm function for number of out edges?) */
+       foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
+               ++nodes_n;
+       }
+
+       /* We do not want to collect more nodes from condition chains, than the limit allows us to.
+        * Also, leave at least one block as body. */
+       if (head_inversion_node_count + nodes_n > inversion_head_node_limit
+                   || head_inversion_block_count + 1 == loop_info.blocks) {
+               set_Block_mark(block, 0);
+
+               return 0;
+       }
+
+       /* First: check our successors, and add all succs that are outside of the loop to the list */
+       foreach_block_succ(block, edge) {
+               ir_node *src = get_edge_src_irn( edge );
+               int pos = get_edge_src_pos( edge );
+
+               if (!is_in_loop(src)) {
+                       out_edge entry;
+
+                       mark = 1;
+                       entry.node = src;
+                       entry.pred_irn_n = pos;
+                       ARR_APP1(out_edge, cond_chain_entries, entry);
+                       mark_irn_visited(src);
+               }
+       }
+
+       if (mark == 0) {
+               /* this block is not part of the chain,
+                * because the chain would become too long or we have no successor outside of the loop */
+
+               set_Block_mark(block, 0);
+               return 0;
+       } else {
+               set_Block_mark(block, 1);
+               ++head_inversion_block_count;
+               DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block)));
+               head_inversion_node_count += nodes_n;
+       }
+
+       /* Second: walk all successors, and add them to the list if they are not part of the chain */
+       foreach_block_succ(block, edge) {
+               unsigned inchain;
+               ir_node *src = get_edge_src_irn( edge );
+               int pos = get_edge_src_pos( edge );
+
+               /* already done cases */
+               if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) {
+                       continue;
+               }
+
+               mark_irn_visited(src);
+               DB((dbg, LEVEL_5, "condition chain walk %ld\n", get_irn_node_nr(src)));
+               inchain = find_condition_chains(src);
+
+               /* if successor is not part of chain we need to collect its outs */
+               if (!inchain) {
+                       out_edge entry;
+                       entry.node = src;
+                       entry.pred_irn_n = pos;
+                       ARR_APP1(out_edge, cond_chain_entries, entry);
+               }
+       }
+       return mark;
+}
+
+/**
+ * Rewire the loop head and inverted head for loop inversion.
+ */
+static void inversion_fix_heads(void)
+{
+       ir_node **loopheadnins, **invheadnins;
+       ir_node *loophead = loop_cf_head;
+       ir_node *invhead =      get_copy(loophead);
+
+       int headarity =         get_irn_arity(loophead);
+       ir_node *phi;
+       int i;
+
+       int lheadin_c = 0;
+       int iheadin_c = 0;
+
+       int backedges_n = get_backedge_n(loophead, 0);
+       int lhead_arity = backedges_n;
+       int ihead_arity = headarity - backedges_n;
+
+       assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations.");
+       assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations.");
+
+       /* new in arrays for all phis in the head blocks */
+       NEW_ARR_A(ir_node *, loopheadnins, lhead_arity);
+       NEW_ARR_A(ir_node *, invheadnins, ihead_arity);
+
+       for_each_phi(loophead, phi) {
+               NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity);
+       }
+       for_each_phi(invhead, phi) {
+               NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity);
+       }
+
+       for (i = 0; i < headarity; i++) {
+               ir_node *pred = get_irn_n(loophead, i);
+
+               /**
+                * Rewire the head blocks ins and their phi ins.
+                * Requires phi list per block.
+                */
+               if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
+                       /* just copy these edges */
+                       loopheadnins[lheadin_c] = pred;
+                       for_each_phi(loophead, phi) {
+                               get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i);
                        }
+                       ++lheadin_c;
                } else {
-                       /* not block */
+                       invheadnins[iheadin_c] = pred;
+                       for_each_phi(invhead, phi) {
+                               get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ;
+                       }
+                       ++iheadin_c;
+               }
+       }
 
-                       ir_node *cppred, *block, *cpblock, *pred;
+       /* assign the ins to the head blocks */
+       set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins);
+       set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins);
 
-                       /**
-                        * pred = get_irn_n(entry.node, entry.pred_irn_n);
-                        * does not work, because we could have changed the nodes preds in construct_ssa
-                        */
+       /* assign the ins for the phis */
+       for_each_phi(loophead, phi) {
+               ir_node **ins = get_node_info(phi)->ins;
+               set_irn_in(phi, lhead_arity, ins);
+       }
 
-                       pred = entry_buffer[entry_i++];
+       for_each_phi(invhead, phi) {
+               ir_node **ins = get_node_info(phi)->ins;
+               set_irn_in(phi, ihead_arity, ins);
+       }
+}
 
-                       //printf("pred %ld\n", pred->node_nr);
-                       cppred = get_copy_of(pred);
-                       //printf("cppred %ld\n", cppred->node_nr);
-                       block = get_nodes_block(pred);
-                       //printf("block %ld\n", block->node_nr);
-                       cpblock = get_nodes_block(cppred);
-                       //printf("cpblock %ld\n", cpblock->node_nr);
+static void inversion_walk(out_edge *head_entries)
+{
+       int i;
+       ir_node *phi;
+       int entry_c = 0;
+       ir_node **entry_buffer;
+       ir_node **head_phi_assign;
 
+       NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries));
 
-                       //dump_ir_block_graph(current_ir_graph, "vorher");
-                       construct_ssa(block, pred, cpblock, cppred);
-                       //add_End_keepalive(get_irg_end(current_ir_graph), cppred);
+       head_phi_assign = NEW_ARR_F(ir_node *, 0);
 
+       /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */
+       for_each_phi(loop_cf_head , phi) {
+               for(i=0; i<get_irn_arity(phi); ++i) {
+                       ir_node *def = get_irn_n(phi, i);
+                       if (is_nodesblock_marked(def)) {
+                               ARR_APP1(ir_node *, head_phi_assign, def);
+                       }
+               }
+       }
+
+       ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
+
+       /**
+        * duplicate condition chain
+        **/
+       inc_irg_visited(current_ir_graph);
 
-                       //add_pred(get_irg_end(current_ir_graph), cppred);
-                       //dump_ir_block_graph(current_ir_graph, "nachher");
+       for(i = 0; i < ARR_LEN(head_entries); ++i) {
+               out_edge entry = head_entries[i];
+               ir_node *node = entry.node;
+               ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
 
+               if (is_Block(node)) {
+                       DB((dbg, LEVEL_5, "\nInit walk block %ld\n", get_irn_node_nr(pred)));
+                       copy_walk(pred, is_nodesblock_marked, cur_loop);
+                       duplicate_preds(node, entry.pred_irn_n, get_copy(pred) );
+               } else {
+                       DB((dbg, LEVEL_5, "\nInit walk node  %ld\n", get_irn_node_nr(pred)));
+                       copy_walk(pred, is_nodesblock_marked, cur_loop);
 
+                       /* ignore keepalives */
+                       if (!is_End(node))
+                               /* Node is user of a value assigned inside the loop.
+                                * We will need a phi since we duplicated the head. */
+                               entry_buffer[entry_c++] = pred;
                }
        }
+
+       ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
+
+       inversion_fix_heads();
+
+       /* Generate phis for users of values assigned in the condition chain
+        * and read in the loops body */
+       construct_ssa_foreach(entry_buffer, entry_c);
+
+       /* Generate phis for values that are assigned in the condition chain
+        * but not read in the loops body. */
+       construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign));
+
+       loop_cf_head = get_copy(loop_cf_head);
 }
 
-void decision_maker(void)
+/* Loop peeling */
+void loop_peeling(void)
 {
-       //inc_irg_visited(current_ir_graph);
-       //loop_walker( loop_entries, NULL, get_invariants, NULL );
+       cur_loop_outs = NEW_ARR_F(out_edge, 0);
+       irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
 
+       peel(cur_loop_outs);
+
+       /* clean up */
+       reset_node_infos();
+
+       set_irg_doms_inconsistent(current_ir_graph);
+       set_irg_loopinfo_inconsistent(current_ir_graph);
+       set_irg_outs_inconsistent(current_ir_graph);
+
+       DEL_ARR_F(cur_loop_outs);
+}
+
+/* Loop inversion */
+void loop_inversion(void)
+{
+       unsigned do_inversion = 1;
+
+       inversion_head_node_limit = INT_MAX;
+
+       /* Search for condition chains. */
+       ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
+
+       irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
+
+       loop_info.blocks = get_loop_n_blocks(cur_loop);
+       cond_chain_entries = NEW_ARR_F(out_edge, 0);
+
+       head_inversion_node_count = 0;
+       head_inversion_block_count = 0;
+
+       set_Block_mark(loop_cf_head, 1);
+       mark_irn_visited(loop_cf_head);
        inc_irg_visited(current_ir_graph);
-       peel();
 
+       find_condition_chains(loop_cf_head);
+
+       DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks));
+       if (loop_info.blocks < 2) {
+               do_inversion = 0;
+               DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks));
+       }
+
+       /* We also catch endless loops here,
+        * because they do not have a condition chain. */
+       if (head_inversion_block_count < 1) {
+               do_inversion = 0;
+               DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count));
+       }
+
+       if (do_inversion) {
+               cur_head_outs = NEW_ARR_F(out_edge, 0);
+
+               /* Get all edges pointing into the head or condition chain (outs). */
+               irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
+               inversion_walk(cur_head_outs);
+
+               DEL_ARR_F(cur_head_outs);
+
+               set_irg_doms_inconsistent(current_ir_graph);
+               set_irg_loopinfo_inconsistent(current_ir_graph);
+               set_irg_outs_inconsistent(current_ir_graph);
+       }
+
+       /* free */
+       DEL_ARR_F(cond_chain_entries);
+       ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
 }
 
+/**
+ * Returns last element of linked list of copies by
+ * walking the linked list.
+ */
+ir_node *get_last_copy(ir_node *node)
+{
+       ir_node *copy, *cur;
+       cur = node;
+       while ((copy = get_copy(cur))) {
+               cur = copy;
+       }
+       return cur;
+}
 
 /**
- * TODO use list , not arr_F
+ * Rewire floating copies of the current loop.
  */
-void analyze_loop(ir_loop *loop)
+void unrolling_fix_cf(void)
 {
-       /* Init new for every loop */
-       loop_cf_head = NULL;
-       loop_cf_head_valid = 1;
-       has_sto = 0;
+       ir_node *loophead = loop_cf_head;
+       int headarity =         get_irn_arity(loophead);
+       ir_node *phi, *headnode;
+       /*ir_node *high, *low;*/
+       int i;
 
-       cur_loop = loop;
+       int uhead_in_n = 0;
+       int backedges_n = get_backedge_n(loophead, 0);
+       int unroll_arity = backedges_n;
 
-       /* arrays */
-       backedges = NEW_ARR_F(loop_entry_t, 0);
-       alien_backedges = NEW_ARR_F(loop_entry_t, 0);
-       loop_entries = NEW_ARR_F(loop_entry_t, 0);
-       head_edges = NEW_ARR_F(loop_entry_t, 0);
+       /* Create ins for all heads and their phis */
+       headnode = get_copy(loophead);
+       for_each_copy(headnode) {
+               NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity);
+               for_each_phi(headnode, phi) {
+                       NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity);
+               }
+       }
 
-       inc_irg_visited( current_ir_graph );
-       irg_walk_graph( current_ir_graph, block_phi_walker, NULL, NULL );
+       /* Append the copies to the existing loop. */
+       for (i = 0; i < headarity; i++) {
+               ir_node *upper_head = loophead;
+               ir_node *lower_head = get_copy(loophead);
 
-       /* Collect all backedges */
-       for_each_loop_block(loop, collect_backedges, NULL );
+               ir_node *upper_pred = get_irn_n(loophead, i);
+               ir_node *lower_pred = get_copy(get_irn_n(loophead, i));
 
-       /* Find loop entries walk, find head */
-       inc_irg_visited( current_ir_graph );
-       irg_walk_graph( current_ir_graph, find_loop_entries_walk, NULL, NULL );
+               ir_node *last_pred;
 
-       /* RETURN if there is no valid head */
-       if (!loop_cf_head || !loop_cf_head_valid)
-       {
-               //DBG printf("NOTE: There is no valid loop head. Nothing done.\n");
-               return;
+               /**
+                * Build unrolled loop top down
+                */
+               if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) {
+                       for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) {
+                               get_node_info(lower_head)->ins[uhead_in_n] = upper_pred;
+
+                               for_each_phi(upper_head, phi) {
+                                       ir_node *phi_copy = get_copy(phi);
+                                       get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i);
+                               }
+                       }
+
+                       last_pred = upper_pred;
+                       ++uhead_in_n;
+
+                       /* Fix the topmost loop heads backedges. */
+                       set_irn_n(loophead, i, last_pred);
+                       for_each_phi(loophead, phi) {
+                               ir_node *last_phi = get_last_copy(phi);
+                               ir_node *pred = get_irn_n(last_phi, i);
+                               set_irn_n(phi, i, pred);
+                       }
+               }
+       }
+
+       headnode = get_copy(loophead);
+       for_each_copy(headnode) {
+               set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins);
+               for_each_phi(headnode, phi) {
+                       set_irn_in(phi, unroll_arity, get_node_info(phi)->ins);
+               }
        }
+}
+
+#if 0
+static ir_node *add_phi(ir_node *node, int phi_pos)
+{
+       ir_mode *mode;
+       ir_node *phi;
+       ir_node **in;
+       mode = get_irn_mode(get_irn_n(node, phi_pos));
+       ir_node *block = get_nodes_block(node);
+       int n_cfgpreds = get_irn_arity(block);
+       ir_node *pred = get_irn_n(node, phi_pos);
+       int i;
+
+       /* create a new Phi */
+       NEW_ARR_A(ir_node*, in, n_cfgpreds);
+       for(i = 0; i < n_cfgpreds; ++i)
+               in[i] = new_Unknown(mode);  /*pred;*/
 
-       decision_maker();
+       phi = new_r_Phi(block, n_cfgpreds, in, mode);
 
-       // TODO free all link states... or better put them on functionstack
+       assert(phi && "phi null");
+       assert(is_Bad(phi) && "phi bad");
 
-       /* FREE */
-       DEL_ARR_F(loop_entries);
-       DEL_ARR_F(backedges);
-       DEL_ARR_F(alien_backedges);
-       DEL_ARR_F(head_edges);
+       /* Important: always keep block phi list up to date. */
+       add_Block_phi(block, phi);
+       /* EVERY node is assumed to have a node_info linked. */
+       alloc_node_info(phi, NULL);
 
-       //dump_ir_block_graph(current_ir_graph, "-lu1");
+       set_irn_n(node, phi_pos, phi);
+       return phi;
 }
+#endif
+
 
 /**
- *     Find most inner loops and send them to analyze_loop
+ * Loop unrolling
+ * Could be improved with variable range informations.
  */
-void analyze_inner_loop(ir_loop *loop)
+void loop_unrolling(void)
 {
-       /* descend into sons */
-       int sons = get_loop_n_sons(loop);
+       int i, j;
 
-       //printf("found %d loops \n", sons);
+       unroll_times = 8;
 
-       if (sons==0)
-       {
-               //printf("analyze loop %ld\n", loop->loop_nr);
-               analyze_loop(loop);
+       cur_loop_outs = NEW_ARR_F(out_edge, 0);
+       irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL );
+
+       ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
+
+       /* duplicate whole loop content */
+       inc_irg_visited(current_ir_graph);
+
+       for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
+               out_edge entry = cur_loop_outs[i];
+               ir_node *node = entry.node;
+               ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
+
+               if (!is_Block(node)) {
+
+                       for (j = 0; j < unroll_times-1; ++j) {
+                               copy_walk(pred, is_in_loop, cur_loop);
+
+                               pred = get_copy(pred);
+                       }
+               }
        }
-       else
-       {
-               int s;
-               for(s=0; s<sons; s++)
-               {
-                       //printf("descend into loop son %ld\n", get_loop_son(loop, s)->loop_nr);
-                       analyze_inner_loop( get_loop_son(loop, s) );
+
+       for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
+               out_edge entry = cur_loop_outs[i];
+               ir_node *node = entry.node;
+               ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
+
+               /* build linked list of copied nodes/blocks */
+               if (is_Block(node)) {
+                       for (j = 0; j < unroll_times-1; ++j) {
+                               copy_walk(pred, is_in_loop, cur_loop);
+                               duplicate_preds(node, entry.pred_irn_n, get_copy(pred));
+
+                               pred = get_copy(pred);
+                       }
+               }
+       }
+
+       ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
+
+       /* Line up the floating copies. */
+       unrolling_fix_cf();
+
+       /* Generate phis for all loop outs */
+       for(i = 0; i < ARR_LEN(cur_loop_outs); i++) {
+               out_edge entry = cur_loop_outs[i];
+               ir_node *node = entry.node;
+               ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n);
+
+               if (!is_Block(node) && !is_End(node)) {
+                       DB((dbg, LEVEL_1, "  construct_ssa_n def %ld  node %ld  pos %d\n",
+                                       get_irn_node_nr(pred), get_irn_node_nr(node), entry.pred_irn_n));
+                       construct_ssa_n(pred, node);
                }
        }
+
+       DEL_ARR_F(cur_loop_outs);
+
+       set_irg_doms_inconsistent(current_ir_graph);
+       set_irg_loopinfo_inconsistent(current_ir_graph);
+       set_irg_outs_inconsistent(current_ir_graph);
 }
 
+/* Initialization and */
+static void init_analyze(ir_loop *loop)
+{
+       /* Init new for every loop */
+       cur_loop = loop;
+
+       loop_cf_head = NULL;
+       loop_cf_head_valid = 1;
+       loop_inv_head = NULL;
+       loop_peeled_head = NULL;
+
+       loop_info.outs = 0;
+       loop_info.calls = 0;
+       loop_info.loads = 0;
+       loop_info.blocks = 0;
+
+       DB((dbg, LEVEL_2, "  >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0))));
+
+       irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
+
+       /* RETURN if there is no valid head */
+       if (!loop_cf_head || !loop_cf_head_valid) {
+               DB((dbg, LEVEL_2,   "No valid loop head. Nothing done.\n"));
+               return;
+       }
+
+       if (enable_peeling)
+               loop_peeling();
 
+       if (enable_inversion)
+               loop_inversion();
+       if (enable_unrolling)
+               loop_unrolling();
+
+#if 0
+       /* RETURN if there is a call in the loop */
+       if (loop_info.calls)
+               return;
+#endif
+
+       DB((dbg, LEVEL_2, "      <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0))));
+}
 
-//
-//void phicheck(ir_node *node, void * env)
-//{
-//     if (!is_Block(node)) return;
-//
-//     ir_node *phi=get_Block_phis(node);
-//     while(phi)
-//     {
-//             if (!is_Phi(phi))
-//             {
-//                     printf("NOT PHI %ld\n", phi->node_nr);
-//                     phi = NULL;
-//             } else {
-//                     phi=get_Phi_next(phi);
-//             }
-//     }
-//}
-
-void loop_unroll(ir_graph *irg)
+/* Find most inner loops and send them to analyze_loop */
+static void find_most_inner_loop(ir_loop *loop)
 {
-       //printf(" --- loop unroll start --- \n");
+       /* descend into sons */
+       int sons = get_loop_n_sons(loop);
 
-       //irg_walk_graph(irg, phicheck, NULL, NULL);
+       if (sons == 0) {
+               loop_element elem;
+               int el_n, i;
 
+               el_n = get_loop_n_elements(loop);
+
+               for (i=0; i < el_n; ++i) {
+                       elem = get_loop_element(loop, i);
+                       /* We can only rely on the blocks,
+                        * as the loop attribute of the nodes seems not to be set. */
+                       if (is_ir_node(elem.kind) && is_Block(elem.node)) {
+                               ARR_APP1(ir_node *, loops, elem.node);
+                               DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node));
+                               break;
+                       }
+               }
+       } else {
+               int s;
+               for(s=0; s<sons; s++) {
+                       find_most_inner_loop(get_loop_son(loop, s));
+               }
+       }
+}
+
+/**
+ * Assure preconditions are met and go through all loops.
+ */
+void loop_optimization(ir_graph *irg)
+{
        ir_loop *loop;
+       int     i, sons, nr;
+
+       /* Init */
+       link_node_state_list = NULL;
+       set_current_ir_graph(irg);
+
+       /* preconditions */
+       edges_assure(irg);
+       assure_irg_outs(irg);
 
+       /* NOTE: sets only the loop attribute of blocks, not nodes */
+       /* NOTE: Kills links */
        assure_cf_loop(irg);
 
+       ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
+       collect_phiprojs(irg);
+       ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
+
+       /* allocate node_info for additional information on nodes */
+       ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
+       irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
+
        loop = get_irg_loop(irg);
-       int sons = get_loop_n_sons(loop);
-       //printf("FOUND %d LOOPS \n", sons);
-       int nr;
-       for (nr=0; nr<sons; nr++)
-       {
-               analyze_inner_loop( get_loop_son(loop, nr) );
+       sons = get_loop_n_sons(loop);
+
+       loops = NEW_ARR_F(ir_node *, 0);
+
+       for (nr = 0; nr < sons; ++nr) {
+               find_most_inner_loop(get_loop_son(loop, nr));
        }
 
-       //DBG
-       //printf(" --- loop unroll done --- \n");
+/* TODO Keep backedges during optimization to avoid
+ * this ugly allocation and deallocation.
+ * (set_irn_in seems to destroy them)
+ */
+#if 0
+       for (i = 0; i < ARR_LEN(loops); ++i) {
+               ir_loop *loop;
+
+               loop = get_irn_loop(loops[i]);
+               init_analyze(loop);
+       }
+#else
+       /* This part is useful for testing
+        * or has to be used if the backedge information is destroyed.
+        * Which is the case at the moment, because the backedge information gets lost
+        * before inversion_fix_heads/unrolling_fix_cf, which results in bads.
+        * NOTE!: Testsuite runs successfully nevertheless...
+        */
+
+       /**
+        * assure_cf_loop() creates a completely new loop tree.
+        * Thus we cannot optimize a loop, assure_cf_loop() and continue with the next loop,
+        * as the next loop must be searched because it is not distinguishable from the
+        * already done loops.
+        * The links of the loops are also not available anymore (to store a "loop done" flag).
+        * Therefore we save a block per loop.
+        * NOTE: We rely on the loop optimizations not to remove any block from the loop.
+        * Later, we fetch the blocks loop attribute, as it is updated by assure_cf_loop.
+        */
+       for (i = 0; i < ARR_LEN(loops); ++i) {
+               ir_loop *loop;
+
+               free_node_info();
+               ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
+
+               edges_assure(current_ir_graph);
+               assure_irg_outs(current_ir_graph);
+
+               /* NOTE: sets only the loop attribute of blocks */
+               /* NOTE: Kills links */
+               assure_cf_loop(current_ir_graph);
+
+               irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL);
+               ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
+
+               /* Get loop from block */
+               loop = get_irn_loop(loops[i]);
+               init_analyze(loop);
+       }
+#endif
+
+       /* Free */
+       DEL_ARR_F(loops);
+
+       free_node_info();
+       ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
+       ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
+}
+
+void do_loop_unrolling(ir_graph *irg)
+{
+       enable_unrolling = 1;
+       enable_peeling = 0;
+       enable_inversion = 0;
+
+       DB((dbg, LEVEL_2, " >>> unrolling (Startnode %ld) <<<\n",
+               get_irn_node_nr(get_irg_start(irg))));
+
+       loop_optimization(irg);
+
+       DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %ld) <<<\n",
+               get_irn_node_nr(get_irg_start(irg))));
 }
 
-struct loop_unroll_pass_t {
-       ir_graph_pass_t pass;
-};
+void do_loop_inversion(ir_graph *irg)
+{
+       enable_unrolling = 0;
+       enable_peeling = 0;
+       enable_inversion = 1;
 
-/**
- * Wrapper to run ...() as a ir_prog pass.
- */
-static int loop_unroll_wrapper(ir_graph *irg, void *context) {
+       DB((dbg, LEVEL_2, " >>> inversion (Startnode %ld) <<<\n",
+               get_irn_node_nr(get_irg_start(irg))));
 
-       (void)context;
-       loop_unroll(irg);
-       return 0;
+       loop_optimization(irg);
+
+       DB((dbg, LEVEL_2, " >>> inversion done (Startnode %ld) <<<\n",
+               get_irn_node_nr(get_irg_start(irg))));
 }
 
-//TODO ??
-ir_graph_pass_t *loop_unroll_pass(const char *name)
+void do_loop_peeling(ir_graph *irg)
 {
-       struct loop_unroll_pass_t *pass =
-               XMALLOCZ(struct loop_unroll_pass_t);
+       enable_unrolling = 0;
+       enable_peeling = 1;
+       enable_inversion = 0;
+
+       DB((dbg, LEVEL_2, " >>> peeling (Startnode %ld) <<<\n",
+               get_irn_node_nr(get_irg_start(irg))));
+
+       loop_optimization(irg);
+
+       DB((dbg, LEVEL_2, " >>> peeling done (Startnode %ld) <<<\n",
+               get_irn_node_nr(get_irg_start(irg))));
 
-       return def_graph_pass_constructor(
-               &pass->pass, name ? name : "loop_unroll",
-               loop_unroll_wrapper);
 }
 
-/*
-void firm_init_loopunroll(void) {
-       FIRM_DBG_REGISTER(dbg, "firm.opt.loopunroll");
-}*/
+void firm_init_loop_opt(void)
+{
+       FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
+}