irgraph: Use get_irg_obstack() instead of accessing irg->obst directly.
[libfirm] / ir / opt / gvn_pre.c
index 08dcdf1..e4bc76b 100644 (file)
 #include "irpass.h"
 #include "valueset.h"
 #include "irloop.h"
+#include "firmstat_t.h"
 
 #include "irgraph_t.h"
 #include "irnode_t.h"
 #include "iropt_t.h"
 #include "plist.h"
 
+/* suggested by GVN-PRE authors */
 #define MAX_ANTIC_ITER 10
 #define MAX_INSERT_ITER 3
 
-#define PARTLY_ONLY 0
-#define HOIST_HIGH 0
-#define BETTER_GREED 0
+/* Stops antic iteration from processing endless loops. */
+#define IGNORE_INF_LOOPS 1
+/* Stops antic information to flow over infinite loop backedge */
+#define NO_INF_LOOPS 0
+
+/* Attempt to reduce register pressure and reduce code size
+   for hoisted nodes. */
+#define HOIST_HIGH 1
+#define COMMON_DOM 1
+
+/* Seamless implementation of handling loads and generally memory
+   dependent nodes with GVN-PRE. */
 #define LOADS 1
 #define DIVMODS 0
-#define NO_INF_LOOPS 0
-#define NO_INF_LOOPS2 0
+
+/* Experimental */
+
+/* Only optimize node directly after phi node if node is only successor. */
+#define MIN_CUT 0
+
+/* GVN is very limited. This enables optimize_node during value recognition.
+   GVN ist still very limited, since a+1+b+1 doesn't equal a+b+2.
+   TODO Broken for yet unknown reasons. */
+#define OPTIMIZE_NODES 0
+
 
 /** Additional info we need for every block. */
 typedef struct block_info {
        ir_valueset_t     *exp_gen;    /* contains this blocks clean expressions */
        ir_valueset_t     *avail_out;  /* available values at block end */
        ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
-       ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase*/
+       ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase */
        ir_valueset_t     *new_set;    /* new by hoisting made available values */
        ir_nodehashmap_t  *trans;      /* contains translated nodes translated into block */
        ir_node           *avail;      /* saves available node for insert node phase */
@@ -75,8 +95,8 @@ typedef struct block_info {
 
 /**
  * A pair of nodes to be exchanged.
- * We have to defer the exchange because our hash-sets cannot
- * find an already replaced node.
+ * We have to defer the exchange because there are still needed references
+ * to certain nodes.
  */
 typedef struct elim_pair {
        ir_node *old_node;      /* node that will be replaced */
@@ -98,9 +118,15 @@ typedef struct pre_env {
        unsigned        last_idx;     /* last node index of input graph */
        char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
        char            first_iter;   /* non-zero for first fixed point iteration */
+       int             iteration;    /* iteration counter */
+#if OPTIMIZE_NODES
+       pset           *value_table;   /* standard value table*/
+       pset           *gvnpre_values; /* gvnpre value table */
+#endif
 } pre_env;
 
 static pre_env *environ;
+
 /* custom GVN value map */
 static ir_nodehashmap_t value_map;
 
@@ -127,20 +153,20 @@ typedef struct gvnpre_statistics {
        int infinite_loops;
 } gvnpre_statistics;
 
-gvnpre_statistics *gvnpre_stats = NULL;
+static gvnpre_statistics *gvnpre_stats = NULL;
 
-static void init_stats()
+static void init_stats(void)
 {
        gvnpre_stats = XMALLOCZ(gvnpre_statistics);
 }
 
-static void free_stats()
+static void free_stats(void)
 {
        free(gvnpre_stats);
        gvnpre_stats = NULL;
 }
 
-static void print_stats()
+static void print_stats(void)
 {
        gvnpre_statistics *stats = gvnpre_stats;
        DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
@@ -232,12 +258,14 @@ static int compare_gvn_identities(const void *elt, const void *key)
        if (is_Phi(a) || is_Phi(b))
                return 1;
 
-       /* memops are not the same, even if we want optimize them
+       /* memops are not the same, even if we want to optimize them
           we have to take the order in account */
-       if (is_memop(a) || is_memop(b)) {
+       if (is_memop(a) || is_memop(b) ||
+           get_irn_mode(a) == mode_T ||
+           get_irn_mode(b) == mode_T) {
                /* Loads with the same predecessors are the same value;
                   this should only happen after phi translation. */
-               if (! is_Load(a) || ! is_Load(b))
+               if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
                        return 1;
        }
 
@@ -335,9 +363,7 @@ static ir_node *remember(ir_node *irn)
                in[i] = pred_value;
        }
 
-       /* do not create new value for loads or else it will not
-          be found in avail_out during insert phase */
-       if (changed && ! is_Load(irn)) {
+       if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
                /* create representative for */
                ir_node *nn = new_ir_node(
                        get_irn_dbg_info(irn),
@@ -349,6 +375,14 @@ static ir_node *remember(ir_node *irn)
                        in);
                copy_node_attr(environ->graph, irn, nn);
 
+#if OPTIMIZE_NODES
+               /* TODO optimize_node() uses the valuetable and thus the custom
+                  identify_cmp() and will fail trying. */
+               environ->graph->value_table = environ->value_table;
+               nn = optimize_node(nn);
+               environ->graph->value_table = environ->gvnpre_values;
+#endif
+
                /* now the value can be determined because the
                   predecessors are the leaders */
                value = identify_remember(nn);
@@ -363,8 +397,9 @@ static ir_node *remember(ir_node *irn)
        return value;
 }
 
-/** When the value map has been built we may lookup expressions
- *  and remember them if new.
+/**
+ * When the value map has been built we may lookup expressions
+ * and remember them if new.
  */
 static ir_node *identify_or_remember(ir_node *irn)
 {
@@ -575,7 +610,7 @@ static void analyse_loops(ir_graph *irg)
        ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
 }
 
-#if NO_INF_LOOPS || NO_INF_LOOPS2
+#if IGNORE_INF_LOOPS || NO_INF_LOOPS
 /**
  * Returns non-zero if block is part of an infinite loop.
  */
@@ -621,6 +656,8 @@ static unsigned is_nice_value(ir_node *n)
 #if LOADS
        if (is_Load(n))
                return get_Load_volatility(n) == volatility_non_volatile;
+       if (is_Store(n))
+               return get_Store_volatility(n) == volatility_non_volatile;
 #endif
 
        if (get_irn_pinned(n) == op_pin_state_pinned)
@@ -650,9 +687,27 @@ static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *val
        if (! is_nice_value(n))
                return 0;
 
-       /* filter loads from antic_in */
+#if LOADS
+       /* filter loads with no phi predecessor from antic_in */
        if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
                return 0;
+       if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
+               return 0;
+#endif
+
+#if DIVMODS
+       if (is_Div(n)) {
+               ir_node *mem = get_Div_mem(n);
+
+               mem = skip_Pin(mem);
+
+               if (! is_Phi(mem) && ! is_NoMem(mem))
+                       return 0;
+       }
+
+       if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
+               return 0;
+#endif
 
        arity = get_irn_arity(n);
        for (i = 0; i < arity; ++i) {
@@ -694,20 +749,28 @@ static void topo_walker(ir_node *irn, void *ctx)
        if (is_Block(irn))
                return;
 
+#if OPTIMIZE_NODES
+       environ->graph->value_table = environ->value_table;
+       identify_remember(irn);
+       environ->graph->value_table = environ->gvnpre_values;
+#endif
+
        /* GVN step: remember the value. */
        value = remember(irn);
 
-       /* values that are not in antic_in also dont't need to be in any other set */
-       if (! is_nice_value(irn))
-               return;
-
        if (is_irn_constlike(irn))
                return;
 
        block = get_nodes_block(irn);
        info  = get_block_info(block);
 
-       ir_valueset_insert(info->avail_out, value, irn);
+       if (get_irn_mode(irn) != mode_X)
+               ir_valueset_insert(info->avail_out, value, irn);
+
+       /* values that are not in antic_in also dont't need to be in any other set */
+
+       if (! is_nice_value(irn))
+               return;
 
        if (is_clean_in_block(irn, block, info->exp_gen)) {
                DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
@@ -755,70 +818,6 @@ static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
        ir_nodehashmap_insert(map, node, trans);
 }
 
-#if DIVMODS
-/* Helper function to compare the values of pred and avail_pred. */
-static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
-{
-       ir_node *avail_value = identify(avail_pred);
-       ir_node *pred_block  = get_Block_cfgpred_block(block, pos);
-       ir_node *trans_pred  = get_translated(pred_block, pred);
-       ir_node *value;
-
-       if (trans_pred == NULL)
-               trans_pred = pred;
-       value = identify(trans_pred);
-
-       DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
-
-       return (value == avail_value);
-}
-
-/**
- * Does phi translation for redundant Div/Mod nodes only.
- * Returns NULL for non-redundant node, which needs to be phi translated.
- */
-static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
-{
-       ir_node *mem   = get_memop_mem(divmod);
-       ir_node *trans = get_translated_pred(block, pos, mem);
-
-       if (trans == NULL)
-               trans = mem;
-
-       /* no partial redundancy if this is a mode_M phi */
-       if (is_Proj(trans)) {
-               /* The last memory operation in predecessor block */
-               ir_node *avail_op = get_Proj_pred(trans);
-
-               if (get_irn_op(divmod) == get_irn_op(avail_op)) {
-                       unsigned left, right;
-
-                       if (is_Div(avail_op)) {
-                               if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
-                                   get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
-
-                                       left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
-                                       right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
-
-                                       if (left && right)
-                                               return avail_op;
-                               }
-                       } else if (is_Mod(avail_op)) {
-                           if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
-
-                                       left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
-                                       right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
-
-                                       if (left && right)
-                                               return avail_op;
-                               }
-                       }
-               }
-       }
-       return NULL;
-}
-#endif
-
 /**
  * Translates an expression above a Phi.
  *
@@ -845,14 +844,6 @@ static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valuese
        }
        arity = get_irn_arity(node);
 
-#if DIVMODS
-       if (is_Div(node) || is_Mod(node)) {
-               ir_node *avail_op = phi_translate_divmod(node, block, pos);
-               if (avail_op)
-                       return avail_op;
-       }
-#endif
-
        needed = 0;
        in = ALLOCANZ(ir_node *, arity);
 
@@ -870,28 +861,46 @@ static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valuese
 
                if (! leader)
                        leader = pred;
+
                /* we cannot find this value in antic_in, because the value
                   has (possibly) changed! */
                pred_trans  = get_translated(pred_block, leader);
 
+
+#if DIVMODS
+               if (is_Div(node)) {
+                       ir_node *mem = get_Div_mem(node);
+
+                       mem = skip_Pin(mem);
+
+                       if (! is_Phi(mem))
+                               pred_trans = get_Div_mem(node);
+               }
+#endif
+
                DB((dbg, LEVEL_3, "trans %+F of %+F is  %+F\n", leader, pred_block, pred_trans));
                if (pred_trans == NULL) {
                        new_pred = pred;
                } else {
                        new_pred = pred_trans;
 
-                       /* loads: in case of memory projection and load,
-                          skip them and use the loads memory */
+                       /* loads: Predecessor is a memory phi, which translated yields a proj or
+                          another phi. In case of projection and a load predecessor,
+                          skip them and use the loads memory. */
                        if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
-                               ir_node *load = get_Proj_pred(pred_trans);
-                               /* translated predecessor will be different */
+#if LOADS || DIVMODS
+                               ir_node *loadstore = get_Proj_pred(pred_trans);
+                               /* If we do not translate this node, we will get its value wrong. */
                                needed |= 1;
 
-                               if (is_Load(load)) {
-                                       /* Put new load under the adjacent loads mem
+                               if (is_Load(loadstore)) {
+                                       /* Put new load under the adjacent loads memory edge
                                           such that GVN may compare them. */
-                                       new_pred = get_Load_mem(load);
+                                       new_pred = get_Load_mem(loadstore);
+                               } else if (is_Store(loadstore)) {
+                                       new_pred = get_Store_mem(loadstore);
                                }
+#endif
                        } else {
                                /* predecessor value changed, so translation is needed */
                                if (identify(new_pred) != identify(pred))
@@ -969,7 +978,7 @@ static void compute_antic(ir_node *block, void *ctx)
 
        /* add exp_gen */
        if (env->first_iter) {
-#if NO_INF_LOOPS
+#if IGNORE_INF_LOOPS
                /* keep antic_in of infinite loops empty */
                if (! is_in_infinite_loop(block)) {
                        foreach_valueset(info->exp_gen, value, expr, iter) {
@@ -1002,23 +1011,23 @@ static void compute_antic(ir_node *block, void *ctx)
 
                        if (trans == NULL)
                                trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
+
                        /* create new value if necessary */
                        trans_value = identify_or_remember(trans);
 
                        DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
 
-                       /* on value change (phi present) we need the translated node
+                       /* On value change (phi present) we need the translated node
                           to represent the new value for possible further translation. */
                        if (value != trans_value)
                                represent = trans;
                        else
                                represent = expr;
 
-                       // test only 1 translation, because more failed, because of uncleanliness of load
-                       if (is_clean_in_block(expr, block, info->antic_in) && ! is_Load(expr)) {
-#if NO_INF_LOOPS2
-                               /* TODO to be determined why nearly every spec benchmark fails */
-                               if (! is_in_infinite_loop(succ) || ! is_backedge(succ, pos)) {
+                       if (is_clean_in_block(expr, block, info->antic_in)) {
+#if NO_INF_LOOPS
+                               /* Prevent information flow over the backedge of endless loops. */
+                               if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
                                        ir_valueset_replace(info->antic_in, trans_value, represent);
                                }
 #else
@@ -1140,7 +1149,11 @@ static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *v
                trans_expr = get_translated(pred_block, expr);
                trans_value = identify(trans_expr);
 
-               avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+               if (is_Const(trans_expr))
+                       avail_expr = trans_expr;
+               else
+                       avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+
                /* value might be available through a not yet existing constant */
                if (avail_expr == NULL && is_Const(trans_expr)) {
                        /* limit range of new constants */
@@ -1149,11 +1162,12 @@ static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *v
                        ir_tarval *lower = new_tarval_from_long(-127, cmode);
                        ir_tarval *c     = get_Const_tarval(trans_expr);
 
-                       if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
-                               tarval_cmp(c, upper) != ir_relation_greater_equal) {
-                               avail_expr = NULL;
-                       } else {
+                       /* tarval within range? */
+                       if (tarval_cmp(lower, c) == ir_relation_less_equal &&
+                               tarval_cmp(c, upper) == ir_relation_less_equal) {
                                avail_expr = trans_expr;
+                       } else {
+                               avail_expr = NULL;
                        }
            }
 
@@ -1173,20 +1187,14 @@ static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *v
                        if (first_avail == NULL)
                                first_avail = avail_expr;
                        else if (first_avail != avail_expr)
-                               /* Multiple different expressions are available */
+                               /* Multiple different expressions are available,
+                                  This is why we need no cut over avail_out sets. */
                                fully_redundant = 0;
 
                        DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
                }
        }
 
-#if BETTER_GREED
-       /* value is redundant from last iteration,
-          but has not been removed from antic_in (is not optimized) */
-       if (! environ->first_iter && is_redundant(block, expr))
-               return mode;
-#endif
-
        /* If it is not the same value already existing along every predecessor
        and it is defined by some predecessor then it is partially redundant. */
        if (! partially_redundant || fully_redundant)
@@ -1222,21 +1230,6 @@ static void update_new_set(ir_node *block, ir_node *idom)
 #endif
 } /* update_new_set */
 
-#if BETTER_GREED
-/*
- * Returns redundant flag of node irn in block block.
- */
-static unsigned is_redundant(ir_node *block, ir_node *irn)
-{
-       (void) block;
-       (void) irn;
-
-       /* TODO Needs to use a flag, because antic_done should only be used
-          if node is finally processed by insert_nodes. */
-       return 0;
-}
-#endif
-
 /**
  * Checks if hoisting irn is greedy.
  * Greedy hoisting means that there are non partially redundant nodes
@@ -1264,8 +1257,11 @@ static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
                        ir_node *trans_val;
                        ir_node *avail;
 
-                       if (get_loop_depth(get_irn_loop(pred_block)) > get_loop_depth(get_irn_loop(get_nodes_block(irn))))
+#if MIN_CUT
+                       /* Very conservative min cut. Phi might only have 1 user. */
+                       if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
                                return 1;
+#endif
 
                        if (is_Phi(pred) && get_nodes_block(pred) == block)
                                continue;
@@ -1281,29 +1277,44 @@ static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
                                trans = pred;
                        DB((dbg, LEVEL_3, "trans %+F\n", trans));
 
-                       if (is_Load(trans))
-                               continue;
-
                        trans_val = identify(trans);
                        DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
+                       if (is_Const(trans_val) || is_SymConst(trans_val)) {
+                               /* existing constant */
+                               if (get_irn_idx(trans_val) < environ->last_idx) {
+                                       continue;
+                               } else {
+                                       /* limit range of new constants */
+                                       ir_mode   *cmode = get_irn_mode(trans);
+                                       ir_tarval *upper = new_tarval_from_long(128, cmode);
+                                       ir_tarval *lower = new_tarval_from_long(-128, cmode);
+                                       ir_tarval *c     = get_Const_tarval(trans);
+
+                                       /* tarval within range? */
+                                       if (tarval_cmp(lower, c) == ir_relation_less &&
+                                               tarval_cmp(c, upper) == ir_relation_less) {
+                                               continue;
+                                       } else {
+                                               return 1;
+                                       }
+                               }
+                       }
+
+                       /* */
                        if (is_irn_constlike(trans_val))
                                continue;
+
                        avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
 
+                       DB((dbg, LEVEL_3, "avail %+F\n", avail));
                        if (! avail)
                                return 1;
-#if 0
+#if MIN_CUT
                        /* only optimize if predecessors have been optimized */
                        if (ir_valueset_lookup(info->antic_done, value) == NULL)
                                return 1;
 #endif
-
-#if 0
-                       /* Try to reduce cut. Turned out to be not very effective. */
-                       /* TODO Use out edges. */
-                       if (get_irn_outs_computed(irn) && get_irn_n_outs(irn) > 1)
-                               return 1;
-#endif
                }
        }
        return 0;
@@ -1335,9 +1346,6 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
        ir_node                *idom;
        int                     pos;
        ir_valueset_iterator_t  iter;
-#if BETTER_GREED
-       plist_t *stack;
-#endif
 
        /* only blocks */
        if (! is_Block(block))
@@ -1362,14 +1370,6 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                return;
        }
 
-#if BETTER_GREED
-       stack = plist_new();
-       foreach_valueset(info->antic_in, value, expr, iter) {
-               /* inverse topologic */
-               plist_insert_front(stack, expr);
-       }
-#endif
-
        /* This is the main reason antic_in is preverred over antic_out;
           we may iterate over every anticipated value first and not
           over the predecessor blocks. */
@@ -1394,30 +1394,19 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                        DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
                        DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
 
-#if ! PARTLY_ONLY
                        ir_valueset_insert(info->antic_done, value, expr);
-#endif
                        continue;
                }
 
-#if !BETTER_GREED
                if (is_hoisting_greedy(expr, block)) {
                        DB((dbg, LEVEL_2, "greedy\n"));
                        continue;
                }
-#endif
 
                mode = is_partially_redundant(block, expr, value);
                if (mode == NULL)
                        continue;
 
-#if BETTER_GREED
-               if (is_hoisting_greedy(expr, block)) {
-                       DB((dbg, LEVEL_2, "Better greed: greedy\n"));
-                       continue;
-               }
-#endif
-
 #if LOADS || DIVMODS
                /* save old mode_M phis to remove keepalive edges later */
                if (is_memop(expr)) {
@@ -1434,7 +1423,7 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                                inc_stats(gvnpre_stats->first_iter_found);
                        inc_stats(gvnpre_stats->partially);
                }
-               if (is_Load(expr))
+               if (is_Load(expr) || is_Store(expr))
                        inc_stats(gvnpre_stats->loads);
                else if (is_Div(expr) || is_Mod(expr))
                        inc_stats(gvnpre_stats->divmods);
@@ -1454,7 +1443,7 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                                int node_arity = get_irn_arity(expr);
                                ir_node **in = XMALLOCNZ(ir_node *, node_arity);
                                ir_node *trans;
-                               ir_node *new_value;
+                               ir_node *new_value, *new_value2;
                                ir_node *target_block = pred_block;
 
                                for (i = 0; i < node_arity; ++i) {
@@ -1465,9 +1454,12 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                                        ir_node *trans_val;
                                        ir_node *avail;
 
+                                       /* transform knowledge over the predecessor from
+                                          anti-leader world into leader world. */
+
                                        DB((dbg, LEVEL_3, "pred %+F\n", pred));
                                        value = identify(pred);
-                                       /* transform knowledge over anti leader into knowledge over leader */
+
                                        /* get leader for pred to lookup its translated value */
                                        leader = ir_valueset_lookup(info->antic_in, value);
                                        if (! leader)
@@ -1485,17 +1477,12 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                                                continue;
                                        }
 
-                                       /* handle load predecessor */
-                                       if (is_Load(trans)) {
-                                               in[i] = trans;
-                                               continue;
-                                       }
-
                                        trans_val = identify(trans);
                                        DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
                                        /* constants are always available but not in avail set */
                                        if (is_irn_constlike(trans_val)) {
-                                               in[i] = pred;
+                                               in[i] = trans;
                                                continue;
                                        }
 
@@ -1512,11 +1499,10 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                                if (is_Proj(expr))
                                        target_block = get_nodes_block(in[0]);
 
-                               /* copy node to represent the new value.
-                                  We do not translate nodes that do not need translation,
-                                  so we use the newly created nodes as value representatives only.
-                                  Their block is not important, because we create new ones during
-                                  the insert node phase. */
+                               /* Copy node to represent the new value.
+                                  We use translated nodes as value representatives only.
+                                  They have anti leaders as predecessors, not leaders!
+                                  So we have to create a new node using leaders. */
                                trans = new_ir_node(
                                        get_irn_dbg_info(expr),
                                        environ->graph,
@@ -1529,17 +1515,18 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                                /* We need the attribute copy here, because the Hash value of a
                                   node might depend on it. */
                                copy_node_attr(environ->graph, expr, trans);
-                               new_value = identify_or_remember(trans);
-
-                               DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
 
                                /* value is now available in target block through trans
                                   insert (not replace) because it has not been available */
-                               ir_valueset_insert(pred_info->avail_out, value, trans);
+                               new_value = identify_or_remember(trans);
                                ir_valueset_insert(pred_info->avail_out, new_value, trans);
+                               DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
+
+                               new_value2 = identify(get_translated(pred_block, expr));
+                               ir_valueset_insert(pred_info->avail_out, new_value2, trans);
+                               DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
 
-                               DB((dbg, LEVEL_4, "SET AVAIL value %+F trans %+F  in %+F\n", value, trans, pred_block));
-                               DB((dbg, LEVEL_4, "SET AVAIL newvalue %+F trans %+F  in %+F\n", new_value, trans, pred_block));
+                               DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
 
                                phi_in[pos] = trans;
                        } else {
@@ -1556,7 +1543,8 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                        phi = new_r_Phi(block, arity, phi_in, mode);
                        DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
 
-                       /* this value is now available through the new phi */
+                       /* This value is now available through the new phi.
+                          insert || replace in avail_out */
                        ir_valueset_replace(info->avail_out, value, phi);
                        ir_valueset_insert(info->new_set, value, phi);
                }
@@ -1566,53 +1554,6 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
                ir_valueset_insert(info->antic_done, value, expr);
                env->changes |= 1;
        }
-
-#if BETTER_GREED
-       /* TODO Unfinished
-          Better greed first determines which values are redundant
-          and decides then which to take.
-          insert_nodes needs to be split up for that. The cycle could be
-          for each block: flag redundant nodes,
-          use heuristic to adjust these flags (also consider antic_done),
-          do insert nodes.
-          This way we could decide if we hoist a non redundant node,
-          if all its successors are redundant.
-          Or we might try to minimize the cut along hoisted nodes and their
-          non redundant successors.
-        */
-       if (env->changes) {
-               plist_element_t *it;
-               /* iterate in inverse topological order */
-               foreach_plist(stack, it) {
-                       ir_node *irn   = (ir_node *)plist_element_get_value(it);
-                       ir_node *block = get_nodes_block(irn);
-                       int      j;
-                       char     redundant = 1;
-
-                       /* does irn only have redundant successors? */
-
-                       /* TODO we need to use edges */
-                       assert(get_irn_outs_computed(irn));
-
-                       for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
-                               ir_node *succ = get_irn_out(irn, j);
-
-                               /* if succ and irn are in the same block */
-                               if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
-                                       continue;
-                               } else {
-                                       redundant = 0;
-                                       break;
-                               }
-                       }
-
-                       if (redundant)
-                               flag_redundant(irn, 1);
-               }
-       }
-       plist_free(stack);
-#endif
-
 }
 
 #if HOIST_HIGH
@@ -1629,9 +1570,8 @@ static void update_new_set_walker(ir_node *block, void *ctx)
 }
 
 /**
- * Domtree block walker to insert nodes into the highest
- * possible block. .
- *
+ * Domtree block walker to insert nodes with dying operands
+ * into the highest possible block whilst still being anticipated.
  */
 static void hoist_high(ir_node *block, void *ctx)
 {
@@ -1663,6 +1603,7 @@ static void hoist_high(ir_node *block, void *ctx)
        foreach_valueset(curr_info->antic_done, value, expr, iter) {
                int pos;
 
+               /* TODO currently we cannot handle load and their projections */
                if (is_memop(expr) || is_Proj(expr))
                        continue;
 
@@ -1700,19 +1641,28 @@ static void hoist_high(ir_node *block, void *ctx)
                        /* anticipation border */
                        new_target  = NULL;
                        nest_depth  = get_loop_depth(get_irn_loop(target));
+
+                       /* Either push the hoisted nodes up their path,
+                          or try to put them directly into their common dominator. */
+#if COMMON_DOM
                        /* By using block (instead of target) as initial block,
                           we only allow hoisting into a common block of
                           both predecessor blocks. */
                        dom         = block;
+#else
+                       dom         = target;
+#endif
 
-                       while (dom && dom != environ->start_block) {
+                       while (dom && dom != get_Block_idom(block)) {
 
                                dom = get_Block_idom(dom);
                                dom_info = get_block_info(dom);
                                DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
 
-                               /* TODO antic_in means hoistable above block,
-                                  but we need 'hoistable into block'. */
+                               /* TODO Being in antic_in means hoistable above block,
+                                  but we need 'hoistable into block'.
+                                  This could be achieved by a flag for each valueset pair,
+                                  being set during antic computation. */
 
                                /* check if available node ist still anticipated and clean */
                                if (! ir_valueset_lookup(dom_info->antic_in, value)) {
@@ -1731,7 +1681,7 @@ static void hoist_high(ir_node *block, void *ctx)
 
                                /* check if operands die */
 
-                               /* check for uses on current path*/
+                               /* check for uses on current path */
                                for (i = 0; i < avail_arity; i++) {
                                        ir_node   *pred       = get_irn_n(avail, i);
                                        ir_node   *pred_value = identify(pred);
@@ -1759,7 +1709,7 @@ static void hoist_high(ir_node *block, void *ctx)
                                                        /* Do we have another user than avail?
                                                           Then predecessor is not dead after removal of avail. */
                                                        if (succ_value != value) {
-                                                               DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
+                                                               DB((dbg, LEVEL_4, "still used in %+F\n", succ));
                                                                dom = NULL;
                                                                break;
                                                        }
@@ -1769,10 +1719,13 @@ static void hoist_high(ir_node *block, void *ctx)
                                if (dom)
                                        new_target = dom;
 
-                               /* only try direct dominator */
+#if COMMON_DOM
+                               /* only try common dominator */
                                break;
+#endif
                        }
 
+                       /* put node into new target block */
                        if (new_target) {
                                block_info *target_info = get_block_info(new_target);
                                int         nn_arity    = get_irn_arity(avail);
@@ -1800,10 +1753,11 @@ static void hoist_high(ir_node *block, void *ctx)
                                free(in);
 
                                identify_or_remember(nn);
-                               /* NOTE: Nodes are inserted into a dominating block and should
+                               /* TODO Nodes are inserted into a dominating block and should
                                   be available from this point on. Currently we do not push
                                   the availability information through during the walk. */
                                ir_valueset_insert(target_info->new_set, value, nn);
+                               ir_valueset_insert(target_info->avail_out, value, nn);
                        }
                }
        }
@@ -1835,11 +1789,8 @@ static void eliminate(ir_node *irn, void *ctx)
 
                if (value != NULL) {
                        ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
+                       DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
 
-#if ! PARTLY_ONLY
-                       if (expr && get_irn_idx(expr) <= env->last_idx)
-                               return;
-#endif
                        if (expr != NULL && expr != irn) {
                                elim_pair *p = OALLOC(env->obst, elim_pair);
 
@@ -1866,19 +1817,12 @@ static void eliminate(ir_node *irn, void *ctx)
 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
 {
        elim_pair             *p;
-       ir_nodeset_iterator_t  iter;
-       ir_node               *m_phi;
        ir_node               *end    = environ->end_node;
 
        for (p = pairs; p != NULL; p = p->next) {
                /* might be already changed */
                p->new_node = skip_Id(p->new_node);
 
-               // TEST
-               /* happens with load nodes */
-               if (p->new_node == p->old_node)
-                       continue;
-
                DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
 
                /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
@@ -1914,6 +1858,7 @@ static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
        }
 }  /* eliminate_nodes */
 
+
 /* --------------------------------------------------------
  * GVN PRE pass
  * --------------------------------------------------------
@@ -1923,6 +1868,7 @@ static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
  * Gvn_Pre algorithm.
  *
  * @param irg   the graph
+ * @param env   the environment
  */
 static void gvn_pre(ir_graph *irg, pre_env *env)
 {
@@ -1947,6 +1893,7 @@ static void gvn_pre(ir_graph *irg, pre_env *env)
        antic_iter      = 0;
        env->first_iter = 1;
 
+       env->iteration = 1;
        /* antic_in passes */
        do {
                ++antic_iter;
@@ -1955,6 +1902,7 @@ static void gvn_pre(ir_graph *irg, pre_env *env)
                irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
                env->first_iter = 0;
                DB((dbg, LEVEL_2, "----------------------------------------------\n"));
+               env->iteration ++;
        } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
 
        DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
@@ -1975,17 +1923,18 @@ static void gvn_pre(ir_graph *irg, pre_env *env)
        DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
 
 #if HOIST_HIGH
-       /* An attempt to reduce lifetime by hoisting already hoisted values
-          even higher, if their operands die. */
+       /* An attempt to reduce lifetimes by hoisting already hoisted values
+          even higher if their operands die. */
        dom_tree_walk_irg(irg, hoist_high, NULL, env);
        /* update avail_out for elimination */
        dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
 #endif
 
-       /* TODO deactivate edges to prevent intelligent removal of nodes */
+       /* Deactivate edges to prevent intelligent removal of nodes,
+          or else we will get deleted nodes which we try to exchange. */
        edges_deactivate(environ->graph);
 
-       /* last step: eliminate nodes */
+       /* eliminate nodes */
        irg_walk_graph(irg, NULL, eliminate, env);
        eliminate_nodes(env->pairs, env->keeps);
 
@@ -2005,9 +1954,9 @@ void do_gvn_pre(ir_graph *irg)
        optimization_state_t  state;
        block_info           *block_info;
 
-       /* bads and unreachables cause too much trouble with dominance
-          loop info for endless loop detection
-          no critical edges is GVN-PRE precondition
+       /* bads and unreachables cause too much trouble with dominance,
+          loop info for endless loop detection,
+          no critical edges is PRE precondition
         */
        assure_irg_properties(irg,
                IR_GRAPH_PROPERTY_NO_BADS
@@ -2038,21 +1987,29 @@ void do_gvn_pre(ir_graph *irg)
        env.end_node     = get_irg_end(irg);
        env.pairs        = NULL;
        env.keeps        = &keeps;
-       /* last index is no node */
-       env.last_idx     = get_irg_last_idx(irg) - 1;
+       env.last_idx     = get_irg_last_idx(irg);
 
        /* Detect and set links of infinite loops to non-zero. */
        analyse_loops(irg);
 
+#if OPTIMIZE_NODES
+       new_identities(irg);
+       env.value_table = irg->value_table;
+       irg->value_table = NULL;
+#endif
+
        /* Switch on GCSE. We need it to correctly compute
           the value of a node, which is independent from
           its block. */
        set_opt_global_cse(1);
-       /* new_identities */
+       /* new_identities() */
        if (irg->value_table != NULL)
                del_pset(irg->value_table);
        /* initially assumed nodes in pset are 512 */
        irg->value_table = new_pset(compare_gvn_identities, 512);
+#if OPTIMIZE_NODES
+       env.gvnpre_values = irg->value_table;
+#endif
 
        /* do GVN-PRE pass */
        gvn_pre(irg, &env);
@@ -2074,9 +2031,19 @@ void do_gvn_pre(ir_graph *irg)
        restore_optimization_state(&state);
        confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
 
-       /* TODO there seem to be optimizations that try to use the existing
+#if OPTIMIZE_NODES
+       irg->value_table = env.value_table;
+       del_pset(irg->value_table);
+       irg->value_table = env.gvnpre_values;
+#endif
+
+       /* TODO There seem to be optimizations that try to use the existing
           value_table. */
        new_identities(irg);
+
+       /* TODO assure nothing else breaks. */
+       set_opt_global_cse(0);
+       edges_activate(irg);
 }
 
 /* Creates an ir_graph pass for do_gvn_pre. */