beloopana: Remove duplicate comments.
[libfirm] / ir / opt / gvn_pre.c
index 79d737e..0bdd8fd 100644 (file)
@@ -1,20 +1,6 @@
 /*
- * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
- *
  * This file is part of libFirm.
- *
- * This file may be distributed and/or modified under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation and appearing in the file LICENSE.GPL included in the
- * packaging of this file.
- *
- * Licensees holding valid libFirm Professional Edition licenses may use
- * this file in accordance with the libFirm Commercial License.
- * Agreement provided with the Software.
- *
- * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
- * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE.
+ * Copyright (C) 2012 University of Karlsruhe.
  */
 
 /**
 #include "irouts.h"
 #include "irpass.h"
 #include "valueset.h"
+#include "irloop.h"
+#include "firmstat_t.h"
 
 #include "irgraph_t.h"
 #include "irnode_t.h"
 #include "iropt_t.h"
+#include "plist.h"
+
+/* suggested by GVN-PRE authors */
+#define MAX_ANTIC_ITER 10
+#define MAX_INSERT_ITER 3
+
+/* Stops antic iteration from processing endless loops. */
+#define IGNORE_INF_LOOPS 1
+/* Stops antic information to flow over infinite loop backedge */
+#define NO_INF_LOOPS 0
+
+/* Attempt to reduce register pressure and reduce code size
+   for hoisted nodes. */
+#define HOIST_HIGH 1
+#define COMMON_DOM 1
+
+/* Seamless implementation of handling loads and generally memory
+   dependent nodes with GVN-PRE. */
+#define LOADS 1
+#define DIVMODS 0
+
+/* Experimental */
+
+/* Only optimize node directly after phi node if node is only successor. */
+#define MIN_CUT 0
+
+/* GVN is very limited. This enables optimize_node during value recognition.
+   GVN ist still very limited, since a+1+b+1 doesn't equal a+b+2.
+   TODO Broken for yet unknown reasons. */
+#define OPTIMIZE_NODES 0
+
 
 /** Additional info we need for every block. */
 typedef struct block_info {
-       ir_valueset_t     *exp_gen;   /**< contains this blocks clean expressions */
-       ir_valueset_t     *avail_out; /**< The Avail_out set for a block. */
-       ir_valueset_t     *antic_in;  /**< The Antic_in set for a block. */
-       ir_valueset_t     *new_set;   /**< The set of all new values for a block. */
-       ir_nodehashmap_t  *trans;     /**< contains translated nodes in block */
-       ir_node           *avail;     /**< The get_map(avail, block) result. */
-       ir_node           *block;     /**< The Block of the block info. */
-       struct block_info *next;      /**< Links all entries, so we can recover the sets easily. */
-       int                found;     /**< Non-zero, if avail was found in this block. */
+       ir_valueset_t     *exp_gen;    /* contains this blocks clean expressions */
+       ir_valueset_t     *avail_out;  /* available values at block end */
+       ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
+       ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase */
+       ir_valueset_t     *new_set;    /* new by hoisting made available values */
+       ir_nodehashmap_t  *trans;      /* contains translated nodes translated into block */
+       ir_node           *avail;      /* saves available node for insert node phase */
+       int                found;      /* saves kind of availability for insert_node phase */
+       ir_node           *block;      /* block of the block_info */
+       struct block_info *next;       /* links all instances for easy access */
 } block_info;
 
 /**
- * A pair of nodes that must be exchanged.
- * We must defer the exchange because our hash-sets cannot
- * find an already replace node else.
+ * A pair of nodes to be exchanged.
+ * We have to defer the exchange because there are still needed references
+ * to certain nodes.
  */
 typedef struct elim_pair {
-       ir_node *old_node;      /**< The old node that will be replaced. */
-       ir_node *new_node;      /**< The new node. */
-       struct elim_pair *next; /**< Links all entries in a list. */
-       int     reason;         /**< The reason for the replacement. */
+       ir_node *old_node;      /* node that will be replaced */
+       ir_node *new_node;      /* replacement for old_node */
+       struct elim_pair *next; /* links all instances for easy access */
+       int     reason;         /* reason for the replacement */
 } elim_pair;
 
-/** The environment for the GVN-PRE algorithm */
+/** environment for the GVN-PRE algorithm */
 typedef struct pre_env {
-       struct obstack *obst;        /**< The obstack to allocate on. */
-       ir_node        *start_block; /**< The start block of the current graph. */
-       ir_node        *end_block;   /**< The end block of the current graph */
-       block_info     *list;        /**< Links all block info entries for easier recovery. */
-       elim_pair      *pairs;       /**< A list of node pairs that must be eliminated. */
-       unsigned        last_idx;    /**< last node index of "old" nodes, all higher indexes are newly created once. */
-       char            changes;     /**< Non-zero, if calculation of Antic_in has changed. */
-       char            first_iter;  /**< non-zero for first iteration */
+       ir_graph       *graph;        /* current graph */
+       struct obstack  obst;         /* obstack to allocate on */
+       ir_node        *start_block;  /* start block of the current graph */
+       ir_node        *end_block;    /* end block of the current graph */
+       ir_node        *end_node;     /* end node of the current graph */
+       block_info     *list;         /* block_info list head */
+       elim_pair      *pairs;        /* elim_pair list head */
+       ir_nodeset_t   *keeps;        /* a list of to be removed phis to kill their keep alive edges */
+       unsigned        last_idx;     /* last node index of input graph */
+       char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
+       char            first_iter;   /* non-zero for first fixed point iteration */
+       int             iteration;    /* iteration counter */
+#if OPTIMIZE_NODES
+       pset           *value_table;   /* standard value table*/
+       pset           *gvnpre_values; /* gvnpre value table */
+#endif
 } pre_env;
 
-/** The debug module handle. */
+static pre_env *environ;
+
+/* custom GVN value map */
+static ir_nodehashmap_t value_map;
+
+/* debug module handle */
 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
 
-/* ----------  Functions for Value sets ---------- */
+#ifdef DEBUG_libfirm
+
+/* --------------------------------------------------------
+ * Statistics
+ * --------------------------------------------------------
+ */
+
+typedef struct gvnpre_statistics {
+       int replaced;
+       int partially;
+       int fully;
+       int loads;
+       int divmods;
+       int hoist_high;
+       int first_iter_found;
+       int antic_iterations;
+       int insert_iterations;
+       int infinite_loops;
+} gvnpre_statistics;
+
+static gvnpre_statistics *gvnpre_stats = NULL;
+
+static void init_stats(void)
+{
+       gvnpre_stats = XMALLOCZ(gvnpre_statistics);
+}
+
+static void free_stats(void)
+{
+       free(gvnpre_stats);
+       gvnpre_stats = NULL;
+}
+
+static void print_stats(void)
+{
+       gvnpre_statistics *stats = gvnpre_stats;
+       DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
+       DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
+       DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
+       DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
+       DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
+       DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
+       DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
+       DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
+       DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
+       DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
+}
+
+#define set_stats(var, value) (var)=(value)
+#define inc_stats(var)        ((var)+=1)
 
+/* --------------------------------------------------------
+ * Dump valuesets
+ * --------------------------------------------------------
+ */
 
 /**
- * computes dst = dst \/ src for value sets
+ * Dump a value set.
  *
- * @param dst    the union result set
- * @param src    the source set
+ * @param set    the set to dump
+ * @param txt    a text to describe the set
+ * @param block  the owner block of the set
  */
-static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
+static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
 {
-       ir_valueset_iterator_t  iter;
-       ir_node                *value;
-       ir_node                *expr;
+       ir_valueset_iterator_t iter;
+       ir_node *value, *expr;
+       int i;
+
+       DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
+       i = 0;
+       foreach_valueset(set, value, expr, iter) {
+               if ((i & 3) == 3)
+                       DB((dbg, LEVEL_2, "\n"));
+               if (value != expr)
+                       DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
+               else
+                       DB((dbg, LEVEL_2, " %+F,", expr));
+               ++i;
+       }
+       DB((dbg, LEVEL_2, "\n}\n"));
+}
+
+/**
+ * Dump all exp_gen value sets.
+ *
+ * @param list  the list of block infos to retrieve the sets from
+ */
+static void dump_all_expgen_sets(block_info *list)
+{
+       block_info *block_info;
 
-       foreach_valueset(src, value, expr, iter) {
-               /* dominator tree walk; use first available expr as leader */
-               ir_valueset_insert(dst, value, expr);
+       for (block_info = list; block_info != NULL; block_info = block_info->next) {
+               dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
        }
 }
 
+#else
+
+#define dump_all_expgen_sets(list)
+#define dump_value_set(set, txt, block)
+
+#endif /* DEBUG_libfirm */
 
-/* ----------  Functions for Values ---------- */
+/* --------------------------------------------------------
+ * GVN Functions
+ * --------------------------------------------------------
+ */
 
 /**
- * Remember adds a node e to the GCSE valuetable.
- *
- * @param e  a node representing an expression
- * @return the final value for the expression e
+ * Compares node collisions in valuetable.
+ * Modified identities_cmp().
  */
-static ir_node *remember(ir_node *e)
+static int compare_gvn_identities(const void *elt, const void *key)
 {
-       ir_node *value;
+       ir_node *a = (ir_node *)elt;
+       ir_node *b = (ir_node *)key;
+       int i, irn_arity_a;
 
-       if (is_Proj(e)) {
-               ir_node *pred   = get_Proj_pred(e);
-               ir_node *v_pred = identify_remember(pred);
+       if (a == b) return 0;
 
-               if (v_pred != pred) {
-                       ir_node *proj = new_r_Proj(v_pred, get_irn_mode(e), get_Proj_proj(e));
-                       value = identify_remember(proj);
-                       return value;
+       /* phi nodes kill predecessor values and are always different */
+       if (is_Phi(a) || is_Phi(b))
+               return 1;
+
+       /* memops are not the same, even if we want to optimize them
+          we have to take the order in account */
+       if (is_memop(a) || is_memop(b) ||
+           get_irn_mode(a) == mode_T ||
+           get_irn_mode(b) == mode_T) {
+               /* Loads with the same predecessors are the same value;
+                  this should only happen after phi translation. */
+               if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
+                       return 1;
+       }
+
+       if ((get_irn_op(a) != get_irn_op(b)) ||
+           (get_irn_mode(a) != get_irn_mode(b))) return 1;
+
+       /* compare if a's in and b's in are of equal length */
+       irn_arity_a = get_irn_arity(a);
+       if (irn_arity_a != get_irn_arity(b))
+               return 1;
+
+       /* blocks are never the same */
+       if (is_Block(a) || is_Block(b))
+               return 1;
+
+       /* should only be used with gcse enabled */
+       assert(get_opt_global_cse());
+
+       /* compare a->in[0..ins] with b->in[0..ins] */
+       for (i = 0; i < irn_arity_a; ++i) {
+               ir_node *pred_a = get_irn_n(a, i);
+               ir_node *pred_b = get_irn_n(b, i);
+               if (pred_a != pred_b) {
+                       if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
+                               return 1;
                }
        }
 
-       value = identify_remember(e);
-       return value;
-}  /* identify */
+       /*
+        * here, we already now that the nodes are identical except their
+        * attributes
+        */
+       if (a->op->ops.node_cmp_attr)
+               return a->op->ops.node_cmp_attr(a, b);
+
+       return 0;
+}
 
 /**
- * Identify does a lookup in the GCSE valuetable.
+ * Identify does a lookup in the GVN valuetable.
+ * To be used when no new GVN values are to be created.
  *
  * @param e  a node representing an expression
- * @return a node representing the value or NULL if no identified
+ * @return a node representing the value
  */
-static ir_node *identify(ir_node *e)
+static ir_node *identify(ir_node *irn)
 {
-       return identify_remember(e);
+       ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
+       if (value)
+               return value;
+       /* irn represents a new value, so return the leader */
+       return identify_remember(irn);
 }
 
 /**
- * Returns the block info of a block.
+ * remember() adds node irn to the GVN valuetable.
+ * Identify_remember only identifies values of nodes with the
+ * same predecessor nodes (not values). By creating a node from the predecessor
+ * values/leaders, a true valuetree is built. Phis kill their predecessor value,
+ * so no circular dependencies need to be resolved.
  *
- * @param block  the block
- * @return block info of block
+ * TODO Improvement:
+ *      Maybe this could be implemented with a custom node hash that takes
+ *      phi nodes and true values (instead of predecessors) into account,
+ *      resulting in value numbers.
+ * TODO This unnecessarily also handles nodes like calls, which are never equal.
+ *
+ * @param irn  a node representing an expression
+ * @return     the value of the expression
  */
-static block_info *get_block_info(ir_node *block)
+static ir_node *remember(ir_node *irn)
 {
-       return (block_info*)get_irn_link(block);
+       int       arity   = get_irn_arity(irn);
+       int       i;
+       int       changed = 0;
+       ir_node **in      = XMALLOCN(ir_node *, arity);
+       ir_node  *value;
+
+       for (i = 0; i < arity; ++i) {
+               ir_node *pred       = get_irn_n(irn, i);
+               /* value and leader at the same time */
+               ir_node *pred_value = identify(pred);
+
+               /* phi will be translated anyway, so kill the predecessor values
+                  this also prevents circular dependencies */
+               if (is_Phi(pred)) {
+                       /* every phi represents its own value */
+                       in[i] = pred;
+                       continue;
+               }
+
+               /* predecessor is not its value representation/the leader */
+               if (pred != pred_value)
+                       changed = 1;
+               in[i] = pred_value;
+       }
+
+       if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
+               /* create representative for */
+               ir_node *nn = new_ir_node(
+                       get_irn_dbg_info(irn),
+                       get_irn_irg(irn),
+                       get_nodes_block(irn),
+                       get_irn_op(irn),
+                       get_irn_mode(irn),
+                       get_irn_arity(irn),
+                       in);
+               copy_node_attr(environ->graph, irn, nn);
+
+#if OPTIMIZE_NODES
+               /* TODO optimize_node() uses the valuetable and thus the custom
+                  identify_cmp() and will fail trying. */
+               environ->graph->value_table = environ->value_table;
+               nn = optimize_node(nn);
+               environ->graph->value_table = environ->gvnpre_values;
+#endif
+
+               /* now the value can be determined because the
+                  predecessors are the leaders */
+               value = identify_remember(nn);
+       } else {
+               value = identify_remember(irn);
+       }
+       free(in);
+
+       DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
+       ir_nodehashmap_insert(&value_map, irn, value);
+
+       return value;
 }
 
+/**
+ * When the value map has been built we may lookup expressions
+ * and remember them if new.
+ */
+static ir_node *identify_or_remember(ir_node *irn)
+{
+       ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
+       if (value)
+               return value;
+       else
+               return remember(irn);
+}
+
+/* --------------------------------------------------------
+ * Block info
+ * --------------------------------------------------------
+ */
+
 /**
  * Allocate block info for block block.
  *
@@ -165,13 +409,13 @@ static block_info *get_block_info(ir_node *block)
  */
 static void alloc_block_info(ir_node *block, pre_env *env)
 {
-       block_info *info = OALLOC(env->obst, block_info);
+       block_info *info = OALLOC(&env->obst, block_info);
 
        set_irn_link(block, info);
-       info->exp_gen   = ir_valueset_new(16);
-       info->avail_out = ir_valueset_new(16);
-       info->antic_in  = ir_valueset_new(16);
-       /* valueset has much nicer interface */
+       info->exp_gen    = ir_valueset_new(16);
+       info->avail_out  = ir_valueset_new(16);
+       info->antic_in   = ir_valueset_new(16);
+       info->antic_done = ir_valueset_new(16);
        info->trans = XMALLOC(ir_nodehashmap_t);
        ir_nodehashmap_init(info->trans);
 
@@ -182,217 +426,235 @@ static void alloc_block_info(ir_node *block, pre_env *env)
 
        info->next = env->list;
        env->list  = info;
-}  /* alloc_block_info */
+}
+
+static void free_block_info(block_info *block_info)
+{
+       ir_valueset_del(block_info->exp_gen);
+       ir_valueset_del(block_info->avail_out);
+       ir_valueset_del(block_info->antic_in);
+       if (block_info->trans) {
+               ir_nodehashmap_destroy(block_info->trans);
+               free(block_info->trans);
+       }
+       if (block_info->new_set)
+               ir_valueset_del(block_info->new_set);
+}
 
 /**
- * Returns non-zero if a node is movable and a possible candidate for PRE.
+ * Bottom up walker that ensures that every block gets a block info.
  *
- * @param n  the node
- * @return non-zero if value is nice
+ * @param irn   the node
+ * @param ctx   the environment
  */
-static int is_nice_value(ir_node *n)
+static void block_info_walker(ir_node *irn, void *ctx)
 {
-       ir_mode *mode = get_irn_mode(n);
+       if (is_Block(irn)) {
+               pre_env *env = (pre_env*)ctx;
+               alloc_block_info(irn, env);
+       }
+}
 
-       if (mode == mode_M)
-               return 0;
+/**
+ * Returns the block info of a block.
+ */
+static block_info *get_block_info(ir_node *block)
+{
+       return (block_info*)get_irn_link(block);
+}
 
-       if (is_Phi(n))
-               return 1;
+/* --------------------------------------------------------
+ * Infinite loop analysis
+ * --------------------------------------------------------
+ */
+
+/**
+ * Walker to set block marks and loop links to 0.
+ */
+static void clear_block_mark_loop_link(ir_node *block, void *env)
+{
+       (void) env;
 
-       while (is_Proj(n))
-               n = get_Proj_pred(n);
+       if (is_Block(block)) {
+               set_Block_mark(block, 0);
+               set_loop_link(get_irn_loop(block), NULL);
+       }
+}
 
-       /* we may not move pinned nodes */
-       if (get_irn_pinned(n) == op_pin_state_pinned)
-               return 0;
+/**
+ * Returns non-zero if block is part of real loop loop.
+ */
 
-       if (!mode_is_data(mode)) {
-               /* Div and Mod are only nice if they do not use memory. */
-               if (! is_Div(n) && ! is_Mod(n))
-                       return 0;
-               if (! is_NoMem(get_memop_mem(n)))
+static unsigned in_loop(ir_node *block, ir_loop *loop)
+{
+       ir_loop *l     = get_irn_loop(block);
+       ir_loop *outer = get_irg_loop(environ->graph);
+
+       while (l != loop) {
+               /* loop tree root is not a loop */
+               if (l == NULL || l == outer)
                        return 0;
+               l = get_loop_outer_loop(l);
        }
        return 1;
-}  /* is_nice_value */
-
+}
 
-#ifdef DEBUG_libfirm
 /**
- * Dump a value set.
- *
- * @param set    the set to dump
- * @param txt    a text to describe the set
- * @param block  the owner block of the set
+ * Returns the outermost real loop of loop.
  */
-static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
+static ir_loop *get_loop_outermost(ir_loop *loop)
 {
-       ir_valueset_iterator_t  iter;
-       ir_node                *value;
-       ir_node                *expr;
-       int                     i     = 0;
+       ir_loop *outer = get_irg_loop(environ->graph);
+       ir_loop *l     = loop;
+       ir_loop *last  = NULL;
 
-       DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
-       foreach_valueset(set, value, expr, iter) {
-               if ((i & 3) == 3)
-                       DB((dbg, LEVEL_2, "\n"));
-               if (value != expr)
-                       DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
-               else
-                       DB((dbg, LEVEL_2, " %+F,", expr));
-               ++i;
+       while(l != outer) {
+               last = l;
+               l = get_loop_outer_loop(l);
        }
-       DB((dbg, LEVEL_2, "\n}\n"));
-}  /* dump_value_set */
+       return last;
+}
 
 /**
- * Dump all exp_gen value sets.
- *
- * @param list  the list of block infos to retrieve the sets from
+ * Topologic bottom-up walker sets links of infinite loops to non-zero.
+ * Block marks are used to flag blocks reachable (from end) on the one hand,
+ * on the other hand they are set if the block is not part of an infinite loop.
  */
-static void dump_all_expgen_sets(block_info *list)
+static void infinite_loop_walker(ir_node *block, void *env)
 {
-       block_info *bl_info;
+       int arity;
+       int i;
+       (void) env;
+
+       if (! is_Block(block))
+               return;
+
+       /* start block has no predecessors */
+       if (block == environ->start_block)
+               return;
+
+       arity = get_irn_arity(block);
+
+       /* block not part of a real loop: no infinite loop */
+       if (get_irn_loop(block) == get_irg_loop(environ->graph))
+               set_Block_mark(block, 1);
+
+       if (get_Block_mark(block)) {
+               /* reachable block: mark all cf predecessors */
+               for (i = 0; i < arity; ++i) {
+                       ir_node *pred = get_Block_cfgpred_block(block, i);
+                       if (is_Bad(pred))
+                               continue;
+                       set_Block_mark(pred, 1);
+               }
+       } else {
+               /* We are in a real loop and see an unreachable block. */
+               ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
+
+               /* flag loop as infinite */
+               set_loop_link(outermost_loop, outermost_loop);
+               DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
+
+               /* The cf predecessors are unreachable, but can never be part of
+                  an infinite loop, because we just reached them. So we set the
+                  blockmark to prevent triggering the infinite loop detection. */
 
-       for (bl_info = list; bl_info != NULL; bl_info = bl_info->next) {
-               dump_value_set(bl_info->exp_gen, "[Exp_gen]", bl_info->block);
+               /* passing information to the cf predecessors */
+               for (i = 0; i < arity; ++i) {
+                       ir_node *pred = get_Block_cfgpred_block(block, i);
+
+                       if (is_Bad(pred))
+                               continue;
+
+                       /* If our cf predecessor is in the same endless loop,
+                          it is also unreachable. */
+                       if (in_loop(pred, outermost_loop)) {
+                               set_Block_mark(pred, 0);
+                       } else {
+                               /* When we leave the unreachable loop, we artificially
+                                  declare the cf predecessor reachable. */
+                               set_Block_mark(pred, 1);
+                       }
+               }
        }
 }
 
-#else
-#define dump_value_set(set, txt, block)
-#define dump_all_expgen_sets(list)
-#endif /* DEBUG_libfirm */
-
 /**
- * Gets result of nodes phi translation into block.
- *
- * @param node   the node
- * @param block  the target block
- *
- * @return a phi translation of node node into block block or NULL
+ * Sets loop links of outermost infinite loops to non-zero.
  */
-static ir_node *get_translated(ir_node *node, ir_node *block)
+static void analyse_loops(ir_graph *irg)
 {
-       block_info *bi;
-       ir_node    *trans;
+       ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
 
-       if (is_irn_constlike(node))
-               return node;
+       /* reset block mark and loop links */
+       irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
+
+       /* mark end block reachable */
+       set_Block_mark(get_irg_end_block(irg), 1);
+       irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
 
-       bi    = get_block_info(block);
-       trans = ir_nodehashmap_get(ir_node, bi->trans, node);
-       return trans;
+       ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
 }
 
+#if IGNORE_INF_LOOPS || NO_INF_LOOPS
 /**
- * Saves result of phi translation of node into predecessor
- * at pos of block succ.
- *
- * @param node   the node
- * @param succ   the successor of the translation target block
- * @param pos    the position of the predecessor block
- * @param trans  the translation result
- *
+ * Returns non-zero if block is part of an infinite loop.
  */
-static void set_translated(ir_node *node, ir_node *succ, int pos, ir_node *trans)
+static unsigned is_in_infinite_loop(ir_node *block)
 {
-       ir_node    *pred = get_Block_cfgpred_block(succ, pos);
-       block_info *bi   = get_block_info(pred);
+       ir_loop *loop;
 
-       ir_nodehashmap_insert(bi->trans, node, trans);
+       assert(is_Block(block));
+       loop = get_irn_loop(block);
+       assert(loop);
+
+       loop = get_loop_outermost(loop);
+       if (loop)
+               return (get_loop_link(loop) != NULL);
+       else
+               return 0;
 }
+#endif
+
+/* --------------------------------------------------------
+ * GVN-PRE Exp_gen
+ * --------------------------------------------------------
+ */
 
 /**
- * Checks if a node node is clean in block block for use in antic_in.
- *
- * A clean node in block block can be hoisted above block block.
- * A node is not clean if its value is killed in block block.
- * The node can still be hoisted into block block.
- *
- * @param n      the phi translated or not translated node
- * @param block  the block
- * @return non-zero value for clean node
+ * Returns non-zero if a node is movable and a possible candidate for PRE.
  */
-static int is_clean_in_block_antic(ir_node *node, ir_node *block)
+static unsigned is_nice_value(ir_node *n)
 {
-       int i;
-
-       if (get_irn_mode(node) == mode_M)
-               return 0;
+       ir_mode *mode = get_irn_mode(n);
 
-       /* a phi only has predecessors in other blocks */
-       if (is_Phi(node))
+       if (is_Phi(n))
                return 1;
 
-       /* constants are in start block */
-       if (is_irn_constlike(node))
+#if LOADS || DIVMODS
+       if (is_Proj(n) && mode != mode_X && mode != mode_T)
                return 1;
-
-       /* what we really want to check
-          Only for node is translated case; other are clean anyway */
-       if (! is_nice_value(node)) {
+#else
+       if (is_Proj(n))
                return 0;
-       }
-
-       /* cleanliness depends on nodes predecessors
-          At least if node is translated. */
-       for (i = get_irn_arity(node) - 1; i >= 0; --i) {
-               ir_node *pred = get_irn_n(node, i);
-               ir_node *trans;
-               ir_node *value;
-
-               if (is_irn_constlike(pred))
-                       continue;
-
-               /* exp_gen only contains clean nodes */
-               if (ir_valueset_lookup(get_block_info(block)->exp_gen, pred))
-                       continue;
-
-               /* block of pred strictly dominates target block. pred irrelevant. */
-               if (block_strictly_dominates(get_nodes_block(pred), block))
-                       continue;
-
-               /* --- pred neither in block, nor dominating -- */
+#endif
 
-               /* This pred is in antic_in and such clean.
-                  Not every clean pred is in antic_in though.
-                  Predecessor might be translated or not */
-               value = identify(pred);
-               if (ir_valueset_lookup(get_block_info(block)->antic_in, value))
-                       continue;
+#if LOADS
+       if (is_Load(n))
+               return get_Load_volatility(n) == volatility_non_volatile;
+       if (is_Store(n))
+               return get_Store_volatility(n) == volatility_non_volatile;
+#endif
 
-               /* This check is not redundant for translated nodes;
-                  non translated ones are already nice. */
-               if (! is_nice_value(pred)) {
-                       DB((dbg, LEVEL_5, "unclean %+F because pred %+F not nice\n", node, pred));
-                       return 0;
-               }
+       if (get_irn_pinned(n) == op_pin_state_pinned)
+               return 0;
 
-               /* predecessor is not translated. This is legal if
-                  predecessor is dominating or in target block (already checked). */
-               trans = get_translated(pred, block);
-               if (trans == NULL) {
-                       DB((dbg, LEVEL_5, "unclean %+F because pred %+F unclean (not translated)\n", node, pred));
+       if (! mode_is_data(mode)) {
+               if (! is_Div(n) && ! is_Mod(n))
                        return 0;
-
-               } else {
-                       /* Node and predecessor are translated, but is pred clean?
-                          The value of the translated predecessor has to be in antic_in. */
-                       ir_node *value = identify(trans);
-                       if (! ir_valueset_lookup(get_block_info(block)->antic_in, value)) {
-                               DB((dbg, LEVEL_5, "unclean %+F because pred %+F value %+F not antic\n", node, pred, value));
-                               return 0;
-                       }
-               }
-
-               assert(0 && "should have been catched");
        }
-
-       /* clean */
        return 1;
-}  /* is_clean_in_block */
+}
 
 /**
  * Checks if a node n is clean in block block for exp_gen.
@@ -401,12 +663,9 @@ static int is_clean_in_block_antic(ir_node *node, ir_node *block)
  * @param block  the block
  * @return non-zero value for clean node
  */
-static int is_clean_in_block_expgen(ir_node *n, ir_node *block)
+static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
 {
-       int i;
-
-       if (get_irn_mode(n) == mode_M)
-               return 0;
+       int         i, arity;
 
        if (is_Phi(n))
                return 1;
@@ -414,58 +673,54 @@ static int is_clean_in_block_expgen(ir_node *n, ir_node *block)
        if (! is_nice_value(n))
                return 0;
 
-       for (i = get_irn_arity(n) - 1; i >= 0; --i) {
-               ir_node *pred = get_irn_n(n, i);
+#if LOADS
+       /* filter loads with no phi predecessor from antic_in */
+       if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
+               return 0;
+       if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
+               return 0;
+#endif
 
-               /* sufficient for exp_gen because block is always block of node */
-               if (get_nodes_block(pred) != block)
-                       continue;
+#if DIVMODS
+       if (is_Div(n)) {
+               ir_node *mem = get_Div_mem(n);
+
+               mem = skip_Pin(mem);
 
-               /* pred is in block,
-                  so it needs to be clean (already in exp_gen) */
-               if (! get_irn_link(pred)) {
-                       DB((dbg, LEVEL_5, "unclean %+F because pred %+F unclean\n", n, pred));
+               if (! is_Phi(mem) && ! is_NoMem(mem))
                        return 0;
-               } else {
-                       continue;
-               }
        }
-       return 1;
-}  /* is_clean_in_block */
 
-/**
- * Does blocklocal common subexpression elimination (CSE).
- *
- * @param irn   the node
- * @param ctx   the environment
- */
-static void cse_walker(ir_node *irn, void *ctx)
-{
-       ir_node *opt = identify_remember(irn);
-       (void) ctx;
+       if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
+               return 0;
+#endif
 
-       if (opt != irn) {
-               DB((dbg, LEVEL_5, "CSE %+F to %+F\n", irn, opt));
-               exchange(irn, opt);
-       }
-}
+       arity = get_irn_arity(n);
+       for (i = 0; i < arity; ++i) {
+               ir_node *pred   = get_irn_n(n, i);
+               ir_node *value;
+
+               if (is_Phi(pred))
+                       continue;
+
+               /* we only handle current block */
+               if (get_nodes_block(pred) != block)
+                       continue;
+
+               if (! is_nice_value(pred))
+                       return 0;
+
+               value = identify(pred);
+               if (! ir_valueset_lookup(valueset, value))
+                       return 0;
 
-/**
- * Bottom up walker that ensures that every block gets a block info.
- *
- * @param irn   the node
- * @param ctx   the environment
- */
-static void block_info_walker(ir_node *irn, void *ctx)
-{
-       if (is_Block(irn)) {
-               pre_env *env = (pre_env*)ctx;
-               alloc_block_info(irn, env);
        }
+       return 1;
 }
 
 /**
  * Topological walker puts nodes in top-down topological order into exp_gen set.
+ * Assumed to walk blockwise and nodewise topologically top-down.
  *
  * @param irn    the node
  * @param ctx    the environment
@@ -477,79 +732,76 @@ static void topo_walker(ir_node *irn, void *ctx)
        ir_node    *value;
        (void) ctx;
 
-       /* GVN step: remember the value */
-       value = remember(irn);
-
-       /* no need to put constants into the sets: they are always redundant */
-       if (! is_nice_value(irn) || is_irn_constlike(irn))
+       if (is_Block(irn))
                return;
 
-       /* Do not put mode_T nodes info the sets, or PhiT will be created
-         (which are not allowed in Firm). Instead, put the Proj's here only. */
-       if (get_irn_mode(irn) == mode_T)
+#if OPTIMIZE_NODES
+       environ->graph->value_table = environ->value_table;
+       identify_remember(irn);
+       environ->graph->value_table = environ->gvnpre_values;
+#endif
+
+       /* GVN step: remember the value. */
+       value = remember(irn);
+
+       if (is_irn_constlike(irn))
                return;
 
        block = get_nodes_block(irn);
        info  = get_block_info(block);
 
-       if (is_clean_in_block_expgen(irn, block)) {
-               /* two expressions with same value in block;
-                  should have been fixed by CSE pass */
-               assert(get_nodes_block(irn) == block &&
-                   (! ir_valueset_lookup(info->exp_gen, value)));
+       if (get_irn_mode(irn) != mode_X)
+               ir_valueset_insert(info->avail_out, value, irn);
 
-               DB((dbg, LEVEL_5, "%+F clean in block %+F\n", irn, block));
+       /* values that are not in antic_in also dont't need to be in any other set */
+
+       if (! is_nice_value(irn))
+               return;
+
+       if (is_clean_in_block(irn, block, info->exp_gen)) {
+               DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
 
                ir_valueset_insert(info->exp_gen, value, irn);
-               /* flag irn as clean*/
-               set_irn_link(irn, irn);
-       } else {
-               /* flag irn as not clean */
-               set_irn_link(irn, NULL);
        }
 }
 
+/* --------------------------------------------------------
+ * GVN-PRE Antic_in
+ * --------------------------------------------------------
+ */
+
 /**
- * Computes Avail_out(block):
- *
- * Avail_in(block)  = Avail_out(dom(block))
- * Avail_out(block) = Avail_in(block) \/ Nodes(block)
+ * Gets result of nodes phi translation into block.
  *
- * Precondition:
- *  This function must be called in the top-down topological order:
- *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
+ * @param node   the node
+ * @param block  the target block
  *
- * @param block   the block
- * @param ctx     walker context
+ * @return a phi translation of node node into block block or NULL
  */
-static void compute_avail_top_down(ir_node *block, void *ctx)
+static ir_node *get_translated(ir_node *block, ir_node *node)
 {
-       pre_env    *env       = (pre_env*)ctx;
-       block_info *dom_info;
-       block_info *info      = get_block_info(block);
-       ir_node    *dom_block;
+       if (is_irn_constlike(node))
+               return node;
 
-       /* filter blocks from topological walker */
-       if (! is_Block(block))
-               return;
+       return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
+}
 
-       if (block == env->end_block)
+/**
+ * Saves result of phi translation of node into predecessor
+ * at pos of block succ.
+ *
+ * @param node   the node
+ * @param succ   the successor of the translation target block
+ * @param pos    the position of the predecessor block
+ * @param trans  the translation result
+ *
+ */
+static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
+{
+       if (is_irn_constlike(node))
                return;
-
-       /* First, add all nodes from the immediate dominator.
-          This ensures that avail_out contains the leader.
-          The start block has no immediate dominator. */
-       if (block != env->start_block) {
-               dom_block = get_Block_idom(block);
-               assert(is_Block(dom_block));
-               dom_info = get_block_info(dom_block);
-
-               value_union(info->avail_out, dom_info->avail_out);
-       }
-       /* Second, add values from exp_gen. */
-       value_union(info->avail_out, info->exp_gen);
-
-       dump_value_set(info->avail_out, "Avail_out", block);
+       /* insert or replace */
+       ir_nodehashmap_insert(map, node, trans);
 }
 
 /**
@@ -561,89 +813,126 @@ static void compute_avail_top_down(ir_node *block, void *ctx)
  *
  * @return a node representing the translated value
  */
-static ir_node *phi_translate(ir_node *node, ir_node *block, int pos)
+static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
 {
        int       i;
        int       arity;
        ir_node **in;
+       ir_node  *pred_block = get_Block_cfgpred_block(block, pos);
        ir_node  *nn;
-       ir_node  *target_block;
+       unsigned  needed;
 
        if (is_Phi(node)) {
-               if (get_nodes_block(node) == block) {
-                       /* a Phi inside target block */
+               if (get_nodes_block(node) == block)
                        return get_Phi_pred(node, pos);
-               }
-               /* already outside */
+               /* this phi does not need translation */
                return node;
        }
-
        arity = get_irn_arity(node);
-       in    = XMALLOCN(ir_node *, arity);
 
+       needed = 0;
+       in = ALLOCANZ(ir_node *, arity);
+
+       /* A value has several representatives. The anti leader is chosen to be
+          the main representative. If we access a node as representative of a
+          value we always use the anti leader. The anti leader can be found by
+          antic_in(identify(node)). */
        for (i = 0; i < arity; ++i) {
-               ir_node *pred       = get_irn_n(node, i);
-               ir_node *pred_block = get_Block_cfgpred_block(block,pos);
-               ir_node *trans      = get_translated(pred, pred_block);
-
-               /* if node is topologically first in block then
-              there is no translated predecessor.
-              We do not check cleanliness here, so pred might be not clean. */
-               if (trans == NULL)
-                       in[i] = pred;
-               else
-                       in[i] = trans;
-       }
+               ir_node *pred   = get_irn_n(node, i);
+               ir_node *value  = identify(pred);
+               /* get leader for pred to lookup its translated value */
+               ir_node *leader = ir_valueset_lookup(leaderset, value);
+               ir_node *pred_trans;
+               ir_node *new_pred;
+
+               if (! leader)
+                       leader = pred;
+
+               /* we cannot find this value in antic_in, because the value
+                  has (possibly) changed! */
+               pred_trans  = get_translated(pred_block, leader);
+
+
+#if DIVMODS
+               if (is_Div(node)) {
+                       ir_node *mem = get_Div_mem(node);
 
-       target_block = get_Block_cfgpred_block(block, pos);
-       if (is_Proj(node)) {
-               /* Projections are the sole case where we have to ensure
-                  that they are in the same block as their tuple node. */
-               target_block = get_nodes_block(in[0]);
+                       mem = skip_Pin(mem);
+
+                       if (! is_Phi(mem))
+                               pred_trans = get_Div_mem(node);
+               }
+#endif
+
+               DB((dbg, LEVEL_3, "trans %+F of %+F is  %+F\n", leader, pred_block, pred_trans));
+               if (pred_trans == NULL) {
+                       new_pred = pred;
+               } else {
+                       new_pred = pred_trans;
+
+                       /* loads: Predecessor is a memory phi, which translated yields a proj or
+                          another phi. In case of projection and a load predecessor,
+                          skip them and use the loads memory. */
+                       if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
+#if LOADS || DIVMODS
+                               ir_node *loadstore = get_Proj_pred(pred_trans);
+                               /* If we do not translate this node, we will get its value wrong. */
+                               needed |= 1;
+
+                               if (is_Load(loadstore)) {
+                                       /* Put new load under the adjacent loads memory edge
+                                          such that GVN may compare them. */
+                                       new_pred = get_Load_mem(loadstore);
+                               } else if (is_Store(loadstore)) {
+                                       new_pred = get_Store_mem(loadstore);
+                               }
+#endif
+                       } else {
+                               /* predecessor value changed, so translation is needed */
+                               if (identify(new_pred) != identify(pred))
+                                       needed |= 1;
+                       }
+               }
+
+               DB((dbg, LEVEL_4, "in %+F\n", new_pred));
+               in[i] = new_pred;
        }
 
+       if (! needed)
+               return node;
+
+       DB((dbg, LEVEL_3, "Translate\n"));
+
+       if (is_Proj(node))
+               pred_block = get_nodes_block(in[0]);
+
+       /* copy node to represent the new value.
+          We do not translate nodes that do not need translation,
+          so we use the newly created nodes as value representatives only.
+          Their block is not important, because we create new ones during
+          the insert node phase. */
        nn = new_ir_node(
                get_irn_dbg_info(node),
-               get_irn_irg(node),
-               target_block,
+               environ->graph,
+               pred_block,
                get_irn_op(node),
                get_irn_mode(node),
                arity,
                in);
-       free(in);
        /* We need the attribute copy here, because the Hash value of a
-          node might depend on that. */
-       copy_node_attr(get_irn_irg(node), node, nn);
-       DB((dbg, LEVEL_5, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
-
-
-       nn = optimize_node(nn);
-       DB((dbg, LEVEL_5, "New GCSE-optimized node %+F origin %+F\n", nn, node));
-
-       /* During the insert phase we need to compare the global value numbers
-          of blocks that do not dominate each other. 'Blocksafe' GCSE requires
-          the two equivalent nodes to be in blocks that dominate each other.
-          (see identities_cmp() in iropt.c)
-          If we do not translate a node into the predecessor block, their values
-          will not be considered equivalent. (we are at a merging block.)
-          So we have to translate a node into its predecessor block.
-          If we switched off blocksafety we will find matching values that are
-          not dominating (in loops) which we cannot use.
-
-          Also, blocksafe GCSE does not kill nn even if its value is already
-          present in the successor because the predecessor blocks do not dominate.
-          This is required for antic_in.
-
-          The nodes produced here are not necessarily in the designated block.
-          They are used to determine the value of node node.
-          If we use them for hoisting, we need to make sure that they are in the
-          designated block. fix_translated() does this job. */
+          node might depend on it. */
+       copy_node_attr(environ->graph, node, nn);
+       /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
+          because it already uses availability. */
 
+       DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
        return nn;
-}  /* phi_translate */
+}
 
 /**
  * Block-walker, computes Antic_in(block).
+ * Builds a value tree out of the graph by translating values
+ * over phi nodes.
  *
  * @param block  the block
  * @param ctx    the walker environment
@@ -658,130 +947,172 @@ static void compute_antic(ir_node *block, void *ctx)
        ir_node                *expr;
        size_t                  size;
        ir_valueset_iterator_t  iter;
+       int                     n_succ;
 
        /* filter blocks from topological walker */
        if (! is_Block(block))
                return;
 
-       /* no need for computations in start block */
-       if (block == env->start_block)
-               return;
-
        /* the end block has no successor */
        if (block == env->end_block)
                return;
 
        info = get_block_info(block);
+       /* track changes */
        size = ir_valueset_size(info->antic_in);
+       n_succ = get_Block_n_cfg_outs(block);
 
-       /* This step puts all generated expression from the
-          current block into antic_in.
-          This is needs to be done in the first iteration only. */
+       /* add exp_gen */
        if (env->first_iter) {
+#if IGNORE_INF_LOOPS
+               /* keep antic_in of infinite loops empty */
+               if (! is_in_infinite_loop(block)) {
+                       foreach_valueset(info->exp_gen, value, expr, iter) {
+                               ir_valueset_insert(info->antic_in, value, expr);
+                       }
+               }
+#else
                foreach_valueset(info->exp_gen, value, expr, iter) {
-                       /* We will have phi nodes in antic in.
-                          This should prevent special cases in several places. */
                        ir_valueset_insert(info->antic_in, value, expr);
                }
+#endif
        }
 
-       /* TODO handle endless loops. */
-
-       int n_succ = get_Block_n_cfg_outs(block);
-       if (n_succ == 1) {
-               int pos = -1;
-
-               /* find blocks position in succ's block predecessors */
-               succ = get_Block_cfg_out(block, 0);
-               pos  = get_Block_cfgpred_pos(succ, block);
-               assert(pos >= 0);
-
+       /* successor might have phi nodes */
+       if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
+               succ      = get_Block_cfg_out(block, 0);
+               int pos   = get_Block_cfgpred_pos(succ, block);
                succ_info = get_block_info(succ);
-               /* translate into list: we cannot insert into a set we iterate
-                * and succ might be equal to block for endless loops */
-               foreach_valueset(succ_info->antic_in, value, expr, iter) {
-                       ir_node *trans;
-                       ir_node *newval;
-
-                       DB((dbg, LEVEL_5, "Begin phi translate antic: expr %+F from %+F to %d\n", expr, succ, pos));
-
-                       /* TODO if successor block has 1 predecessor we need no phi translation.
-                          But the clean_in_block check is still needed! */
-                       /* TODO phi translation and clean in block are overlapping,
-                          because phi trans perhaps should know in advance if predecessors are clean. */
-                       trans = phi_translate(expr, succ, pos);
-                       newval = remember(trans);
 
-                       DB((dbg, LEVEL_5, "----> phi translate antic: expr %+F from %+F to %d is trans %+F\n", expr, succ, pos, trans));
+               /* initialize translated set */
+               if (env->first_iter) {
+                       info->trans = XMALLOC(ir_nodehashmap_t);
+                       ir_nodehashmap_init(info->trans);
+               }
 
-                       if (is_clean_in_block_antic(trans, block)) {
-                               if (! is_irn_constlike(trans)) {
-                                       ir_valueset_insert(info->antic_in, newval, trans);
+               foreach_valueset(succ_info->antic_in, value, expr, iter) {
+                       ir_node *trans = get_translated(block, expr);
+                       ir_node *trans_value;
+                       ir_node *represent;
+
+                       if (trans == NULL)
+                               trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
+
+                       /* create new value if necessary */
+                       trans_value = identify_or_remember(trans);
+
+                       DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
+
+                       /* On value change (phi present) we need the translated node
+                          to represent the new value for possible further translation. */
+                       if (value != trans_value)
+                               represent = trans;
+                       else
+                               represent = expr;
+
+                       if (is_clean_in_block(expr, block, info->antic_in)) {
+#if NO_INF_LOOPS
+                               /* Prevent information flow over the backedge of endless loops. */
+                               if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
+                                       ir_valueset_replace(info->antic_in, trans_value, represent);
                                }
-                               DB((dbg, LEVEL_5, " translated %+F clean in %+F\n", trans, block));
-
-                       } else {
-                               DB((dbg, LEVEL_5, " translated %+F not clean in %+F\n", trans, block));
+#else
+                               ir_valueset_replace(info->antic_in, trans_value, represent);
+#endif
                        }
-
-                       /* We have to set translated anyway
-                          because expr might still be hoisted _into_ block. */
-                       set_translated(expr, succ, pos, trans);
-
-                       DB((dbg, LEVEL_5, "- end: expr %+F -----\n\n", expr));
+                       set_translated(info->trans, expr, represent);
                }
 
        } else if (n_succ > 1) {
-               ir_node    *succ0;
-               block_info *succ0_info;
                int         i;
-               int         common     = 1;
-
-               /* Select a successor to compute the disjoint of all nodes
-                  sets, it might be useful to select the block with the
-                  smallest number of nodes. For simplicity we choose the
-                  first one. */
-               succ0      = get_Block_cfg_out(block, 0);
-               succ0_info = get_block_info(succ0);
+               ir_node    *common     = NULL;
+               ir_node    *succ0      = get_Block_cfg_out(block, 0);
+               block_info *succ0_info = get_block_info(succ0);
 
+               /* disjoint of antic_ins */
                foreach_valueset(succ0_info->antic_in, value, expr, iter) {
-                       /* we need the disjoint */
+                       /* iterate over remaining successors */
                        for (i = 1; i < n_succ; ++i) {
                                ir_node    *succ      = get_Block_cfg_out(block, i);
                                block_info *succ_info = get_block_info(succ);
 
-                               if (ir_valueset_lookup(succ_info->antic_in, value) == NULL) {
-                                       common = 0;
-                                       break;
-                               }
-                       }
+                               /* value in antic_in? */
+                               common = ir_valueset_lookup(succ_info->antic_in, value);
+                               if (common == NULL)
+                                       break;
+                       }
+
+                       if (common && is_clean_in_block(expr, block, info->antic_in))
+                               ir_valueset_replace(info->antic_in, value, expr);
+               }
+       }
+
+
+       DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
+
+       if (size != ir_valueset_size(info->antic_in))
+               env->changes |= 1;
+}
+
+/* --------------------------------------------------------
+ * Main algorithm Avail_out
+ * --------------------------------------------------------
+ */
+
+/**
+ * Computes Avail_out(block):
+ *
+ * Avail_in(block)  = Avail_out(dom(block))
+ * Avail_out(block) = Avail_in(block) \/ Nodes(block)
+ *
+ * Precondition:
+ *  This function must be called in the top-down topological order:
+ *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
+ *
+ * @param block   the block
+ * @param ctx     walker context
+ */
+static void compute_avail_top_down(ir_node *block, void *ctx)
+{
+       pre_env    *env   = (pre_env*)ctx;
+       block_info *info;
+
+       if (block == env->end_block)
+               return;
 
-                       /* we found a value that is common in all Antic_in(succ(b)),
-                          put it in Antic_in(b) if the value is not already represented. */
-                       if (common && is_clean_in_block_antic(expr, block)) {
-                               ir_valueset_insert(info->antic_in, value, expr);
-                       }
-                       set_translated(expr, succ0, 0, expr);
+       info  = get_block_info(block);
 
-               }
+       /* Add all nodes from the immediate dominator.
+          This ensures that avail_out contains the leader. */
+       if (block != env->start_block) {
+               ir_node                *dom_block = get_Block_idom(block);
+               block_info             *dom_info  = get_block_info(dom_block);
+               ir_node                *value;
+               ir_node                *expr;
+               ir_valueset_iterator_t  iter;
+
+               foreach_valueset(dom_info->avail_out, value, expr, iter)
+                       /* replace: use the leader from dominator, not local exp_gen */
+                       ir_valueset_replace(info->avail_out, value, expr);
        }
 
-       dump_value_set(info->antic_in, "Antic_in", block);
-       if (size != ir_valueset_size(info->antic_in)) {
-               env->changes |= 1;
-       }
+       DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
+}
 
-}  /* compute_antic */
+/* --------------------------------------------------------
+ * Main algorithm redundancy detection
+ * --------------------------------------------------------
+ */
 
 /**
- * Finds if the value of expr is a partially redundant value in block.
+ * Returns a valid mode if the value of expr is a partially redundant value.
  *
  * @param block   the block
  * @param expr    the expression
  *
  * @return mode of the expression if it is partially redundant else NULL
  */
-static ir_mode *find_partially_redundant(ir_node *block, ir_node *expr)
+static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
 {
        ir_node *first_avail         = NULL;
        int      pos;
@@ -790,42 +1121,50 @@ static ir_mode *find_partially_redundant(ir_node *block, ir_node *expr)
        int      partially_redundant = 0;
        ir_mode *mode                = NULL;
 
-       DB((dbg, LEVEL_3, "Examine expr %+F of %+F\n", expr, block));
+       DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
 
        /* for each predecessor blocks */
        for (pos = 0; pos < arity; ++pos) {
-               block_info *pred_info;
                ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
+               block_info *pred_info;
                ir_node    *trans_expr;
                ir_node    *trans_value;
                ir_node    *avail_expr;
 
-               /* ignore bad blocks. */
-               if (is_Bad(pred_block))
-                       continue;
-
-               trans_expr = get_translated(expr, get_Block_cfgpred_block(block,pos));
-               DB((dbg, LEVEL_2, "expr %+F trans @ %d is translated %+F\n", expr, pos, trans_expr));
-               /* exp in antic in, so pred is clean
-                  uncover when it is not */
-               assert(trans_expr);
-
+               pred_info  = get_block_info(pred_block);
+               trans_expr = get_translated(pred_block, expr);
                trans_value = identify(trans_expr);
-               DB((dbg, LEVEL_2, "trans_value %+F\n", trans_value));
-               assert(trans_value);
 
-               pred_info  = get_block_info(pred_block);
-               avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
-               DB((dbg, LEVEL_2, "avail_expr %+F\n", avail_expr));
+               if (is_Const(trans_expr))
+                       avail_expr = trans_expr;
+               else
+                       avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+
+               /* value might be available through a not yet existing constant */
+               if (avail_expr == NULL && is_Const(trans_expr)) {
+                       /* limit range of new constants */
+                       ir_mode   *cmode = get_irn_mode(trans_expr);
+                       ir_tarval *upper = new_tarval_from_long(127, cmode);
+                       ir_tarval *lower = new_tarval_from_long(-127, cmode);
+                       ir_tarval *c     = get_Const_tarval(trans_expr);
+
+                       /* tarval within range? */
+                       if (tarval_cmp(lower, c) == ir_relation_less_equal &&
+                               tarval_cmp(c, upper) == ir_relation_less_equal) {
+                               avail_expr = trans_expr;
+                       } else {
+                               avail_expr = NULL;
+                       }
+           }
+
+               DB((dbg, LEVEL_3, "avail_expr %+F  trans_expr %+F\n", avail_expr, trans_expr));
 
                if (avail_expr == NULL) {
-                       /* expr not available */
-                       pred_info->avail = expr;
+                       pred_info->avail = trans_expr;
                        pred_info->found = 0;
                        fully_redundant  = 0;
-
                } else {
-                       /* expr is available */
+                       /* expr is available, use the leader */
                        pred_info->avail    = avail_expr;
                        pred_info->found    = 1;
                        mode                = get_irn_mode(avail_expr);
@@ -834,79 +1173,21 @@ static ir_mode *find_partially_redundant(ir_node *block, ir_node *expr)
                        if (first_avail == NULL)
                                first_avail = avail_expr;
                        else if (first_avail != avail_expr)
-                               /* Multiple different expressions are available */
+                               /* Multiple different expressions are available,
+                                  This is why we need no cut over avail_out sets. */
                                fully_redundant = 0;
 
                        DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
-               }  /* if */
-       }  /* for */
+               }
+       }
 
        /* If it is not the same value already existing along every predecessor
-           and it is defined by some predecessor then it is partially redundant. */
-       if (! fully_redundant && partially_redundant)
-               return mode;
-
-       return NULL;
+       and it is defined by some predecessor then it is partially redundant. */
+       if (! partially_redundant || fully_redundant)
+               return NULL;
+       return mode;
 }
 
-/**
- * Copies node and its predecessors to a block that dominates the target block.
- *
- * @param node     the node
- * @param target   the target block
- *
- * @return copy of node node dominating target block
- */
-static ir_node *fix_translation(ir_node *node, ir_node *target)
-{
-       ir_node  *nn;
-       int       i;
-       int       arity;
-       ir_node **ins;
-
-       DB((dbg, LEVEL_1, "Fix_translation %+F into %+F\n", node, target));
-
-       /* identifies unreachable blocks using domination */
-       if (get_Block_dom_depth(get_nodes_block(node)) < 0 ||
-           (get_Block_dom_depth(target) < 0))
-               return new_r_Bad(get_irn_irg(node), get_irn_mode(node));
-
-       /* Walk upwards until the node dominates its use in target block.
-          Precondition is that the node is clean. */
-       if (block_dominates(get_nodes_block(node), target))
-               return node;
-
-       DB((dbg, LEVEL_1, "Fix_translation%+F of node %+F does not dominate target %+F\n", get_nodes_block(node), node, target));
-
-       arity = get_irn_arity(node);
-       ins   = XMALLOCN(ir_node*, arity);
-
-       for (i = arity - 1; i >= 0; --i) {
-               ir_node *pred  = get_irn_n(node, i);
-               ir_node *fixed = fix_translation(pred, target);
-
-               DB((dbg, LEVEL_1, "Fixed %+F to %+F for node %+F\n", pred, fixed, node));
-               ins[i] = fixed;
-       }
-
-       nn = new_ir_node(
-               get_irn_dbg_info(node),
-               get_irn_irg(node),
-               target,
-               get_irn_op(node),
-               get_irn_mode(node),
-               arity,
-               ins);
-       free(ins);
-       copy_node_attr(get_irn_irg(node), node, nn);
-
-       DB((dbg, LEVEL_1, "New fixed node %+F from translated %+F. target %+F\n", nn, node, target));
-
-       nn = optimize_node(nn);
-       remember(nn);
-       return nn;
-} /* fix_translation */
-
 /**
  * Updates the new_set of a block by adding the new_set of
  * the immediate dominating block.
@@ -922,15 +1203,108 @@ static void update_new_set(ir_node *block, ir_node *idom)
        block_info             *idom_info = get_block_info(idom);
        int                     updated   = 0;
 
-       dump_value_set(idom_info->new_set, "[New Set]", idom);
+       DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
        foreach_valueset(idom_info->new_set, value, expr, iter) {
+               /* inherit new_set from immediate dominator */
                ir_valueset_insert(curr_info->new_set, value, expr);
+               /* replace in avail_out */
                updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
        }
-       if (updated) {
+#ifdef DEBUG_libfirm
+       if (updated)
                dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
+#endif
+}
+
+/**
+ * Checks if hoisting irn is greedy.
+ * Greedy hoisting means that there are non partially redundant nodes
+ * hoisted. This happens if a partially redundant node has
+ * non redundant predecessors.
+ */
+static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
+{
+       int block_arity = get_irn_arity(block);
+       int arity = get_irn_arity(irn);
+       int pos, i;
+       block_info *info = get_block_info(block);
+
+       /* As long as the predecessor values are available in all predecessor blocks,
+          we can hoist this value. */
+       for (pos = 0; pos < block_arity; ++pos) {
+               ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
+               block_info *pred_info  = get_block_info(pred_block);
+
+               for (i = 0; i < arity; ++i) {
+                       ir_node *pred     = get_irn_n(irn, i);
+                       ir_node *value;
+                       ir_node *leader;
+                       ir_node *trans;
+                       ir_node *trans_val;
+                       ir_node *avail;
+
+#if MIN_CUT
+                       /* Very conservative min cut. Phi might only have 1 user. */
+                       if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
+                               return 1;
+#endif
+
+                       if (is_Phi(pred) && get_nodes_block(pred) == block)
+                               continue;
+
+                       DB((dbg, LEVEL_3, "pred %+F\n", pred));
+                       value = identify(pred);
+                       leader = ir_valueset_lookup(info->antic_in, value);
+                       if (! leader)
+                               leader = pred;
+                       DB((dbg, LEVEL_3, "lead %+F\n", leader));
+                       trans   = get_translated(pred_block, leader);
+                       if (! trans)
+                               trans = pred;
+                       DB((dbg, LEVEL_3, "trans %+F\n", trans));
+
+                       trans_val = identify(trans);
+                       DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
+                       if (is_Const(trans_val) || is_SymConst(trans_val)) {
+                               /* existing constant */
+                               if (get_irn_idx(trans_val) < environ->last_idx) {
+                                       continue;
+                               } else {
+                                       /* limit range of new constants */
+                                       ir_mode   *cmode = get_irn_mode(trans);
+                                       ir_tarval *upper = new_tarval_from_long(128, cmode);
+                                       ir_tarval *lower = new_tarval_from_long(-128, cmode);
+                                       ir_tarval *c     = get_Const_tarval(trans);
+
+                                       /* tarval within range? */
+                                       if (tarval_cmp(lower, c) == ir_relation_less &&
+                                               tarval_cmp(c, upper) == ir_relation_less) {
+                                               continue;
+                                       } else {
+                                               return 1;
+                                       }
+                               }
+                       }
+
+                       /* */
+                       if (is_irn_constlike(trans_val))
+                               continue;
+
+                       avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
+
+                       DB((dbg, LEVEL_3, "avail %+F\n", avail));
+                       if (! avail)
+                               return 1;
+#if MIN_CUT
+                       /* only optimize if predecessors have been optimized */
+                       if (ir_valueset_lookup(info->antic_done, value) == NULL)
+                               return 1;
+#endif
+               }
        }
-} /* update_new_set */
+       return 0;
+}
 
 /**
  * Perform insertion of partially redundant values.
@@ -948,26 +1322,26 @@ static void update_new_set(ir_node *block, ir_node *idom)
  * @param block  the block
  * @param ctx    the walker environment
  */
-static void insert_nodes(ir_node *block, void *ctx)
+static void insert_nodes_walker(ir_node *block, void *ctx)
 {
-       pre_env                *env       = (pre_env*)ctx;
+       pre_env                *env    = (pre_env*)ctx;
+       int                     arity  = get_irn_arity(block);
        ir_node                *value;
        ir_node                *expr;
+       block_info             *info;
        ir_node                *idom;
-       block_info             *curr_info;
        int                     pos;
-       int                     arity     = get_irn_arity(block);
        ir_valueset_iterator_t  iter;
 
-       /* filter only blocks */
+       /* only blocks */
        if (! is_Block(block))
                return;
 
        /* ensure that even the start block has a new_set */
-       curr_info = get_block_info(block);
-       if (curr_info->new_set)
-               ir_valueset_del(curr_info->new_set);
-       curr_info->new_set = ir_valueset_new(16);
+       info = get_block_info(block);
+       if (info->new_set)
+               ir_valueset_del(info->new_set);
+       info->new_set = ir_valueset_new(16);
 
        if (block == env->start_block)
                return;
@@ -977,91 +1351,407 @@ static void insert_nodes(ir_node *block, void *ctx)
        idom = get_Block_idom(block);
        update_new_set(block, idom);
 
-       /* process only merge blocks */
-       if (arity < 2)
+       /* process only path joining blocks */
+       if (arity < 2) {
                return;
+       }
 
-       /* for each antic_in */
-       foreach_valueset(curr_info->antic_in, value, expr, iter) {
+       /* This is the main reason antic_in is preverred over antic_out;
+          we may iterate over every anticipated value first and not
+          over the predecessor blocks. */
+       foreach_valueset(info->antic_in, value, expr, iter) {
                ir_mode  *mode;
                ir_node  *phi;
-               ir_node  *phi_value;
                ir_node **phi_in;
 
-               /* filter phi nodes from antic in */
+               /* already done? */
+               if (ir_valueset_lookup(info->antic_done, value))
+                       continue;
+
+               /* filter phi nodes from antic_in */
                if (is_Phi(expr))
                        continue;
 
+               DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
+
                /* A value computed in the dominator is totally redundant.
                   Hence we have nothing to insert. */
                if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
                        DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
+                       DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
+
+                       ir_valueset_insert(info->antic_done, value, expr);
+                       continue;
+               }
+
+               if (is_hoisting_greedy(expr, block)) {
+                       DB((dbg, LEVEL_2, "greedy\n"));
                        continue;
                }
 
-               mode = find_partially_redundant(block, expr);
+               mode = is_partially_redundant(block, expr, value);
                if (mode == NULL)
                        continue;
 
-               DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
+#if LOADS || DIVMODS
+               /* save old mode_M phis to remove keepalive edges later */
+               if (is_memop(expr)) {
+                       ir_node *mem = get_memop_mem(expr);
+                       if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
+                               ir_nodeset_insert(env->keeps, mem);
+                       }
+               }
+#endif
+
+#ifdef DEBUG_libfirm
+               if (! is_Proj(expr)) {
+                       if (env->first_iter)
+                               inc_stats(gvnpre_stats->first_iter_found);
+                       inc_stats(gvnpre_stats->partially);
+               }
+               if (is_Load(expr) || is_Store(expr))
+                       inc_stats(gvnpre_stats->loads);
+               else if (is_Div(expr) || is_Mod(expr))
+                       inc_stats(gvnpre_stats->divmods);
+#endif
 
                phi_in = XMALLOCN(ir_node *, arity);
 
-               /* for all predecessor blocks */
+               /* for each predecessor block */
                for (pos = 0; pos < arity; ++pos) {
                        ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
                        block_info *pred_info;
 
-                       /* ignore bad blocks. */
-                       if (is_Bad(pred_block)) {
-                               ir_graph *irg = get_irn_irg(pred_block);
-                               phi_in[pos] = new_r_Bad(irg, mode);
-                               continue;
-                       }
                        pred_info = get_block_info(pred_block);
 
-                       /* ignore blocks that already have the expression */
                        if (! pred_info->found) {
-                               ir_node *translated  = get_translated(expr, pred_block);
-                               ir_node *trans_value;
-
-                               /* make sure translated dominates its use */
-                               translated = fix_translation(translated, pred_block);
-                               DB((dbg, LEVEL_3, "Use translated %+F in %+F because expr %+F not available\n", translated, pred_block, expr));
-
-                               /* make the new node available  */
-                               trans_value = remember(translated);
-                               ir_valueset_insert(pred_info->avail_out, trans_value, translated);
-                               phi_in[pos] = translated;
-                               DB((dbg, LEVEL_5, "phi_in %+F\n", translated));
+                               int i;
+                               int node_arity = get_irn_arity(expr);
+                               ir_node **in = XMALLOCNZ(ir_node *, node_arity);
+                               ir_node *trans;
+                               ir_node *new_value, *new_value2;
+                               ir_node *target_block = pred_block;
+
+                               for (i = 0; i < node_arity; ++i) {
+                                       ir_node *pred     = get_irn_n(expr, i);
+                                       ir_node *value    = identify(pred);
+                                       ir_node *leader;
+                                       ir_node *trans;
+                                       ir_node *trans_val;
+                                       ir_node *avail;
+
+                                       /* transform knowledge over the predecessor from
+                                          anti-leader world into leader world. */
+
+                                       DB((dbg, LEVEL_3, "pred %+F\n", pred));
+                                       value = identify(pred);
+
+                                       /* get leader for pred to lookup its translated value */
+                                       leader = ir_valueset_lookup(info->antic_in, value);
+                                       if (! leader)
+                                               leader = pred;
+                                       DB((dbg, LEVEL_3, "lead %+F\n", leader));
+
+                                       trans   = get_translated(pred_block, leader);
+                                       if (!trans)
+                                               trans = pred;
+                                       DB((dbg, LEVEL_3, "trans %+F\n", trans));
+
+                                       /* in case of phi, we are done */
+                                       if (is_Phi(pred) && get_nodes_block(pred) == block) {
+                                               in[i] = trans;
+                                               continue;
+                                       }
+
+                                       trans_val = identify(trans);
+                                       DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
+                                       /* constants are always available but not in avail set */
+                                       if (is_irn_constlike(trans_val)) {
+                                               in[i] = trans;
+                                               continue;
+                                       }
+
+                                       /* use the leader
+                                          In case of loads we need to make sure the hoisted
+                                          loads are found despite their unique value. */
+                                       avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
+                                       DB((dbg, LEVEL_3, "avail %+F\n", avail));
+
+                                       assert(avail && "predecessor has to be available");
+                                       in[i] = avail;
+                               }
+
+                               if (is_Proj(expr))
+                                       target_block = get_nodes_block(in[0]);
+
+                               /* Copy node to represent the new value.
+                                  We use translated nodes as value representatives only.
+                                  They have anti leaders as predecessors, not leaders!
+                                  So we have to create a new node using leaders. */
+                               trans = new_ir_node(
+                                       get_irn_dbg_info(expr),
+                                       environ->graph,
+                                       target_block,
+                                       get_irn_op(expr),
+                                       get_irn_mode(expr),
+                                       get_irn_arity(expr),
+                                       in);
+                               free(in);
+                               /* We need the attribute copy here, because the Hash value of a
+                                  node might depend on it. */
+                               copy_node_attr(environ->graph, expr, trans);
+
+                               /* value is now available in target block through trans
+                                  insert (not replace) because it has not been available */
+                               new_value = identify_or_remember(trans);
+                               ir_valueset_insert(pred_info->avail_out, new_value, trans);
+                               DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
+
+                               new_value2 = identify(get_translated(pred_block, expr));
+                               ir_valueset_insert(pred_info->avail_out, new_value2, trans);
+                               DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
+
+                               DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
+
+                               phi_in[pos] = trans;
                        } else {
+                               /* value available */
                                phi_in[pos] = pred_info->avail;
-                               DB((dbg, LEVEL_5, "phi_in %+F\n", pred_info->avail));
                        }
+                       DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
+               }
 
-               }  /* for */
+               /* We do not connect tuples as they will be connected automatically
+                  by the corresponding projections. */
+               if (get_irn_mode(expr) != mode_T) {
 
-               phi = new_r_Phi(block, arity, phi_in, mode);
+                       phi = new_r_Phi(block, arity, phi_in, mode);
+                       DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
+
+                       /* This value is now available through the new phi.
+                          insert || replace in avail_out */
+                       ir_valueset_replace(info->avail_out, value, phi);
+                       ir_valueset_insert(info->new_set, value, phi);
+               }
                free(phi_in);
-               DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
 
-               phi_value = remember(phi);
+               /* already optimized this value in this block */
+               ir_valueset_insert(info->antic_done, value, expr);
+               env->changes |= 1;
+       }
+}
+
+#if HOIST_HIGH
+static void update_new_set_walker(ir_node *block, void *ctx)
+{
+       pre_env *env = (pre_env*)ctx;
+
+       if (! is_Block(block))
+               return;
+       if (block == env->start_block)
+               return;
+
+       update_new_set(block, get_Block_idom(block));
+}
+
+/**
+ * Domtree block walker to insert nodes with dying operands
+ * into the highest possible block whilst still being anticipated.
+ */
+static void hoist_high(ir_node *block, void *ctx)
+{
+       (void)ctx;
 
-               /* this 'global' value is now available through the new phi */
-               ir_valueset_replace(curr_info->avail_out, value, phi);
-               /* add this phi and its 'blocklocal' value */
-               ir_valueset_insert(curr_info->avail_out, phi_value, phi);
+       block_info             *curr_info;
+       ir_valueset_iterator_t  iter;
+       ir_node                *expr;
+       ir_node                *value;
+       int                     arity      = get_irn_arity(block);
 
-               ir_valueset_insert(curr_info->new_set, value, phi);
-               ir_valueset_insert(curr_info->new_set, phi_value, phi);
+       if (! is_Block(block))
+               return;
 
-               /* remove from antic_in to prevent reprocessing */
-               ir_valueset_remove_iterator(curr_info->antic_in, &iter);
+       curr_info = get_block_info(block);
 
-               env->changes |= 1;
+       if (curr_info->new_set)
+               ir_valueset_del(curr_info->new_set);
+       curr_info->new_set = ir_valueset_new(16);
+
+       if (arity < 2)
+               return;
+
+       DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
+
+       /* foreach entry optimized by insert node phase */
+       foreach_valueset(curr_info->antic_done, value, expr, iter) {
+               int pos;
+
+               /* TODO currently we cannot handle load and their projections */
+               if (is_memop(expr) || is_Proj(expr))
+                       continue;
+
+               DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
+
+               /* visit hoisted expressions */
+               for (pos = 0; pos < arity; ++pos) {
+                       /* standard target is predecessor block */
+                       ir_node    *target     = get_Block_cfgpred_block(block, pos);
+                       block_info *pred_info  = get_block_info(target);
+                       ir_node    *avail;
+                       ir_node    *new_target;
+                       ir_node    *trans_expr;
+                       ir_node    *trans_value;
+                       ir_node    *dom;
+                       int         avail_arity;
+                       int         i;
+                       unsigned    nest_depth;
+                       block_info *dom_info;
+
+                       /* get phi translated value */
+                       trans_expr  = get_translated(target, expr);
+                       trans_value = identify(trans_expr);
+                       avail       = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+
+                       /* get the used expr on this path */
+
+                       /* TODO when does this happen? */
+                       if (avail == NULL)
+                               continue;
+
+                       avail_arity = get_irn_arity(avail);
+                       value = identify(avail);
+
+                       /* anticipation border */
+                       new_target  = NULL;
+                       nest_depth  = get_loop_depth(get_irn_loop(target));
+
+                       /* Either push the hoisted nodes up their path,
+                          or try to put them directly into their common dominator. */
+#if COMMON_DOM
+                       /* By using block (instead of target) as initial block,
+                          we only allow hoisting into a common block of
+                          both predecessor blocks. */
+                       dom         = block;
+#else
+                       dom         = target;
+#endif
+
+                       while (dom && dom != get_Block_idom(block)) {
+
+                               dom = get_Block_idom(dom);
+                               dom_info = get_block_info(dom);
+                               DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
+
+                               /* TODO Being in antic_in means hoistable above block,
+                                  but we need 'hoistable into block'.
+                                  This could be achieved by a flag for each valueset pair,
+                                  being set during antic computation. */
+
+                               /* check if available node ist still anticipated and clean */
+                               if (! ir_valueset_lookup(dom_info->antic_in, value)) {
+                                       DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
+                                       break;
+                               }
+
+                               nest_depth = get_loop_depth(get_irn_loop(dom));
+
+                               /* do not hoist into loops */
+                               if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
+                                       DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
+                                       /* not a suitable location */
+                                       continue;
+                               }
+
+                               /* check if operands die */
+
+                               /* check for uses on current path */
+                               for (i = 0; i < avail_arity; i++) {
+                                       ir_node   *pred       = get_irn_n(avail, i);
+                                       ir_node   *pred_value = identify(pred);
+
+                                       if (dom == NULL)
+                                               break;
+
+                                       DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
+
+                                       if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
+                                               DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
+                                               dom = NULL;
+                                               break;
+                                       }
+
+                                       /* check every successor */
+                                       foreach_out_edge(pred, edge) {
+                                               ir_node *succ = get_edge_src_irn(edge);
+                                               DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
+
+                                               /* check only successors on current path to end */
+                                               if (block_dominates(dom, get_nodes_block(succ))) {
+                                                       ir_node *succ_value = identify(succ);
+
+                                                       /* Do we have another user than avail?
+                                                          Then predecessor is not dead after removal of avail. */
+                                                       if (succ_value != value) {
+                                                               DB((dbg, LEVEL_4, "still used in %+F\n", succ));
+                                                               dom = NULL;
+                                                               break;
+                                                       }
+                                               }
+                                       }
+                               }
+                               if (dom)
+                                       new_target = dom;
+
+#if COMMON_DOM
+                               /* only try common dominator */
+                               break;
+#endif
+                       }
+
+                       /* put node into new target block */
+                       if (new_target) {
+                               block_info *target_info = get_block_info(new_target);
+                               int         nn_arity    = get_irn_arity(avail);
+                               ir_node   **in          = XMALLOCN(ir_node *, nn_arity);
+                               ir_node    *nn;
+                               int         i;
+
+                               DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
+                               DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
+
+                               for (i = 0; i < nn_arity; ++i) {
+                                       ir_node *pred       = get_irn_n(avail, i);
+                                       ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
+                                       assert(avail_pred);
+                                       in[i] = avail_pred;
+                               }
+                               nn = new_ir_node(
+                                       get_irn_dbg_info(avail),
+                                       environ->graph,
+                                       new_target,
+                                       get_irn_op(avail),
+                                       get_irn_mode(avail),
+                                       nn_arity,
+                                       in);
+                               free(in);
+
+                               identify_or_remember(nn);
+                               /* TODO Nodes are inserted into a dominating block and should
+                                  be available from this point on. Currently we do not push
+                                  the availability information through during the walk. */
+                               ir_valueset_insert(target_info->new_set, value, nn);
+                               ir_valueset_insert(target_info->avail_out, value, nn);
+                       }
+               }
+       }
+}
+#endif
 
-  }  /* node_set_foreach */
-}  /* insert_nodes */
+/* --------------------------------------------------------
+ * Elimination of fully redundant nodes
+ * --------------------------------------------------------
+ */
 
 /**
  * Walker which finds redundant nodes using avail_out sets
@@ -1078,27 +1768,29 @@ static void eliminate(ir_node *irn, void *ctx)
 
        if (! is_Block(irn)) {
                ir_node    *block = get_nodes_block(irn);
-               block_info *bl    = get_block_info(block);
+               block_info *info  = get_block_info(block);
                ir_node    *value = identify(irn);
 
                if (value != NULL) {
-                       ir_node *expr = (ir_node*)ir_valueset_lookup(bl->avail_out, value);
+                       ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
+                       DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
 
                        if (expr != NULL && expr != irn) {
-                               elim_pair *p = OALLOC(env->obst, elim_pair);
+                               elim_pair *p = OALLOC(&env->obst, elim_pair);
 
                                p->old_node = irn;
                                p->new_node = expr;
                                p->next     = env->pairs;
-                               if (get_irn_idx(expr) >= env->last_idx)
+                               if (get_irn_idx(expr) > env->last_idx)
                                        p->reason = FS_OPT_GVN_PARTLY;
                                else
                                        p->reason = FS_OPT_GVN_FULLY;
                                env->pairs = p;
+                               DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
                        }
                }
        }
-}  /* eliminate */
+}
 
 /**
  * Do all the recorded changes and optimize
@@ -1106,15 +1798,17 @@ static void eliminate(ir_node *irn, void *ctx)
  *
  * @param pairs  list of elimination pairs
  */
-static void eliminate_nodes(elim_pair *pairs)
+static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
 {
-       elim_pair *p;
+       elim_pair             *p;
+       ir_node               *end    = environ->end_node;
 
        for (p = pairs; p != NULL; p = p->next) {
                /* might be already changed */
                p->new_node = skip_Id(p->new_node);
 
-               DB((dbg, LEVEL_1, "Replacing %+F by %+F\n", p->old_node, p->new_node));
+               DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
+
                /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
                 * which we can optimize here */
                if (is_Phi(p->new_node)) {
@@ -1138,120 +1832,200 @@ static void eliminate_nodes(elim_pair *pairs)
                        }
                }
                DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
+
                exchange(p->old_node, p->new_node);
        }
-}  /* eliminate_nodes */
+
+       /* remove keep alive edges of unused mode_M phis */
+       foreach_ir_nodeset(keeps, m_phi, iter) {
+               remove_End_keepalive(end, m_phi);
+       }
+}
+
+
+/* --------------------------------------------------------
+ * GVN PRE pass
+ * --------------------------------------------------------
+ */
 
 /**
- * Gvn_Pre pass for graph irg.
+ * Gvn_Pre algorithm.
  *
  * @param irg   the graph
+ * @param env   the environment
  */
-void do_gvn_pre(ir_graph *irg)
+static void gvn_pre(ir_graph *irg, pre_env *env)
 {
-       struct obstack        obst;
-       pre_env               a_env;
-       optimization_state_t  state;
-       block_info           *bl_info;
        unsigned              antic_iter;
        unsigned              insert_iter;
 
-       assure_irg_properties(irg,
-               IR_GRAPH_PROPERTY_CONSISTENT_OUTS
-               | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
-               | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
-               | IR_GRAPH_PROPERTY_CONSISTENT_POSTDOMINANCE);
-
-       /* register a debug mask */
-       FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
-       /* edges will crash if enabled due to our allocate on other obstack trick */
-       edges_deactivate(irg);
-
-       save_optimization_state(&state);
-
-       /* CSE pass
-        * If there are two nodes with the same value in one block,
-        * the exp_gen valueset can only contain one of them. */
-       set_opt_global_cse(0);
-       new_identities(irg);
-       irg_walk_graph(irg, NULL, cse_walker, &a_env);
-
        DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
 
-       /* Switch on GCSE. We need it to correctly compute
-          the value of a node, which is independent from
-          its block. */
-       set_opt_global_cse(1);
-       new_identities(irg);
-
-       /* setup environment */
-       obstack_init(&obst);
-       a_env.obst        = &obst;
-       a_env.list        = NULL;
-       a_env.start_block = get_irg_start_block(irg);
-       a_env.end_block   = get_irg_end_block(irg);
-       a_env.pairs       = NULL;
-
        /* allocate block info */
-       irg_walk_blkwise_graph(irg, block_info_walker, NULL, &a_env);
+       irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
+
+       ir_nodehashmap_init(&value_map);
 
        /* generate exp_gen */
-       irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
-       dump_all_expgen_sets(a_env.list);
+       irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
+       dump_all_expgen_sets(env->list);
 
        /* compute the avail_out sets for all blocks */
-       dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
+       dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
 
        /* compute the anticipated value sets for all blocks */
-       antic_iter       = 0;
-       a_env.first_iter = 1;
+       antic_iter      = 0;
+       env->first_iter = 1;
 
+       env->iteration = 1;
        /* antic_in passes */
        do {
-               DB((dbg, LEVEL_1, "= Antic_in Iteration %d ========================\n",
-                   ++antic_iter));
-               a_env.changes = 0;
-               irg_walk_blkwise_graph(irg, compute_antic, NULL, &a_env);
-               a_env.first_iter = 0;
-               DB((dbg, LEVEL_1, "----------------------------------------------\n"));
-       /* TODO bad endless loop protection */
-       } while (a_env.changes != 0 && antic_iter < 40);
-
+               ++antic_iter;
+               DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
+               env->changes = 0;
+               irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
+               env->first_iter = 0;
+               DB((dbg, LEVEL_2, "----------------------------------------------\n"));
+               env->iteration ++;
+       } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
+
+       DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
+
+       ir_nodeset_init(env->keeps);
+       insert_iter       = 0;
+       env->first_iter   = 1;
        /* compute redundant expressions */
-       insert_iter = 0;
-       a_env.last_idx = get_irg_last_idx(irg);
        do {
                ++insert_iter;
-               DB((dbg, LEVEL_1, "= Insert Iteration %d ==========================\n", insert_iter));
-               a_env.changes = 0;
+               DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
+               env->changes = 0;
                /* TODO topologically top down would be better; fewer iterations. */
-               dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
-               DB((dbg, LEVEL_1, "----------------------------------------------\n"));
-       } while (a_env.changes != 0);
+               dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
+               env->first_iter = 0;
+               DB((dbg, LEVEL_2, "----------------------------------------------\n"));
+       } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
+       DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
+
+#if HOIST_HIGH
+       /* An attempt to reduce lifetimes by hoisting already hoisted values
+          even higher if their operands die. */
+       dom_tree_walk_irg(irg, hoist_high, NULL, NULL);
+       /* update avail_out for elimination */
+       dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
+#endif
+
+       /* Deactivate edges to prevent intelligent removal of nodes,
+          or else we will get deleted nodes which we try to exchange. */
+       edges_deactivate(environ->graph);
+
+       /* eliminate nodes */
+       irg_walk_graph(irg, NULL, eliminate, env);
+       eliminate_nodes(env->pairs, env->keeps);
+
+       ir_nodeset_destroy(env->keeps);
+}
 
-       /* last step: eliminate nodes */
-       irg_walk_graph(irg, NULL, eliminate, &a_env);
-       eliminate_nodes(a_env.pairs);
+/**
+ * Gvn_Pre pass for graph irg.
+ *
+ * @param irg   the graph
+ */
+void do_gvn_pre(ir_graph *irg)
+{
+       pre_env               env;
+       ir_nodeset_t          keeps;
+       optimization_state_t  state;
+       block_info           *block_info;
+
+       /* bads and unreachables cause too much trouble with dominance,
+          loop info for endless loop detection,
+          no critical edges is PRE precondition
+        */
+       assure_irg_properties(irg,
+               IR_GRAPH_PROPERTY_NO_BADS
+               | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
+               | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
+               | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
+               | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
+               | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
+
+       /* register a debug mask */
+       FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
+
+       save_optimization_state(&state);
+       ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
+
+       edges_activate(irg);
+
+       environ = &env;
+       DEBUG_ONLY(init_stats();)
+
+       /* setup environment */
+       env.graph        = irg;
+       env.list         = NULL;
+       env.start_block  = get_irg_start_block(irg);
+       env.end_block    = get_irg_end_block(irg);
+       env.end_node     = get_irg_end(irg);
+       env.pairs        = NULL;
+       env.keeps        = &keeps;
+       env.last_idx     = get_irg_last_idx(irg);
+       obstack_init(&env.obst);
+
+       /* Detect and set links of infinite loops to non-zero. */
+       analyse_loops(irg);
+
+#if OPTIMIZE_NODES
+       new_identities(irg);
+       env.value_table = irg->value_table;
+       irg->value_table = NULL;
+#endif
+
+       /* Switch on GCSE. We need it to correctly compute
+          the value of a node, which is independent from
+          its block. */
+       set_opt_global_cse(1);
+       /* new_identities() */
+       if (irg->value_table != NULL)
+               del_pset(irg->value_table);
+       /* initially assumed nodes in pset are 512 */
+       irg->value_table = new_pset(compare_gvn_identities, 512);
+#if OPTIMIZE_NODES
+       env.gvnpre_values = irg->value_table;
+#endif
+
+       /* do GVN-PRE pass */
+       gvn_pre(irg, &env);
+       DEBUG_ONLY(print_stats();)
 
        /* clean up: delete all sets */
-       for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
-               ir_valueset_del(bl_info->exp_gen);
-               ir_valueset_del(bl_info->avail_out);
-               ir_valueset_del(bl_info->antic_in);
-               ir_nodehashmap_destroy(bl_info->trans);
-               free(bl_info->trans);
-               if (bl_info->new_set)
-                       ir_valueset_del(bl_info->new_set);
+       for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
+               free_block_info(block_info);
        }
 
-       obstack_free(&obst, NULL);
+       DEBUG_ONLY(free_stats();)
+       ir_nodehashmap_destroy(&value_map);
+       obstack_free(&env.obst, NULL);
+       ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
 
-       /* pin the graph again.
+       /* Pin the graph again.
           This is needed due to the use of set_opt_global_cse(1) */
        set_irg_pinned(irg, op_pin_state_pinned);
        restore_optimization_state(&state);
-
        confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
+
+#if OPTIMIZE_NODES
+       irg->value_table = env.value_table;
+       del_pset(irg->value_table);
+       irg->value_table = env.gvnpre_values;
+#endif
+
+       /* TODO There seem to be optimizations that try to use the existing
+          value_table. */
+       new_identities(irg);
+
+       /* TODO assure nothing else breaks. */
+       set_opt_global_cse(0);
+       edges_activate(irg);
 }
 
 /* Creates an ir_graph pass for do_gvn_pre. */