- refactoring of backend generator scripts: You can create multiple constructors
[libfirm] / ir / opt / opt_ldst.c
index 2dec50d..11f5571 100644 (file)
@@ -42,6 +42,7 @@
 #include "raw_bitset.h"
 #include "debug.h"
 #include "error.h"
+#include "irpass.h"
 
 /* maximum number of output Proj's */
 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
@@ -107,6 +108,7 @@ struct block_t {
        block_t  *forward_next;      /**< next block entry for forward iteration */
        block_t  *backward_next;     /**< next block entry for backward iteration */
        memop_t  *avail;             /**< used locally for the avail map */
+       memop_t  **trans_results;    /**< used to cached translated nodes due antic calculation. */
 };
 
 /**
@@ -131,13 +133,13 @@ typedef struct ldst_env_t {
 #endif
 } ldst_env;
 
+/* the one and only environment */
+static ldst_env env;
+
 #ifdef DEBUG_libfirm
 
 static firm_dbg_module_t *dbg;
 
-/* the one and only environment */
-static ldst_env env;
-
 /**
  * Dumps the block list.
  *
@@ -166,7 +168,7 @@ static void dump_block_list(ldst_env *env) {
                }
                DB((dbg, LEVEL_2, "\n}\n\n"));
        }
-}
+}  /* dump_block_list */
 
 /**
  * Dumps the current set.
@@ -175,13 +177,13 @@ static void dump_block_list(ldst_env *env) {
  * @param s    name of the set
  */
 static void dump_curr(block_t *bl, const char *s) {
-       unsigned pos = 0;
        unsigned end = env.rbs_size - 1;
+       unsigned pos;
        int      i;
 
        DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
        i = 0;
-       for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
+       for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
                memop_t *op = env.curr_id_2_memop[pos];
 
                if (i == 0) {
@@ -192,11 +194,16 @@ static void dump_curr(block_t *bl, const char *s) {
                i = (i + 1) & 3;
        }
        DB((dbg, LEVEL_2, "\n}\n"));
-}
+}  /* dump_curr */
 
 #else
-#define dump_block_list()
-#define dump_curr(bl, s)
+static void dump_block_list(ldst_env *env) {
+       (void) env;
+}
+static void dump_curr(block_t *bl, const char *s) {
+       (void) bl;
+       (void) s;
+}
 #endif /* DEBUG_libfirm */
 
 /** Get the block entry for a block node */
@@ -204,13 +211,13 @@ static block_t *get_block_entry(const ir_node *block) {
        assert(is_Block(block));
 
        return get_irn_link(block);
-}
+}  /* get_block_entry */
 
 /** Get the memop entry for a memory operation node */
 static memop_t *get_irn_memop(const ir_node *irn) {
        assert(! is_Block(irn));
        return get_irn_link(irn);
-}
+}  /* get_irn_memop */
 
 /**
  * Walk over the memory edges from definition to users.
@@ -250,7 +257,7 @@ static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, v
        }
        if (post)
                post(irn, ctx);
-}
+}  /* walk_memory */
 
 /**
  * Walks over all memory nodes of a graph.
@@ -272,16 +279,70 @@ static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post
        walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
 
        ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
-}
+}  /* walk_memory_irg */
 
 /**
- * Walker: allocate an block entry for every block.
+ * Register an address and allocate a (sparse, 0..n) ID for it.
+ *
+ * @param adr  the IR-node representing the address
+ *
+ * @return the allocated id
  */
-static void prepare_blocks(ir_node *block, void *ctx) {
+static unsigned register_address(ir_node *adr) {
+       address_entry *entry;
+
+       /* skip Confirms and Casts */
+restart:
+       if (is_Confirm(adr)) {
+               adr = get_Confirm_value(adr);
+               goto restart;
+       }
+       if (is_Cast(adr)) {
+               adr = get_Cast_op(adr);
+               goto restart;
+       }
+
+       entry = ir_nodemap_get(&env.adr_map, adr);
+
+       if (entry == NULL) {
+               /* new address */
+               entry = OALLOC(&env.obst, address_entry);
+
+               entry->id = env.curr_adr_id++;
+               ir_nodemap_insert(&env.adr_map, adr, entry);
+
+               DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
+#ifdef DEBUG_libfirm
+               ARR_APP1(ir_node *, env.id_2_address, adr);
+#endif
+       }
+       return entry->id;
+}  /* register_address */
+
+
+/**
+ * translate an address through a Phi node into a given predecessor
+ * block.
+ *
+ * @param address  the address
+ * @param block    the block
+ * @param pos      the position of the predecessor in block
+ */
+static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos) {
+       if (is_Phi(address) && get_nodes_block(address) == block)
+               address = get_Phi_pred(address, pos);
+       return address;
+}  /* phi_translate */
+
+/**
+ * Walker: allocate an block entry for every block
+ * and register all potential addresses.
+ */
+static void prepare_blocks(ir_node *irn, void *ctx) {
        (void)ctx;
 
-       if (is_Block(block)) {
-               block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
+       if (is_Block(irn)) {
+               block_t *entry = OALLOC(&env.obst, block_t);
                int     n;
 
                entry->memop_forward    = NULL;
@@ -290,22 +351,34 @@ static void prepare_blocks(ir_node *block, void *ctx) {
                entry->id_2_memop_avail = NULL;
                entry->anticL_in        = NULL;
                entry->id_2_memop_antic = NULL;
-               entry->block            = block;
+               entry->block            = irn;
                entry->forward_next     = NULL;
                entry->backward_next    = NULL;
                entry->avail            = NULL;
-               set_irn_link(block, entry);
+               entry->trans_results    = NULL;
+               set_irn_link(irn, entry);
 
-               set_Block_phis(block, NULL);
+               set_Block_phis(irn, NULL);
 
                /* use block marks to track unreachable blocks */
-               set_Block_mark(block, 0);
+               set_Block_mark(irn, 0);
 
-               n = get_Block_n_cfgpreds(block);
+               n = get_Block_n_cfgpreds(irn);
                if (n > env.max_cfg_preds)
                        env.max_cfg_preds = n;
+       } else {
+               ir_mode *mode = get_irn_mode(irn);
+
+               if (mode_is_reference(mode)) {
+                       /*
+                        * Register ALL possible addresses: this is overkill yet but
+                        * simpler then doing it for all possible translated addresses
+                        * (which would be sufficient in the moment.
+                        */
+                       (void)register_address(irn);
+               }
        }
-}
+}  /* prepare_blocks */
 
 /**
  * Post-Walker, link in all Phi's
@@ -317,7 +390,7 @@ static void link_phis(ir_node *irn, void *ctx) {
                ir_node *block = get_nodes_block(irn);
                add_Block_phi(block, irn);
        }
-}
+}  /* link_phis */
 
 /**
  * Block walker: creates the inverse post-order list for the CFG.
@@ -337,7 +410,7 @@ static void inverse_post_order(ir_node *block, void *ctx) {
        /* remember the first visited (last in list) entry, needed for later */
        if (env.backward == NULL)
                env.backward = entry;
-}
+}  /* inverse_post_order */
 
 /**
  * Block walker: create backward links for the memops of a block.
@@ -367,15 +440,18 @@ static void collect_backward(ir_node *block, void *ctx) {
                last     = op;
        }
        entry->memop_backward = last;
-}
+}  /* collect_backward */
 
 /**
  * Allocate a memop.
  *
- * @param irn  the IR-node representing the memop
+ * @param irn  the IR-node representing the memop or NULL
+ *             if this is a translated (virtual) memop
+ *
+ * @return the allocated memop
  */
 static memop_t *alloc_memop(ir_node *irn) {
-       memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
+       memop_t *m = OALLOC(&env.obst, memop_t);
 
        m->value.address = NULL;
        m->value.value   = NULL;
@@ -389,9 +465,10 @@ static memop_t *alloc_memop(ir_node *irn) {
 
        memset(m->projs, 0, sizeof(m->projs));
 
-       set_irn_link(irn, m);
+       if (irn != NULL)
+               set_irn_link(irn, m);
        return m;
-}
+}  /* alloc_memop */
 
 /**
  * Create a memop for a Phi-replacement.
@@ -400,7 +477,7 @@ static memop_t *alloc_memop(ir_node *irn) {
  * @param phi  the Phi-node representing the new value
  */
 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
-       memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
+       memop_t *m = OALLOC(&env.obst, memop_t);
 
        m->value         = op->value;
        m->value.value   = phi;
@@ -412,43 +489,7 @@ static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
 
        set_irn_link(phi, m);
        return m;
-}
-
-/**
- * Register an address and allocate an ID for it.
- *
- * @param adr  the IR-node representing the address
- */
-static unsigned register_address(ir_node *adr) {
-       address_entry *entry;
-
-       /* skip Confirms and Casts */
-restart:
-       if (is_Confirm(adr)) {
-               adr = get_Confirm_value(adr);
-               goto restart;
-       }
-       if (is_Cast(adr)) {
-               adr = get_Cast_op(adr);
-               goto restart;
-       }
-
-       entry = ir_nodemap_get(&env.adr_map, adr);
-
-       if (entry == NULL) {
-               /* new address */
-               entry = obstack_alloc(&env.obst, sizeof(*entry));
-
-               entry->id = env.curr_adr_id++;
-               ir_nodemap_insert(&env.adr_map, adr, entry);
-
-               DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
-#ifdef DEBUG_libfirm
-               ARR_APP1(ir_node *, env.id_2_address, adr);
-#endif
-       }
-       return entry->id;
-}
+}  /* clone_memop_phi */
 
 /**
  * Return the memory properties of a call node.
@@ -473,7 +514,7 @@ static unsigned get_Call_memory_properties(ir_node *call) {
                }
        }
        return prop & (mtp_property_const|mtp_property_pure);
-}
+}  /* get_Call_memory_properties */
 
 /**
  * Returns an entity if the address ptr points to a constant one.
@@ -482,8 +523,7 @@ static unsigned get_Call_memory_properties(ir_node *call) {
  *
  * @return an entity or NULL
  */
-static ir_entity *find_constant_entity(ir_node *ptr)
-{
+static ir_entity *find_constant_entity(ir_node *ptr) {
        for (;;) {
                if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
                        return get_SymConst_entity(ptr);
@@ -1001,7 +1041,7 @@ static void update_Load_memop(memop_t *m) {
                        env.changed = 1;
                }
                if (m->projs[pn_Load_X_regular]) {
-                       exchange(m->projs[pn_Load_X_regular], new_r_Jmp(current_ir_graph, get_nodes_block(load)));
+                       exchange(m->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
                        m->projs[pn_Load_X_regular] = NULL;
                        env.changed = 1;
                }
@@ -1048,7 +1088,7 @@ static void update_Load_memop(memop_t *m) {
                /* no user, KILL it */
                mark_replace_load(m, NULL);
        }
-}
+}  /* update_Load_memop */
 
 /**
  * Update a memop for a Store.
@@ -1095,7 +1135,7 @@ static void update_Store_memop(memop_t *m) {
        }
        m->value.value = get_Store_value(store);
        m->value.mode  = get_irn_mode(m->value.value);
-}
+}  /* update_Store_memop */
 
 /**
  * Update a memop for a Call.
@@ -1132,7 +1172,7 @@ static void update_Call_memop(memop_t *m) {
                        break;
                }
        }
-}
+}  /* update_Call_memop */
 
 /**
  * Update a memop for a Div/Mod/Quot/DivMod.
@@ -1159,7 +1199,7 @@ static void update_DivOp_memop(memop_t *m) {
                        break;
                }
        }
-}
+}  /* update_DivOp_memop */
 
 /**
  * Update a memop for a Phi.
@@ -1169,7 +1209,7 @@ static void update_DivOp_memop(memop_t *m) {
 static void update_Phi_memop(memop_t *m) {
        /* the Phi is it's own mem */
        m->mem = m->node;
-}
+}  /* update_Phi_memop */
 
 /**
  * Memory walker: collect all memory ops and build topological lists.
@@ -1243,12 +1283,16 @@ static void collect_memops(ir_node *irn, void *ctx) {
                        entry->memop_backward       = op;
                }
        }
-}
+}  /* collect_memops */
 
 /**
  * Find an address in the current set.
  *
  * @param value  the value to be searched for
+ *
+ * @return a memop for the value or NULL if the value does
+ *         not exists in the set or cannot be converted into
+ *         the requested mode
  */
 static memop_t *find_address(const value_t *value) {
        if (rbitset_is_set(env.curr_set, value->id)) {
@@ -1263,28 +1307,27 @@ static memop_t *find_address(const value_t *value) {
                        return res;
        }
        return NULL;
-}
+}  /* find_address */
 
 /**
  * Find an address in the avail_out set.
  *
  * @param bl     the block
- * @param value  the value to be searched for
  */
-static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
-       if (rbitset_is_set(bl->avail_out, value->id)) {
-               memop_t *res = bl->id_2_memop_avail[value->id];
+static memop_t *find_address_avail(const block_t *bl, unsigned id, const ir_mode *mode) {
+       if (rbitset_is_set(bl->avail_out, id)) {
+               memop_t *res = bl->id_2_memop_avail[id];
 
-               if (res->value.mode == value->mode)
+               if (res->value.mode == mode)
                        return res;
                /* allow hidden casts */
                if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
-                   get_mode_arithmetic(value->mode) == irma_twos_complement &&
-                   get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
+                   get_mode_arithmetic(mode) == irma_twos_complement &&
+                   get_mode_size_bits(res->value.mode) == get_mode_size_bits(mode))
                        return res;
        }
        return NULL;
-}
+}  /* find_address_avail */
 
 /**
  * Kill all addresses from the current set.
@@ -1294,7 +1337,7 @@ static void kill_all(void) {
 
        /* set sentinel */
        rbitset_set(env.curr_set, env.rbs_size - 1);
-}
+}  /* kill_all */
 
 /**
  * Kill memops that are not alias free due to a Store value from the current set.
@@ -1302,19 +1345,20 @@ static void kill_all(void) {
  * @param value  the Store value
  */
 static void kill_memops(const value_t *value) {
-       unsigned pos = 0;
        unsigned end = env.rbs_size - 1;
+       unsigned pos;
 
-       for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
+       for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
                memop_t *op = env.curr_id_2_memop[pos];
 
                if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
                                                          op->value.address, op->value.mode)) {
                        rbitset_clear(env.curr_set, pos);
                        env.curr_id_2_memop[pos] = NULL;
+                       DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
                }
        }
-}
+}  /* kill_memops */
 
 /**
  * Add the value of a memop to the current set.
@@ -1324,7 +1368,7 @@ static void kill_memops(const value_t *value) {
 static void add_memop(memop_t *op) {
        rbitset_set(env.curr_set, op->value.id);
        env.curr_id_2_memop[op->value.id] = op;
-}
+}  /* add_memop */
 
 /**
  * Add the value of a memop to the avail_out set.
@@ -1335,11 +1379,14 @@ static void add_memop(memop_t *op) {
 static void add_memop_avail(block_t *bl, memop_t *op) {
        rbitset_set(bl->avail_out, op->value.id);
        bl->id_2_memop_avail[op->value.id] = op;
-}
+}  /* add_memop_avail */
 
 /**
  * Check, if we can convert a value of one mode to another mode
  * without changing the representation of bits.
+ *
+ * @param from  the original mode
+ * @param to    the destination mode
  */
 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
        if (get_mode_arithmetic(from) == irma_twos_complement &&
@@ -1347,10 +1394,16 @@ static int can_convert_to(const ir_mode *from, const ir_mode *to) {
            get_mode_size_bits(from) == get_mode_size_bits(to))
                return 1;
        return 0;
-}
+}  /* can_convert_to */
 
 /**
- * Add a Conv if needed.
+ * Add a Conv to the requested mode if needed.
+ *
+ * @param irn   the IR-node to convert
+ * @param mode  the destination mode
+ *
+ * @return the possible converted node or NULL
+ *         if the conversion is not possible
  */
 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
        ir_mode *other = get_irn_mode(irn);
@@ -1358,13 +1411,32 @@ static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
                /* different modes: check if conversion is possible without changing the bits */
                if (can_convert_to(other, mode)) {
                        ir_node *block = get_nodes_block(irn);
-                       return new_r_Conv(current_ir_graph, block, irn, mode);
+                       return new_r_Conv(block, irn, mode);
                }
                /* otherwise not possible ... yet */
                return NULL;
        }
        return irn;
-}
+}  /* conv_to */
+
+/**
+ * Update the address of an value if this address was a load result
+ * and the load is killed now.
+ *
+ * @param value  the value whose address is updated
+ */
+static void update_address(value_t *value) {
+       if (is_Proj(value->address)) {
+               ir_node *load = get_Proj_pred(value->address);
+
+               if (is_Load(load)) {
+                       const memop_t *op = get_irn_memop(load);
+
+                       if (op->flags & FLAG_KILLED_NODE)
+                               value->address = op->replace;
+               }
+       }
+}  /* update_address */
 
 /**
  * Do forward dataflow analysis on the given block and calculate the
@@ -1387,7 +1459,10 @@ static void calc_gen_kill_avail(block_t *bl) {
                case iro_Load:
                        if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
                                /* do we have this already? */
-                               memop_t *other = find_address(&op->value);
+                               memop_t *other;
+
+                               update_address(&op->value);
+                               other = find_address(&op->value);
                                if (other != NULL && other != op) {
                                        def = conv_to(other->value.value, op->value.mode);
                                        if (def != NULL) {
@@ -1412,7 +1487,10 @@ static void calc_gen_kill_avail(block_t *bl) {
                case iro_Store:
                        if (! (op->flags & FLAG_KILLED_NODE)) {
                                /* do we have this store already */
-                               memop_t *other = find_address(&op->value);
+                               memop_t *other;
+
+                               update_address(&op->value);
+                               other = find_address(&op->value);
                                if (other != NULL) {
                                        if (is_Store(other->node)) {
                                                if (op != other && !(other->flags & FLAG_IGNORE) &&
@@ -1445,7 +1523,7 @@ static void calc_gen_kill_avail(block_t *bl) {
                                kill_all();
                }
        }
-}
+}  /* calc_gen_kill_avail */
 
 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
 
@@ -1462,7 +1540,7 @@ static void forward_avail(block_t *bl) {
 
        calc_gen_kill_avail(bl);
        dump_curr(bl, "Avail_out");
-}
+}  /* forward_avail */
 
 /**
  * Do backward dataflow analysis on a given block to calculate the antic set
@@ -1474,16 +1552,69 @@ static void forward_avail(block_t *bl) {
  */
 static int backward_antic(block_t *bl) {
        memop_t *op;
-       int     n = get_Block_n_cfg_outs(bl->block);
+       ir_node *block = bl->block;
+       int     n = get_Block_n_cfg_outs(block);
+
+       if (n == 1) {
+               ir_node  *succ    = get_Block_cfg_out(block, 0);
+               block_t  *succ_bl = get_block_entry(succ);
+               int      pred_pos = get_Block_cfgpred_pos(succ, block);
+               unsigned end      = env.rbs_size - 1;
+               unsigned pos;
 
-       if (n >= 1) {
-               ir_node *succ    = get_Block_cfg_out(bl->block, 0);
+               kill_all();
+
+               if (bl->trans_results == NULL) {
+                       /* allocate the translate cache */
+                       bl->trans_results = OALLOCNZ(&env.obst, memop_t*, env.curr_adr_id);
+               }
+
+               /* check for partly redundant values */
+               for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
+                    pos < end;
+                    pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
+                       /*
+                        * do Phi-translation here: Note that at this point the nodes are
+                        * not changed, so we can safely cache the results.
+                        * However: Loads of Load results ARE bad, because we have no way
+                         to translate them yet ...
+                        */
+                       memop_t *op = bl->trans_results[pos];
+                       if (op == NULL) {
+                               /* not yet translated */
+                               ir_node *adr, *trans_adr;
+
+                               op  = succ_bl->id_2_memop_antic[pos];
+                               adr = op->value.address;
+
+                               trans_adr = phi_translate(adr, succ, pred_pos);
+                               if (trans_adr != adr) {
+                                       /* create a new entry for the translated one */
+                                       memop_t *new_op;
+
+                                       new_op = alloc_memop(NULL);
+                                       new_op->value.address = trans_adr;
+                                       new_op->value.id      = register_address(trans_adr);
+                                       new_op->value.mode    = op->value.mode;
+                                       new_op->node          = op->node; /* we need the node to decide if Load/Store */
+                                       new_op->flags         = op->flags;
+
+                                       bl->trans_results[pos] = new_op;
+                                       op = new_op;
+                               }
+                       }
+                       env.curr_id_2_memop[op->value.id] = op;
+                       rbitset_set(env.curr_set, op->value.id);
+               }
+       } else if (n > 1) {
+               ir_node *succ    = get_Block_cfg_out(block, 0);
                block_t *succ_bl = get_block_entry(succ);
                int i;
 
-               rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
+               rbitset_copy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
                memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
 
+               /* Hmm: probably we want kill merges of Loads ans Stores here */
                for (i = n - 1; i > 0; --i) {
                        ir_node *succ    = get_Block_cfg_out(bl->block, i);
                        block_t *succ_bl = get_block_entry(succ);
@@ -1495,18 +1626,6 @@ static int backward_antic(block_t *bl) {
                kill_all();
        }
 
-#if 0
-       /* cleanup: kill those Loads which address is not available */
-       for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
-               memop_t *op     = succ_bl->id_2_memop[pos];
-               ir_node *ptr    = get_Load_ptr(op->node);
-               ir_node *ptr_bl = get_nodes_block(ptr);
-
-               if (!block_dominates(ptr_bl, bl->block))
-                       rbitset_clear(env.curr_set, pos);
-       }
-#endif
-
        dump_curr(bl, "AnticL_out");
 
        for (op = bl->memop_backward; op != NULL; op = op->prev) {
@@ -1538,13 +1657,13 @@ static int backward_antic(block_t *bl) {
        memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
        if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
                /* changed */
-               rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
+               rbitset_copy(bl->anticL_in, env.curr_set, env.rbs_size);
                dump_curr(bl, "AnticL_in*");
                return 1;
        }
        dump_curr(bl, "AnticL_in");
        return 0;
-}
+}  /* backward_antic */
 
 /**
  * Replace a Load memop by a already known value.
@@ -1579,7 +1698,7 @@ static void replace_load(memop_t *op) {
                        /* a hidden cast */
                        dbg_info *db    = get_irn_dbg_info(load);
                        ir_node  *block = get_nodes_block(proj);
-                       def = new_rd_Conv(db, current_ir_graph, block, def, mode);
+                       def = new_rd_Conv(db, block, def, mode);
                }
                exchange(proj, def);
        }
@@ -1589,9 +1708,9 @@ static void replace_load(memop_t *op) {
        }
        proj = op->projs[pn_Load_X_regular];
        if (proj != NULL) {
-               exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
+               exchange(proj, new_r_Jmp(get_nodes_block(load)));
        }
-}
+}  /* replace_load */
 
 /**
  * Remove a Store memop.
@@ -1614,9 +1733,9 @@ static void remove_store(memop_t *op) {
        }
        proj = op->projs[pn_Store_X_regular];
        if (proj != NULL) {
-               exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
+               exchange(proj, new_r_Jmp(get_nodes_block(store)));
        }
-}
+}  /* remove_store */
 
 
 /**
@@ -1639,7 +1758,7 @@ static void do_replacements(block_t *bl) {
                        }
                }
        }
-}
+}  /* do_replacements */
 
 /**
  * Calculate the Avail_out sets for all basic blocks.
@@ -1660,7 +1779,7 @@ static void calcAvail(void) {
        /* restore the current sets */
        env.curr_id_2_memop = tmp_memop;
        env.curr_set        = tmp_set;
-}
+}  /* calcAvail */
 
 /**
  * Calculate the Antic_in sets for all basic blocks.
@@ -1685,22 +1804,7 @@ static void calcAntic(void) {
                ++i;
        } while (need_iter);
        DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
-}
-
-/**
- * Return the node representing the last memory in a block.
- *
- * @param bl  the block
- */
-static ir_node *find_first_memory(block_t *bl) {
-       for (;;) {
-               if (bl->memop_forward != NULL) {
-                       return bl->memop_forward->node;
-               }
-               /* if there is NO memory in this block, go to the post dominator */
-               bl = get_block_entry(get_Block_ipostdom(bl->block));
-       }
-}
+}  /* calcAntic */
 
 /**
  * Return the node representing the last memory in a block.
@@ -1715,7 +1819,7 @@ static ir_node *find_last_memory(block_t *bl) {
                /* if there is NO memory in this block, go to the dominator */
                bl = get_block_entry(get_Block_idom(bl->block));
        }
-}
+}  /* find_last_memory */
 
 /**
  * Reroute all memory users of old memory
@@ -1736,7 +1840,7 @@ static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
 
        /* all edges previously point to omem now point to nmem */
        nmem->out = omem->out;
-}
+}  /* reroute_all_mem_users */
 
 /**
  * Reroute memory users of old memory that are dominated by a given block
@@ -1775,7 +1879,7 @@ static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl)
        /* first entry is used for the length */
        edges[0].pos = j;
        nmem->out = edges;
-}
+}  /* reroute_mem_through */
 
 /**
  * insert Loads, making partly redundant Loads fully redundant
@@ -1783,10 +1887,8 @@ static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl)
 static int insert_Load(block_t *bl) {
        ir_node  *block = bl->block;
        int      i, n = get_Block_n_cfgpreds(block);
-       unsigned pos = 0;
        unsigned end = env.rbs_size - 1;
-       ir_node  *pred;
-       block_t  *pred_bl;
+       unsigned pos;
 
        DB((dbg, LEVEL_3, "processing %+F\n", block));
 
@@ -1795,14 +1897,9 @@ static int insert_Load(block_t *bl) {
                return 0;
        }
 
-       pred    = get_Block_cfgpred_block(bl->block, 0);
-       pred_bl = get_block_entry(pred);
-
-       rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
-
        if (n > 1) {
-               int     i, pos;
                ir_node **ins;
+               int     pos;
 
                NEW_ARR_A(ir_node *, ins, n);
 
@@ -1839,7 +1936,7 @@ static int insert_Load(block_t *bl) {
                                block_t *pred_bl = get_block_entry(pred);
                                int     need_phi = 0;
                                memop_t *first   = NULL;
-                               ir_mode *mode;
+                               ir_mode *mode    = NULL;
 
                                for (i = 0; i < n; ++i) {
                                        memop_t *mop;
@@ -1870,7 +1967,7 @@ static int insert_Load(block_t *bl) {
                                }
                                if (need_phi) {
                                        /* build a Phi  */
-                                       ir_node *phi = new_r_Phi(current_ir_graph, bl->block, n, ins, mode);
+                                       ir_node *phi = new_r_Phi(bl->block, n, ins, mode);
                                        memop_t *phiop = alloc_memop(phi);
 
                                        phiop->value = first->value;
@@ -1886,26 +1983,30 @@ static int insert_Load(block_t *bl) {
                }
        } else {
                /* only one predecessor, simply copy the map */
-               pred    = get_Block_cfgpred_block(bl->block, 0);
-               pred_bl = get_block_entry(pred);
+               ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
+               block_t *pred_bl = get_block_entry(pred);
 
-               rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
+               rbitset_copy(env.curr_set, pred_bl->avail_out, env.rbs_size);
 
                memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
        }
 
        if (n > 1) {
                /* check for partly redundant values */
-               for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
+               for (pos = rbitset_next(bl->anticL_in, 0, 1);
+                    pos < end;
+                    pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
                        memop_t *op = bl->id_2_memop_antic[pos];
                        int     have_some, all_same;
                        ir_node *first;
 
+                       if (rbitset_is_set(env.curr_set, pos)) {
+                               /* already avail */
+                               continue;
+                       }
+
                        assert(is_Load(op->node));
 
-                       /* ignore killed */
-                       if (op->flags & FLAG_KILLED_NODE)
-                               continue;
                        DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
 
                        have_some  = 0;
@@ -1914,14 +2015,19 @@ static int insert_Load(block_t *bl) {
                        for (i = n - 1; i >= 0; --i) {
                                ir_node *pred    = get_Block_cfgpred_block(block, i);
                                block_t *pred_bl = get_block_entry(pred);
-                               memop_t *e       = find_address_avail(pred_bl, &op->value);
                                ir_mode *mode    = op->value.mode;
+                               memop_t *e;
+                               ir_node *adr;
 
+                               adr = phi_translate(op->value.address, block, i);
+                               DB((dbg, LEVEL_3, ".. using address %+F in pred %d\n", adr, i));
+                               e   = find_address_avail(pred_bl, register_address(adr), mode);
                                if (e == NULL) {
-                                       ir_node *block = get_nodes_block(op->value.address);
-                                       if (! block_dominates(block, pred)) {
+                                       ir_node *ef_block = get_nodes_block(adr);
+                                       if (! block_dominates(ef_block, pred)) {
                                                /* cannot place a copy here */
                                                have_some = 0;
+                                               DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
                                                break;
                                        }
                                        DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
@@ -1957,17 +2063,18 @@ static int insert_Load(block_t *bl) {
                                                /* create a new Load here and make to make it fully redundant */
                                                dbg_info *db       = get_irn_dbg_info(op->node);
                                                ir_node  *last_mem = find_last_memory(pred_bl);
-                                               ir_node  *load, *def;
+                                               ir_node  *load, *def, *adr;
                                                memop_t  *new_op;
 
                                                assert(last_mem != NULL);
-                                               load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
-                                               def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
+                                               adr  = phi_translate(op->value.address, block, i);
+                                               load = new_rd_Load(db, pred, last_mem, adr, mode, cons_none);
+                                               def  = new_r_Proj(pred, load, mode, pn_Load_res);
                                                DB((dbg, LEVEL_1, "Created new %+F in %+F for party redundant %+F\n", load, pred, op->node));
 
                                                new_op                = alloc_memop(load);
-                                               new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
-                                               new_op->value.address = op->value.address;
+                                               new_op->mem           = new_r_Proj(pred, load, mode_M, pn_Load_M);
+                                               new_op->value.address = adr;
                                                new_op->value.id      = op->value.id;
                                                new_op->value.mode    = mode;
                                                new_op->value.value   = def;
@@ -1975,7 +2082,10 @@ static int insert_Load(block_t *bl) {
                                                new_op->projs[pn_Load_M]   = new_op->mem;
                                                new_op->projs[pn_Load_res] = def;
 
-                                               new_op->prev            = pred_bl->memop_backward;
+                                               new_op->prev = pred_bl->memop_backward;
+                                               if (pred_bl->memop_backward != NULL)
+                                                       pred_bl->memop_backward->next = new_op;
+
                                                pred_bl->memop_backward = new_op;
 
                                                if (pred_bl->memop_forward == NULL)
@@ -1997,7 +2107,7 @@ static int insert_Load(block_t *bl) {
                                        }
                                        in[i] = conv_to(pred_bl->avail->value.value, mode);
                                }
-                               phi = new_r_Phi(current_ir_graph, block, n, in, mode);
+                               phi = new_r_Phi(block, n, in, mode);
                                DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
 
                                phi_op = clone_memop_phi(op, phi);
@@ -2009,15 +2119,18 @@ static int insert_Load(block_t *bl) {
        /* recalculate avail by gen and kill */
        calc_gen_kill_avail(bl);
 
+       /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
+       memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
+
        if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
                /* the avail set has changed */
-               rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
-               memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
+               rbitset_copy(bl->avail_out, env.curr_set, env.rbs_size);
                dump_curr(bl, "Avail_out*");
                return 1;
        }
+       dump_curr(bl, "Avail_out");
        return 0;
-}
+}  /* insert_Load */
 
 /**
  * Insert Loads upwards.
@@ -2043,10 +2156,12 @@ static void insert_Loads_upwards(void) {
        } while (need_iter);
 
        DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
-}
+}  /* insert_Loads_upwards */
 
 /**
- * kill unreachable control flow.
+ * Kill unreachable control flow.
+ *
+ * @param irg  the graph to operate on
  */
 static void kill_unreachable_blocks(ir_graph *irg) {
        block_t *bl;
@@ -2137,7 +2252,7 @@ static void kill_unreachable_blocks(ir_graph *irg) {
 
                /* this transformation do NOT invalidate the dominance */
        }
-}
+}  /* kill_unreachable_blocks */
 
 int opt_ldst(ir_graph *irg) {
        block_t  *bl;
@@ -2153,7 +2268,7 @@ int opt_ldst(ir_graph *irg) {
        /* we need landing pads */
        remove_critical_cf_edges(irg);
 
-       dump_ir_block_graph(irg, "-XXX");
+//     dump_ir_block_graph(irg, "-XXX");
 
        if (get_opt_alias_analysis()) {
                assure_irg_entity_usage_computed(irg);
@@ -2232,7 +2347,7 @@ int opt_ldst(ir_graph *irg) {
 
        /* create address sets: for now, only the existing addresses are allowed plus one
           needed for the sentinel */
-       env.rbs_size = env.n_mem_ops + 1;
+       env.rbs_size = env.curr_adr_id + 1;
 
        /* create the current set */
        env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
@@ -2256,6 +2371,7 @@ int opt_ldst(ir_graph *irg) {
        }
 
 //     dump_block_list(&env);
+       (void) dump_block_list;
 
        calcAvail();
        calcAntic();
@@ -2279,7 +2395,7 @@ end:
        ir_nodemap_destroy(&env.adr_map);
        obstack_free(&env.obst, NULL);
 
-       dump_ir_block_graph(irg, "-YYY");
+//     dump_ir_block_graph(irg, "-YYY");
 
 #ifdef DEBUG_libfirm
        DEL_ARR_F(env.id_2_address);
@@ -2287,4 +2403,9 @@ end:
 
        current_ir_graph = rem;
        return env.changed != 0;
-}
+}  /* opt_ldst */
+
+ir_graph_pass_t *opt_ldst_pass(const char *name)
+{
+       return def_graph_pass_ret(name ? name : "ldst_df", opt_ldst);
+}  /* opt_ldst_pass */