- fixed warning
- moved the one and only ldst environment out of the DEBUG_libfirm block
- do Phi-translation of addresses when calculating the anticipated Load sets and when inserting partially redundant Loads; translated memops are cached per block
- size the bitsets by the number of registered addresses instead of the number of memory ops
diff --git a/ir/opt/opt_ldst.c b/ir/opt/opt_ldst.c
index 2dec50d..cf901f9 100644
--- a/ir/opt/opt_ldst.c
+++ b/ir/opt/opt_ldst.c
@@ -107,6 +107,7 @@ struct block_t {
        block_t  *forward_next;      /**< next block entry for forward iteration */
        block_t  *backward_next;     /**< next block entry for backward iteration */
        memop_t  *avail;             /**< used locally for the avail map */
+       memop_t  **trans_results;    /**< used to cache translated memops during the antic calculation. */
 };
 
 /**
@@ -131,13 +132,13 @@ typedef struct ldst_env_t {
 #endif
 } ldst_env;
 
+/* the one and only environment */
+static ldst_env env;
+
 #ifdef DEBUG_libfirm
 
 static firm_dbg_module_t *dbg;
 
-/* the one and only environment */
-static ldst_env env;
-
 /**
  * Dumps the block list.
  *
@@ -166,7 +167,7 @@ static void dump_block_list(ldst_env *env) {
                }
                DB((dbg, LEVEL_2, "\n}\n\n"));
        }
-}
+}  /* dump_block_list */
 
 /**
  * Dumps the current set.
@@ -175,13 +176,13 @@ static void dump_block_list(ldst_env *env) {
  * @param s    name of the set
  */
 static void dump_curr(block_t *bl, const char *s) {
-       unsigned pos = 0;
        unsigned end = env.rbs_size - 1;
+       unsigned pos;
        int      i;
 
        DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
        i = 0;
-       for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
+       for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
                memop_t *op = env.curr_id_2_memop[pos];
 
                if (i == 0) {
@@ -192,7 +193,7 @@ static void dump_curr(block_t *bl, const char *s) {
                i = (i + 1) & 3;
        }
        DB((dbg, LEVEL_2, "\n}\n"));
-}
+}  /* dump_curr */
 
 #else
 #define dump_block_list()
@@ -204,13 +205,13 @@ static block_t *get_block_entry(const ir_node *block) {
        assert(is_Block(block));
 
        return get_irn_link(block);
-}
+}  /* get_block_entry */
 
 /** Get the memop entry for a memory operation node */
 static memop_t *get_irn_memop(const ir_node *irn) {
        assert(! is_Block(irn));
        return get_irn_link(irn);
-}
+}  /* get_irn_memop */
 
 /**
  * Walk over the memory edges from definition to users.
@@ -250,7 +251,7 @@ static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, v
        }
        if (post)
                post(irn, ctx);
-}
+}  /* walk_memory */
 
 /**
  * Walks over all memory nodes of a graph.
@@ -272,15 +273,69 @@ static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post
        walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
 
        ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
-}
+}  /* walk_memory_irg */
 
 /**
- * Walker: allocate an block entry for every block.
+ * Register an address and allocate a (sparse, 0..n) ID for it.
+ *
+ * @param adr  the IR-node representing the address
+ *
+ * @return the allocated id
  */
-static void prepare_blocks(ir_node *block, void *ctx) {
+static unsigned register_address(ir_node *adr) {
+       address_entry *entry;
+
+       /* skip Confirms and Casts */
+restart:
+       if (is_Confirm(adr)) {
+               adr = get_Confirm_value(adr);
+               goto restart;
+       }
+       if (is_Cast(adr)) {
+               adr = get_Cast_op(adr);
+               goto restart;
+       }
+
+       entry = ir_nodemap_get(&env.adr_map, adr);
+
+       if (entry == NULL) {
+               /* new address */
+               entry = obstack_alloc(&env.obst, sizeof(*entry));
+
+               entry->id = env.curr_adr_id++;
+               ir_nodemap_insert(&env.adr_map, adr, entry);
+
+               DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
+#ifdef DEBUG_libfirm
+               ARR_APP1(ir_node *, env.id_2_address, adr);
+#endif
+       }
+       return entry->id;
+}  /* register_address */
+
+
+/**
+ * Translate an address through a Phi node into a given predecessor
+ * block.
+ *
+ * @param address  the address
+ * @param block    the block
+ * @param pos      the position of the predecessor in block
+ */
+static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos) {
+       if (is_Phi(address) && get_nodes_block(address) == block)
+               address = get_Phi_pred(address, pos);
+       return address;
+}  /* phi_translate */
+
+/**
+ * Walker: allocate a block entry for every block
+ * and register all potential addresses.
+ */
+static void prepare_blocks(ir_node *irn, void *ctx) {
        (void)ctx;
 
-       if (is_Block(block)) {
+       if (is_Block(irn)) {
                block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
                int     n;
 
@@ -290,22 +345,34 @@ static void prepare_blocks(ir_node *block, void *ctx) {
                entry->id_2_memop_avail = NULL;
                entry->anticL_in        = NULL;
                entry->id_2_memop_antic = NULL;
-               entry->block            = block;
+               entry->block            = irn;
                entry->forward_next     = NULL;
                entry->backward_next    = NULL;
                entry->avail            = NULL;
-               set_irn_link(block, entry);
+               entry->trans_results    = NULL;
+               set_irn_link(irn, entry);
 
-               set_Block_phis(block, NULL);
+               set_Block_phis(irn, NULL);
 
                /* use block marks to track unreachable blocks */
-               set_Block_mark(block, 0);
+               set_Block_mark(irn, 0);
 
-               n = get_Block_n_cfgpreds(block);
+               n = get_Block_n_cfgpreds(irn);
                if (n > env.max_cfg_preds)
                        env.max_cfg_preds = n;
+       } else {
+               ir_mode *mode = get_irn_mode(irn);
+
+               if (mode_is_reference(mode)) {
+                       /*
+                        * Register ALL possible addresses: this is overkill for now, but
+                        * simpler than doing it only for the translated addresses
+                        * (which would be sufficient at the moment).
+                        */
+                       (void)register_address(irn);
+               }
        }
-}
+}  /* prepare_blocks */
 
 /**
  * Post-Walker, link in all Phi's
@@ -317,7 +384,7 @@ static void link_phis(ir_node *irn, void *ctx) {
                ir_node *block = get_nodes_block(irn);
                add_Block_phi(block, irn);
        }
-}
+}  /* link_phis */
 
 /**
  * Block walker: creates the inverse post-order list for the CFG.
@@ -337,7 +404,7 @@ static void inverse_post_order(ir_node *block, void *ctx) {
        /* remember the first visited (last in list) entry, needed for later */
        if (env.backward == NULL)
                env.backward = entry;
-}
+}  /* inverse_post_order */
 
 /**
  * Block walker: create backward links for the memops of a block.
@@ -367,12 +434,15 @@ static void collect_backward(ir_node *block, void *ctx) {
                last     = op;
        }
        entry->memop_backward = last;
-}
+}  /* collect_backward */
 
 /**
  * Allocate a memop.
  *
- * @param irn  the IR-node representing the memop
+ * @param irn  the IR-node representing the memop or NULL
+ *             if this is a translated (virtual) memop
+ *
+ * @return the allocated memop
  */
 static memop_t *alloc_memop(ir_node *irn) {
        memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
@@ -389,9 +459,10 @@ static memop_t *alloc_memop(ir_node *irn) {
 
        memset(m->projs, 0, sizeof(m->projs));
 
-       set_irn_link(irn, m);
+       if (irn != NULL)
+               set_irn_link(irn, m);
        return m;
-}
+}  /* alloc_memop */
 
 /**
  * Create a memop for a Phi-replacement.
@@ -412,43 +483,7 @@ static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
 
        set_irn_link(phi, m);
        return m;
-}
-
-/**
- * Register an address and allocate an ID for it.
- *
- * @param adr  the IR-node representing the address
- */
-static unsigned register_address(ir_node *adr) {
-       address_entry *entry;
-
-       /* skip Confirms and Casts */
-restart:
-       if (is_Confirm(adr)) {
-               adr = get_Confirm_value(adr);
-               goto restart;
-       }
-       if (is_Cast(adr)) {
-               adr = get_Cast_op(adr);
-               goto restart;
-       }
-
-       entry = ir_nodemap_get(&env.adr_map, adr);
-
-       if (entry == NULL) {
-               /* new address */
-               entry = obstack_alloc(&env.obst, sizeof(*entry));
-
-               entry->id = env.curr_adr_id++;
-               ir_nodemap_insert(&env.adr_map, adr, entry);
-
-               DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
-#ifdef DEBUG_libfirm
-               ARR_APP1(ir_node *, env.id_2_address, adr);
-#endif
-       }
-       return entry->id;
-}
+}  /* clone_memop_phi */
 
 /**
  * Return the memory properties of a call node.
@@ -473,7 +508,7 @@ static unsigned get_Call_memory_properties(ir_node *call) {
                }
        }
        return prop & (mtp_property_const|mtp_property_pure);
-}
+}  /* get_Call_memory_properties */
 
 /**
  * Returns an entity if the address ptr points to a constant one.
@@ -482,8 +517,7 @@ static unsigned get_Call_memory_properties(ir_node *call) {
  *
  * @return an entity or NULL
  */
-static ir_entity *find_constant_entity(ir_node *ptr)
-{
+static ir_entity *find_constant_entity(ir_node *ptr) {
        for (;;) {
                if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
                        return get_SymConst_entity(ptr);
@@ -1048,7 +1082,7 @@ static void update_Load_memop(memop_t *m) {
                /* no user, KILL it */
                mark_replace_load(m, NULL);
        }
-}
+}  /* update_Load_memop */
 
 /**
  * Update a memop for a Store.
@@ -1095,7 +1129,7 @@ static void update_Store_memop(memop_t *m) {
        }
        m->value.value = get_Store_value(store);
        m->value.mode  = get_irn_mode(m->value.value);
-}
+}  /* update_Store_memop */
 
 /**
  * Update a memop for a Call.
@@ -1132,7 +1166,7 @@ static void update_Call_memop(memop_t *m) {
                        break;
                }
        }
-}
+}  /* update_Call_memop */
 
 /**
  * Update a memop for a Div/Mod/Quot/DivMod.
@@ -1159,7 +1193,7 @@ static void update_DivOp_memop(memop_t *m) {
                        break;
                }
        }
-}
+}  /* update_DivOp_memop */
 
 /**
  * Update a memop for a Phi.
@@ -1169,7 +1203,7 @@ static void update_DivOp_memop(memop_t *m) {
 static void update_Phi_memop(memop_t *m) {
        /* the Phi is it's own mem */
        m->mem = m->node;
-}
+}  /* update_Phi_memop */
 
 /**
  * Memory walker: collect all memory ops and build topological lists.
@@ -1243,12 +1277,16 @@ static void collect_memops(ir_node *irn, void *ctx) {
                        entry->memop_backward       = op;
                }
        }
-}
+}  /* collect_memops */
 
 /**
  * Find an address in the current set.
  *
  * @param value  the value to be searched for
+ *
+ * @return a memop for the value or NULL if the value does
+ *         not exist in the set or cannot be converted into
+ *         the requested mode
  */
 static memop_t *find_address(const value_t *value) {
        if (rbitset_is_set(env.curr_set, value->id)) {
@@ -1263,28 +1301,27 @@ static memop_t *find_address(const value_t *value) {
                        return res;
        }
        return NULL;
-}
+}  /* find_address */
 
 /**
  * Find an address in the avail_out set.
  *
  * @param bl     the block
- * @param value  the value to be searched for
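+ * @param id     the value ID of the address to be searched for
+ * @param mode   the requested mode of the value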
  */
-static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
-       if (rbitset_is_set(bl->avail_out, value->id)) {
-               memop_t *res = bl->id_2_memop_avail[value->id];
+static memop_t *find_address_avail(const block_t *bl, unsigned id, const ir_mode *mode) {
+       if (rbitset_is_set(bl->avail_out, id)) {
+               memop_t *res = bl->id_2_memop_avail[id];
 
-               if (res->value.mode == value->mode)
+               if (res->value.mode == mode)
                        return res;
                /* allow hidden casts */
                if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
-                   get_mode_arithmetic(value->mode) == irma_twos_complement &&
-                   get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
+                   get_mode_arithmetic(mode) == irma_twos_complement &&
+                   get_mode_size_bits(res->value.mode) == get_mode_size_bits(mode))
                        return res;
        }
        return NULL;
-}
+}  /* find_address_avail */
 
 /**
  * Kill all addresses from the current set.
@@ -1294,7 +1331,7 @@ static void kill_all(void) {
 
        /* set sentinel */
        rbitset_set(env.curr_set, env.rbs_size - 1);
-}
+}  /* kill_all */
 
 /**
  * Kill memops that are not alias free due to a Store value from the current set.
@@ -1302,19 +1339,20 @@ static void kill_all(void) {
  * @param value  the Store value
  */
 static void kill_memops(const value_t *value) {
-       unsigned pos = 0;
        unsigned end = env.rbs_size - 1;
+       unsigned pos;
 
-       for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
+       for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
                memop_t *op = env.curr_id_2_memop[pos];
 
                if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
                                                          op->value.address, op->value.mode)) {
                        rbitset_clear(env.curr_set, pos);
                        env.curr_id_2_memop[pos] = NULL;
+                       DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
                }
        }
-}
+}  /* kill_memops */
 
 /**
  * Add the value of a memop to the current set.
@@ -1324,7 +1362,7 @@ static void kill_memops(const value_t *value) {
 static void add_memop(memop_t *op) {
        rbitset_set(env.curr_set, op->value.id);
        env.curr_id_2_memop[op->value.id] = op;
-}
+}  /* add_memop */
 
 /**
  * Add the value of a memop to the avail_out set.
@@ -1335,11 +1373,14 @@ static void add_memop(memop_t *op) {
 static void add_memop_avail(block_t *bl, memop_t *op) {
        rbitset_set(bl->avail_out, op->value.id);
        bl->id_2_memop_avail[op->value.id] = op;
-}
+}  /* add_memop_avail */
 
 /**
  * Check, if we can convert a value of one mode to another mode
  * without changing the representation of bits.
+ *
+ * @param from  the original mode
+ * @param to    the destination mode
  */
 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
        if (get_mode_arithmetic(from) == irma_twos_complement &&
@@ -1347,10 +1388,16 @@ static int can_convert_to(const ir_mode *from, const ir_mode *to) {
            get_mode_size_bits(from) == get_mode_size_bits(to))
                return 1;
        return 0;
-}
+}  /* can_convert_to */
 
 /**
- * Add a Conv if needed.
+ * Add a Conv to the requested mode if needed.
+ *
+ * @param irn   the IR-node to convert
+ * @param mode  the destination mode
+ *
+ * @return the possibly converted node or NULL
+ *         if the conversion is not possible
  */
 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
        ir_mode *other = get_irn_mode(irn);
@@ -1364,7 +1411,26 @@ static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
                return NULL;
        }
        return irn;
-}
+}  /* conv_to */
+
+/**
+ * Update the address of a value if this address was a Load result
+ * and that Load has been killed.
+ *
+ * @param value  the value whose address is updated
+ */
+static void update_address(value_t *value) {
+       if (is_Proj(value->address)) {
+               ir_node *load = get_Proj_pred(value->address);
+
+               if (is_Load(load)) {
+                       const memop_t *op = get_irn_memop(load);
+
+                       if (op->flags & FLAG_KILLED_NODE)
+                               value->address = op->replace;
+               }
+       }
+}  /* update_address */
 
 /**
  * Do forward dataflow analysis on the given block and calculate the
@@ -1387,7 +1453,10 @@ static void calc_gen_kill_avail(block_t *bl) {
                case iro_Load:
                        if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
                                /* do we have this already? */
-                               memop_t *other = find_address(&op->value);
+                               memop_t *other;
+
+                               update_address(&op->value);
+                               other = find_address(&op->value);
                                if (other != NULL && other != op) {
                                        def = conv_to(other->value.value, op->value.mode);
                                        if (def != NULL) {
@@ -1412,7 +1481,10 @@ static void calc_gen_kill_avail(block_t *bl) {
                case iro_Store:
                        if (! (op->flags & FLAG_KILLED_NODE)) {
                                /* do we have this store already */
-                               memop_t *other = find_address(&op->value);
+                               memop_t *other;
+
+                               update_address(&op->value);
+                               other = find_address(&op->value);
                                if (other != NULL) {
                                        if (is_Store(other->node)) {
                                                if (op != other && !(other->flags & FLAG_IGNORE) &&
@@ -1445,7 +1517,7 @@ static void calc_gen_kill_avail(block_t *bl) {
                                kill_all();
                }
        }
-}
+}  /* calc_gen_kill_avail */
 
 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
 
@@ -1462,7 +1534,7 @@ static void forward_avail(block_t *bl) {
 
        calc_gen_kill_avail(bl);
        dump_curr(bl, "Avail_out");
-}
+}  /* forward_avail */
 
 /**
  * Do backward dataflow analysis on a given block to calculate the antic set
@@ -1474,16 +1546,71 @@ static void forward_avail(block_t *bl) {
  */
 static int backward_antic(block_t *bl) {
        memop_t *op;
-       int     n = get_Block_n_cfg_outs(bl->block);
+       ir_node *block = bl->block;
+       int     n = get_Block_n_cfg_outs(block);
+
+       if (n == 1) {
+               ir_node  *succ    = get_Block_cfg_out(block, 0);
+               block_t  *succ_bl = get_block_entry(succ);
+               int      pred_pos = get_Block_cfgpred_pos(succ, block);
+               unsigned end      = env.rbs_size - 1;
+               unsigned pos;
+
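+               /* start with the empty set; the translated anticL_in entries of the single successor are added below */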
+               kill_all();
+
+               if (bl->trans_results == NULL) {
+                       /* allocate the translate cache */
+                       unsigned size = env.curr_adr_id * sizeof(bl->trans_results[0]);
+                       bl->trans_results = obstack_alloc(&env.obst, size);
+                       memset(bl->trans_results, 0, size);
+               }
 
-       if (n >= 1) {
-               ir_node *succ    = get_Block_cfg_out(bl->block, 0);
+               /* check for partly redundant values */
+               for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
+                    pos < end;
+                    pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
+                       /*
+                        * Do Phi-translation here: note that at this point the nodes are
+                        * not changed, so we can safely cache the results.
+                        * However: Loads of Load results ARE bad, because we have no way
+                        * to translate them yet ...
+                        */
+                       memop_t *op = bl->trans_results[pos];
+                       if (op == NULL) {
+                               /* not yet translated */
+                               ir_node *adr, *trans_adr;
+
+                               op  = succ_bl->id_2_memop_antic[pos];
+                               adr = op->value.address;
+
+                               trans_adr = phi_translate(adr, succ, pred_pos);
+                               if (trans_adr != adr) {
+                                       /* create a new entry for the translated one */
+                                       memop_t *new_op;
+
+                                       new_op = alloc_memop(NULL);
+                                       new_op->value.address = trans_adr;
+                                       new_op->value.id      = register_address(trans_adr);
+                                       new_op->value.mode    = op->value.mode;
+                                       new_op->node          = op->node; /* we need the node to decide if Load/Store */
+                                       new_op->flags         = op->flags;
+
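+                                       /* cache the translated memop; the nodes are not changed here, so the cache stays valid */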
+                                       bl->trans_results[pos] = new_op;
+                                       op = new_op;
+                               }
+                       }
+                       env.curr_id_2_memop[op->value.id] = op;
+                       rbitset_set(env.curr_set, op->value.id);
+               }
+       } else if (n > 1) {
+               ir_node *succ    = get_Block_cfg_out(block, 0);
                block_t *succ_bl = get_block_entry(succ);
                int i;
 
                rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
                memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
 
+               /* Hmm: probably we want to kill merges of Loads and Stores here */
                for (i = n - 1; i > 0; --i) {
                        ir_node *succ    = get_Block_cfg_out(bl->block, i);
                        block_t *succ_bl = get_block_entry(succ);
@@ -1495,18 +1622,6 @@ static int backward_antic(block_t *bl) {
                kill_all();
        }
 
-#if 0
-       /* cleanup: kill those Loads which address is not available */
-       for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
-               memop_t *op     = succ_bl->id_2_memop[pos];
-               ir_node *ptr    = get_Load_ptr(op->node);
-               ir_node *ptr_bl = get_nodes_block(ptr);
-
-               if (!block_dominates(ptr_bl, bl->block))
-                       rbitset_clear(env.curr_set, pos);
-       }
-#endif
-
        dump_curr(bl, "AnticL_out");
 
        for (op = bl->memop_backward; op != NULL; op = op->prev) {
@@ -1544,7 +1659,7 @@ static int backward_antic(block_t *bl) {
        }
        dump_curr(bl, "AnticL_in");
        return 0;
-}
+}  /* backward_antic */
 
 /**
  * Replace a Load memop by a already known value.
@@ -1591,7 +1706,7 @@ static void replace_load(memop_t *op) {
        if (proj != NULL) {
                exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
        }
-}
+}  /* replace_load */
 
 /**
  * Remove a Store memop.
@@ -1616,7 +1731,7 @@ static void remove_store(memop_t *op) {
        if (proj != NULL) {
                exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
        }
-}
+}  /* remove_store */
 
 
 /**
@@ -1639,7 +1754,7 @@ static void do_replacements(block_t *bl) {
                        }
                }
        }
-}
+}  /* do_replacements */
 
 /**
  * Calculate the Avail_out sets for all basic blocks.
@@ -1660,7 +1775,7 @@ static void calcAvail(void) {
        /* restore the current sets */
        env.curr_id_2_memop = tmp_memop;
        env.curr_set        = tmp_set;
-}
+}  /* calcAvail */
 
 /**
  * Calculate the Antic_in sets for all basic blocks.
@@ -1685,22 +1800,7 @@ static void calcAntic(void) {
                ++i;
        } while (need_iter);
        DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
-}
-
-/**
- * Return the node representing the last memory in a block.
- *
- * @param bl  the block
- */
-static ir_node *find_first_memory(block_t *bl) {
-       for (;;) {
-               if (bl->memop_forward != NULL) {
-                       return bl->memop_forward->node;
-               }
-               /* if there is NO memory in this block, go to the post dominator */
-               bl = get_block_entry(get_Block_ipostdom(bl->block));
-       }
-}
+}  /* calcAntic */
 
 /**
  * Return the node representing the last memory in a block.
@@ -1715,7 +1815,7 @@ static ir_node *find_last_memory(block_t *bl) {
                /* if there is NO memory in this block, go to the dominator */
                bl = get_block_entry(get_Block_idom(bl->block));
        }
-}
+}  /* find_last_memory */
 
 /**
  * Reroute all memory users of old memory
@@ -1736,7 +1836,7 @@ static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
 
        /* all edges previously point to omem now point to nmem */
        nmem->out = omem->out;
-}
+}  /* reroute_all_mem_users */
 
 /**
  * Reroute memory users of old memory that are dominated by a given block
@@ -1775,7 +1875,7 @@ static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl)
        /* first entry is used for the length */
        edges[0].pos = j;
        nmem->out = edges;
-}
+}  /* reroute_mem_through */
 
 /**
  * insert Loads, making partly redundant Loads fully redundant
@@ -1783,10 +1883,8 @@ static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl)
 static int insert_Load(block_t *bl) {
        ir_node  *block = bl->block;
        int      i, n = get_Block_n_cfgpreds(block);
-       unsigned pos = 0;
        unsigned end = env.rbs_size - 1;
-       ir_node  *pred;
-       block_t  *pred_bl;
+       unsigned pos;
 
        DB((dbg, LEVEL_3, "processing %+F\n", block));
 
@@ -1795,14 +1893,9 @@ static int insert_Load(block_t *bl) {
                return 0;
        }
 
-       pred    = get_Block_cfgpred_block(bl->block, 0);
-       pred_bl = get_block_entry(pred);
-
-       rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
-
        if (n > 1) {
-               int     i, pos;
                ir_node **ins;
+               int     pos;
 
                NEW_ARR_A(ir_node *, ins, n);
 
@@ -1886,8 +1979,8 @@ static int insert_Load(block_t *bl) {
                }
        } else {
                /* only one predecessor, simply copy the map */
-               pred    = get_Block_cfgpred_block(bl->block, 0);
-               pred_bl = get_block_entry(pred);
+               ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
+               block_t *pred_bl = get_block_entry(pred);
 
                rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
 
@@ -1896,16 +1989,20 @@ static int insert_Load(block_t *bl) {
 
        if (n > 1) {
                /* check for partly redundant values */
-               for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
+               for (pos = rbitset_next(bl->anticL_in, 0, 1);
+                    pos < end;
+                    pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
                        memop_t *op = bl->id_2_memop_antic[pos];
                        int     have_some, all_same;
                        ir_node *first;
 
+                       if (rbitset_is_set(env.curr_set, pos)) {
+                               /* already avail */
+                               continue;
+                       }
+
                        assert(is_Load(op->node));
 
-                       /* ignore killed */
-                       if (op->flags & FLAG_KILLED_NODE)
-                               continue;
                        DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
 
                        have_some  = 0;
@@ -1914,14 +2011,19 @@ static int insert_Load(block_t *bl) {
                        for (i = n - 1; i >= 0; --i) {
                                ir_node *pred    = get_Block_cfgpred_block(block, i);
                                block_t *pred_bl = get_block_entry(pred);
-                               memop_t *e       = find_address_avail(pred_bl, &op->value);
                                ir_mode *mode    = op->value.mode;
+                               memop_t *e;
+                               ir_node *adr;
 
+                               adr = phi_translate(op->value.address, block, i);
+                               DB((dbg, LEVEL_3, ".. using address %+F in pred %d\n", adr, i));
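+                               /* look up the translated address in the predecessor's avail_out set */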
+                               e   = find_address_avail(pred_bl, register_address(adr), mode);
                                if (e == NULL) {
-                                       ir_node *block = get_nodes_block(op->value.address);
-                                       if (! block_dominates(block, pred)) {
+                                       ir_node *ef_block = get_nodes_block(adr);
+                                       if (! block_dominates(ef_block, pred)) {
                                                /* cannot place a copy here */
                                                have_some = 0;
+                                               DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
                                                break;
                                        }
                                        DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
@@ -1957,17 +2059,18 @@ static int insert_Load(block_t *bl) {
                                                /* create a new Load here and make to make it fully redundant */
                                                dbg_info *db       = get_irn_dbg_info(op->node);
                                                ir_node  *last_mem = find_last_memory(pred_bl);
-                                               ir_node  *load, *def;
+                                               ir_node  *load, *def, *adr;
                                                memop_t  *new_op;
 
                                                assert(last_mem != NULL);
-                                               load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
+                                               adr  = phi_translate(op->value.address, block, i);
+                                               load = new_rd_Load(db, current_ir_graph, pred, last_mem, adr, mode, cons_none);
                                                def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
                                                DB((dbg, LEVEL_1, "Created new %+F in %+F for party redundant %+F\n", load, pred, op->node));
 
                                                new_op                = alloc_memop(load);
                                                new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
-                                               new_op->value.address = op->value.address;
+                                               new_op->value.address = adr;
                                                new_op->value.id      = op->value.id;
                                                new_op->value.mode    = mode;
                                                new_op->value.value   = def;
@@ -1975,7 +2078,10 @@ static int insert_Load(block_t *bl) {
                                                new_op->projs[pn_Load_M]   = new_op->mem;
                                                new_op->projs[pn_Load_res] = def;
 
-                                               new_op->prev            = pred_bl->memop_backward;
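+                                               /* append the new memop at the end of the predecessor's memop list */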
+                                               new_op->prev = pred_bl->memop_backward;
+                                               if (pred_bl->memop_backward != NULL)
+                                                       pred_bl->memop_backward->next = new_op;
+
                                                pred_bl->memop_backward = new_op;
 
                                                if (pred_bl->memop_forward == NULL)
@@ -2009,15 +2115,18 @@ static int insert_Load(block_t *bl) {
        /* recalculate avail by gen and kill */
        calc_gen_kill_avail(bl);
 
+       /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
+       memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
+
        if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
                /* the avail set has changed */
                rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
-               memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
                dump_curr(bl, "Avail_out*");
                return 1;
        }
+       dump_curr(bl, "Avail_out");
        return 0;
-}
+}  /* insert_Load */
 
 /**
  * Insert Loads upwards.
@@ -2043,10 +2152,12 @@ static void insert_Loads_upwards(void) {
        } while (need_iter);
 
        DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
-}
+}  /* insert_Loads_upwards */
 
 /**
- * kill unreachable control flow.
+ * Kill unreachable control flow.
+ *
+ * @param irg  the graph to operate on
  */
 static void kill_unreachable_blocks(ir_graph *irg) {
        block_t *bl;
@@ -2137,7 +2248,7 @@ static void kill_unreachable_blocks(ir_graph *irg) {
 
                /* this transformation do NOT invalidate the dominance */
        }
-}
+}  /* kill_unreachable_blocks */
 
 int opt_ldst(ir_graph *irg) {
        block_t  *bl;
@@ -2153,7 +2264,7 @@ int opt_ldst(ir_graph *irg) {
        /* we need landing pads */
        remove_critical_cf_edges(irg);
 
-       dump_ir_block_graph(irg, "-XXX");
+//     dump_ir_block_graph(irg, "-XXX");
 
        if (get_opt_alias_analysis()) {
                assure_irg_entity_usage_computed(irg);
@@ -2232,7 +2343,7 @@ int opt_ldst(ir_graph *irg) {
 
        /* create address sets: for now, only the existing addresses are allowed plus one
           needed for the sentinel */
-       env.rbs_size = env.n_mem_ops + 1;
+       env.rbs_size = env.curr_adr_id + 1;
 
        /* create the current set */
        env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
@@ -2279,7 +2390,7 @@ end:
        ir_nodemap_destroy(&env.adr_map);
        obstack_free(&env.obst, NULL);
 
-       dump_ir_block_graph(irg, "-YYY");
+//     dump_ir_block_graph(irg, "-YYY");
 
 #ifdef DEBUG_libfirm
        DEL_ARR_F(env.id_2_address);
@@ -2287,4 +2398,4 @@ end:
 
        current_ir_graph = rem;
        return env.changed != 0;
-}
+}  /* opt_ldst */