init_sp Unknown node constructed with CSE disabled
[libfirm] / ir / be / bespillremat.c
index a98380c..3efbf99 100644 (file)
@@ -31,6 +31,7 @@
 #include "irloop_t.h"
 #include "phiclass.h"
 #include "iredges.h"
+#include "execfreq.h"
 
 #include <lpp/lpp.h>
 #include <lpp/lpp_net.h>
 
 #define DUMP_SOLUTION
 #define DUMP_ILP
-#define KEEPALIVE /* keep alive all inserted remats and dump graph with remats */
+//#define KEEPALIVE /* keep alive all inserted remats and dump graph with remats */
 #define COLLECT_REMATS /* enable rematerialization */
 #define COLLECT_INVERSE_REMATS /* enable placement of inverse remats */
 #define REMAT_WHILE_LIVE /* only remat values that are live */
 //#define NO_ENLARGE_L1V3N355 /* do not remat after the death of some operand */
-#define EXECFREQ_LOOPDEPH /* compute execution frequency from loop depth only */
+//#define EXECFREQ_LOOPDEPH /* compute execution frequency from loop depth only */
 //#define MAY_DIE_AT_PRE_REMAT /* allow values to die after a pre remat */
-//#define CHECK_POST_REMAT /* check pressure after post remats (conservative but otherwise we can temporarily exceed the register pressure) */
+#define CHECK_POST_REMAT /* check pressure after post remats (conservative but otherwise we can temporarily exceed the register pressure) */
 #define NO_SINGLE_USE_REMATS /* do not repair schedule */
+//#define KEEPALIVE_SPILLS
+//#define KEEPALIVE_RELOADS
+#define GOODWIN_REDUCTION
 
 #define  SOLVE
-#undef  SOLVE_LOCAL
+//#define  SOLVE_LOCAL
 #define LPP_SERVER "i44pc52"
 #define LPP_SOLVER "cplex"
 
-#ifndef EXECFREQ_LOOPDEPH
-#include "execfreq.h"
-#endif
-
 #define COST_LOAD      10
 #define COST_STORE     50
 #define COST_REMAT     1
 
-#define ILP_TIMEOUT    20
+#define ILP_TIMEOUT    120
 
 #define ILP_UNDEF              -1
 
@@ -95,9 +95,9 @@ typedef struct _spill_ilp_t {
        ir_node                      *keep;
 #endif
        set                          *values; /**< for collecting all definitions of values before running ssa-construction */
-#ifndef EXECFREQ_LOOPDEPH
        set                          *execfreqs;
-#endif
+       pset                         *spills;
+       ir_node                      *m_unknown;
        DEBUG_ONLY(firm_dbg_module_t * dbg);
 } spill_ilp_t;
 
@@ -285,21 +285,35 @@ cmp_keyval(const void *a, const void *b, size_t size)
        return !(p->key == q->key);
 }
 
-static float
+static double
 execution_frequency(const spill_ilp_t * si, const ir_node * irn)
 {
-#ifdef EXECFREQ_LOOPDEPH
-       if(is_Block(irn))
-               return expf((float)get_loop_depth(get_irn_loop(irn)) * logf(10));
-       else
-               return expf((float)get_loop_depth(get_irn_loop(get_nodes_block(irn))) * logf(10));
-#else
-       if(is_Block(irn)) {
-               return get_block_execfreq(si->execfreqs, irn);
+#define FUDGE 0.001
+       if(si->execfreqs) {
+               if(is_Block(irn)) {
+                       return get_block_execfreq(si->execfreqs, irn) + FUDGE;
+               } else {
+                       return get_block_execfreq(si->execfreqs, get_nodes_block(irn)) + FUDGE;
+               }
        } else {
-               return get_block_execfreq(si->execfreqs, get_nodes_block(irn));
+               if(is_Block(irn))
+                       return exp(get_loop_depth(get_irn_loop(irn)) * log(10)) + FUDGE;
+               else
+                       return exp(get_loop_depth(get_irn_loop(get_nodes_block(irn))) * log(10)) + FUDGE;
        }
-#endif
+}
+
+static double
+get_cost(const spill_ilp_t * si, const ir_node * irn)
+{
+       if(be_is_Spill(irn)) {
+               return COST_STORE;
+       } else if(be_is_Reload(irn)){
+               return COST_LOAD;
+       } else {
+               return arch_get_op_estimated_cost(si->chordal_env->birg->main_env->arch_env, irn);
+       }
+
 }
 
 /**
@@ -353,7 +367,7 @@ get_remat_from_op(spill_ilp_t * si, const ir_node * dest_value, const ir_node *
 
                remat = obstack_alloc(si->obst, sizeof(*remat));
                remat->op = op;
-               remat->cost = COST_REMAT; /* TODO ask backend for real cost */
+               remat->cost = get_cost(si, op);
                remat->value = dest_value;
                remat->proj = proj;
                remat->inverse = 0;
@@ -799,6 +813,39 @@ insert_remat_before(spill_ilp_t * si, const remat_t * remat, const ir_node * pos
        }
 }
 
+static int
+get_block_n_succs(const ir_node *block) {
+       const ir_edge_t *edge;
+
+       assert(edges_activated(current_ir_graph));
+
+       edge = get_block_succ_first(block);
+       if (! edge)
+               return 0;
+
+       edge = get_block_succ_next(block, edge);
+       return edge ? 2 : 1;
+}
+
+static int
+is_merge_edge(const ir_node * bb)
+{
+#ifdef GOODWIN_REDUCTION
+       return get_block_n_succs(bb) == 1;
+#else
+       return 1;
+#endif
+}
+
+static int
+is_diverge_edge(const ir_node * bb)
+{
+#ifdef GOODWIN_REDUCTION
+       return get_Block_n_cfgpreds(bb) == 1;
+#else
+       return 1;
+#endif
+}
 
 /**
  * Insert (so far unused) remats into the irg to
@@ -966,67 +1013,48 @@ walker_remat_insertor(ir_node * bb, void * data)
        live_foreach(bb, li) {
                ir_node        *value = (ir_node *) li->irn;
 
-               /* add remats at end of block */
-               if (live_is_end(li) && has_reg_class(si, value)) {
-                       remat_info_t   *remat_info,
-                                                   query;
-                       remat_t        *remat;
+               /* add remats at end if successor has multiple predecessors */
+               if(is_merge_edge(bb)) {
+                       /* add remats at end of block */
+                       if (live_is_end(li) && has_reg_class(si, value)) {
+                               remat_info_t   *remat_info,
+                                                          query;
+                               remat_t        *remat;
 
-                       query.irn = value;
-                       query.remats = NULL;
-                       query.remats_by_operand = NULL;
-                       remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
+                               query.irn = value;
+                               query.remats = NULL;
+                               query.remats_by_operand = NULL;
+                               remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
 
-                       if(remat_info && remat_info->remats) {
-                               pset_foreach(remat_info->remats, remat) {
-                                       DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at end of block %+F\n", remat->op, bb));
+                               if(remat_info && remat_info->remats) {
+                                       pset_foreach(remat_info->remats, remat) {
+                                               DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at end of block %+F\n", remat->op, bb));
 
-                                       insert_remat_before(si, remat, bb, NULL);
+                                               insert_remat_before(si, remat, bb, NULL);
+                                       }
                                }
                        }
                }
+               if(is_diverge_edge(bb)) {
+                       /* add remat2s at beginning of block */
+                       if ((live_is_in(li) || (is_Phi(value) && get_nodes_block(value)==bb)) && has_reg_class(si, value)) {
+                               remat_info_t   *remat_info,
+                                                          query;
+                               remat_t        *remat;
 
-               /* add remat2s at beginning of block */
-               if ((live_is_in(li) || (is_Phi(value) && get_nodes_block(value)==bb)) && has_reg_class(si, value)) {
-                       remat_info_t   *remat_info,
-                                                   query;
-                       remat_t        *remat;
+                               query.irn = value;
+                               query.remats = NULL;
+                               query.remats_by_operand = NULL;
+                               remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
 
-                       query.irn = value;
-                       query.remats = NULL;
-                       query.remats_by_operand = NULL;
-                       remat_info = set_find(si->remat_info, &query, sizeof(query), HASH_PTR(value));
-
-                       if(remat_info && remat_info->remats) {
-                               pset_foreach(remat_info->remats, remat) {
-                                       DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at beginning of block %+F\n", remat->op, bb));
-
-                                       /* put the remat here if all its args are available */
-                                       insert_remat_after(si, remat, bb, NULL);
-
-#if 0
-                                       for(i=0, n=get_irn_arity(remat->op); i<n; ++i) {
-                                               ir_node   *remat_arg = get_irn_n(remat->op, i);
+                               if(remat_info && remat_info->remats) {
+                                       pset_foreach(remat_info->remats, remat) {
+                                               DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at beginning of block %+F\n", remat->op, bb));
 
-                                               if(has_reg_class(si, remat_arg) && is_live_in(bb, remat_arg)) {
-                                                       insert_remat_after(si, remat, bb, NULL);
-                                                       break;
-                                               }
-                                       }
-#endif
-
-#if 0
-                       if(remat_info && remat_info->remats_by_operand) {
-                               pset_foreach(remat_info->remats_by_operand, remat) {
-                                       DBG((si->dbg, LEVEL_4, "\t  considering remat %+F at beginning of block %+F\n", remat->op, bb));
-#ifdef REMAT_WHILE_LIVE
-                                       if(is_live_in(bb, remat->value)) {
+                                               /* put the remat here if all its args are available */
                                                insert_remat_after(si, remat, bb, NULL);
+
                                        }
-#else
-                                       insert_remat_after(si, remat, bb, NULL);
-#endif
-#endif
                                }
                        }
                }
@@ -1093,17 +1121,17 @@ luke_endwalker(ir_node * bb, void * data)
                        query.irn = irn;
                        spill = set_insert(spill_bb->ilp, &query, sizeof(query), HASH_PTR(irn));
 
-                       ir_snprintf(buf, sizeof(buf), "reg_out_%N_%N", bb, irn);
+                       ir_snprintf(buf, sizeof(buf), "reg_out_%N_%N", irn, bb);
                        spill->reg_out = lpp_add_var(si->lpp, buf, lpp_binary, 0.0);
                        /* if irn is used at the end of the block, then it is live anyway */
                        if(!pset_find_ptr(use_end, irn))
                                lpp_set_factor_fast(si->lpp, cst, spill->reg_out, 1.0);
 
-                       ir_snprintf(buf, sizeof(buf), "mem_out_%N_%N", bb, irn);
+                       ir_snprintf(buf, sizeof(buf), "mem_out_%N_%N", irn, bb);
                        spill->mem_out = lpp_add_var(si->lpp, buf, lpp_binary, 0.0);
 
-                       ir_snprintf(buf, sizeof(buf), "spill_%N_%N", bb, irn);
-                       spill->spill = lpp_add_var(si->lpp, buf, lpp_binary, COST_STORE*execution_frequency(si, irn));
+                       ir_snprintf(buf, sizeof(buf), "spill_%N_%N", irn, bb);
+                       spill->spill = lpp_add_var(si->lpp, buf, lpp_binary, COST_STORE*execution_frequency(si, bb));
 
                        spill->reg_in = ILP_UNDEF;
                        spill->mem_in = ILP_UNDEF;
@@ -1267,8 +1295,12 @@ luke_blockwalker(ir_node * bb, void * data)
                }
        }
 
-       spill_bb->reloads = obstack_alloc(si->obst, pset_count(live) * sizeof(*spill_bb->reloads));
-       memset(spill_bb->reloads, 0xFF, pset_count(live) * sizeof(*spill_bb->reloads));
+       if(is_merge_edge(bb)) {
+               spill_bb->reloads = obstack_alloc(si->obst, pset_count(live) * sizeof(*spill_bb->reloads));
+               memset(spill_bb->reloads, 0xFF, pset_count(live) * sizeof(*spill_bb->reloads));
+       } else {
+               spill_bb->reloads = NULL;
+       }
 
        i=0;
        live_foreach(bb, li) {
@@ -1279,13 +1311,15 @@ luke_blockwalker(ir_node * bb, void * data)
                        spill = set_find_spill(spill_bb->ilp, irn);
                        assert(spill);
 
-                       ir_snprintf(buf, sizeof(buf), "reload_%N_%N", bb, irn);
-                       spill_bb->reloads[i] = lpp_add_var(si->lpp, buf, lpp_binary, COST_LOAD*execution_frequency(si, bb));
+                       if(spill_bb->reloads) {
+                               ir_snprintf(buf, sizeof(buf), "reload_%N_%N", bb, irn);
+                               spill_bb->reloads[i] = lpp_add_var(si->lpp, buf, lpp_binary, COST_LOAD*execution_frequency(si, bb));
 
-                       /* reload <= mem_out */
-                       cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
-                       lpp_set_factor_fast(si->lpp, cst, spill_bb->reloads[i], 1.0);
-                       lpp_set_factor_fast(si->lpp, cst, spill->mem_out, -1.0);
+                               /* reload <= mem_out */
+                               cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
+                               lpp_set_factor_fast(si->lpp, cst, spill_bb->reloads[i], 1.0);
+                               lpp_set_factor_fast(si->lpp, cst, spill->mem_out, -1.0);
+                       }
 
                        op = get_irn_link(irn);
                        assert(!op->is_remat);
@@ -1299,7 +1333,7 @@ luke_blockwalker(ir_node * bb, void * data)
 
                        /* reg_out - reload - remat - live_range <= 0 */
                        lpp_set_factor_fast(si->lpp, cst, spill->reg_out, 1.0);
-                       lpp_set_factor_fast(si->lpp, cst, spill_bb->reloads[i], -1.0);
+                       if(spill_bb->reloads) lpp_set_factor_fast(si->lpp, cst, spill_bb->reloads[i], -1.0);
                        lpp_set_factor_fast(si->lpp, cst, op->attr.live_range.ilp, -1.0);
                        foreach_pre_remat(si, bb, tmp) {
                                op_t     *remat_op = get_irn_link(tmp);
@@ -1437,9 +1471,9 @@ luke_blockwalker(ir_node * bb, void * data)
                                                }
                                        }
                                }
+fertig:
 #endif
 
-fertig:
                                if(prev_lr != ILP_UNDEF) {
                                        value_op->attr.live_range.ilp = prev_lr;
                                        value_op->attr.live_range.op = irn;
@@ -1502,7 +1536,7 @@ fertig:
                        assert(i<n);
 
                        ir_snprintf(buf, sizeof(buf), "reload_%N_%N", arg, irn);
-                       op->attr.live_range.reloads[i] = lpp_add_var(si->lpp, buf, lpp_binary, COST_LOAD*execution_frequency(si, irn));
+                       op->attr.live_range.reloads[i] = lpp_add_var(si->lpp, buf, lpp_binary, COST_LOAD*execution_frequency(si, bb));
 
                        /* reload <= mem_out */
                        cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
@@ -1882,15 +1916,16 @@ fertig:
 
        /* walk forward now and compute constraints for placing spills */
        /* this must only be done for values that are not defined in this block */
+       /* TODO are these values at start of block? if yes, just check whether this is a diverge edge and skip the loop */
        pset_foreach(live, irn) {
-               ir_snprintf(buf, sizeof(buf), "req_spill_%N_%N", spill->irn, bb);
-               cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
-
                spill = set_find_spill(spill_bb->ilp, irn);
                assert(spill);
 
+               ir_snprintf(buf, sizeof(buf), "req_spill_%N_%N", irn, bb);
+               cst = lpp_add_cst(si->lpp, buf, lpp_less, 0.0);
+
                lpp_set_factor_fast(si->lpp, cst, spill->spill, 1.0);
-               lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
+               if(is_diverge_edge(bb)) lpp_set_factor_fast(si->lpp, cst, spill->reg_in, -1.0);
 
                sched_foreach_op(bb, tmp) {
                        op_t   *op = get_irn_link(tmp);
@@ -1968,6 +2003,7 @@ is_mem_phi(const ir_node * phi, void *data)
        return 0;
 }
 
+#ifdef KEEPALIVE
 static int mark_remat_nodes_hook(FILE *F, ir_node *n, ir_node *l)
 {
        spill_ilp_t *si = get_irg_link(current_ir_graph);
@@ -2008,6 +2044,7 @@ dump_graph_with_remats(ir_graph * irg, const char * suffix)
        be_dump(irg, suffix, dump_ir_block_graph_sched);
        set_dump_node_vcgattr_hook(NULL);
 }
+#endif
 
 /**
  * Edge hook to dump the schedule edges with annotated register pressure.
@@ -2057,11 +2094,11 @@ walker_pressure_annotator(ir_node * bb, void * data)
                }
        }
 
-       set_irn_link(bb, (void*)pset_count(live));
+       set_irn_link(bb, INT_TO_PTR(pset_count(live)));
 
        sched_foreach_reverse(bb, irn) {
                if(is_Phi(irn)) {
-                       set_irn_link(irn, (void*)pset_count(live));
+                       set_irn_link(irn, INT_TO_PTR(pset_count(live)));
                        continue;
                }
 
@@ -2077,7 +2114,7 @@ walker_pressure_annotator(ir_node * bb, void * data)
 
                        if(has_reg_class(si, arg)) pset_insert_ptr(live, arg);
                }
-               set_irn_link(irn, (void*)pset_count(live)+projs);
+               set_irn_link(irn, INT_TO_PTR(pset_count(live)+projs));
        }
 
        del_pset(live);
@@ -2116,6 +2153,32 @@ connect_all_remats_with_keep(spill_ilp_t * si)
 }
 #endif
 
+static void
+connect_all_spills_with_keep(spill_ilp_t * si)
+{
+       ir_node   *irn;
+       ir_node  **ins,
+                        **pos;
+       int        n_spills;
+       ir_node   *keep;
+
+
+       n_spills = pset_count(si->spills);
+       if(n_spills) {
+               ins = obstack_alloc(si->obst, n_spills * sizeof(*ins));
+
+               pos = ins;
+               pset_foreach(si->spills, irn) {
+                       *pos = irn;
+                       ++pos;
+               }
+
+               keep = be_new_Keep(si->chordal_env->cls, si->chordal_env->irg, get_irg_end_block(si->chordal_env->irg), n_spills, ins);
+
+               obstack_free(si->obst, ins);
+       }
+}
+
 /** insert a spill at an arbitrary position */
 ir_node *be_spill2(const arch_env_t *arch_env, ir_node *irn, ir_node *insert, ir_node *ctx)
 {
@@ -2200,11 +2263,11 @@ clean_remat_info(spill_ilp_t * si)
 static void
 delete_unnecessary_remats(spill_ilp_t * si)
 {
+#ifdef KEEPALIVE
        int       i,
                      n;
        ir_node  *bad = get_irg_bad(si->chordal_env->irg);
 
-#ifdef KEEPALIVE
        if(si->keep) {
                ir_node   *end = get_irg_end(si->chordal_env->irg);
                ir_node  **keeps;
@@ -2267,10 +2330,18 @@ delete_unnecessary_remats(spill_ilp_t * si)
                        /* TODO check whether reload is preferred over remat (could be bug) */
                        delete_remat(si, remat);
                } else {
-                       if(remat_op->attr.remat.pre) {
-                               DBG((si->dbg, LEVEL_2, "\t**remat kept: %+F\n", remat));
+                       if(!remat_op->attr.remat.remat->inverse) {
+                               if(remat_op->attr.remat.pre) {
+                                       DBG((si->dbg, LEVEL_2, "\t**remat kept: %+F\n", remat));
+                               } else {
+                                       DBG((si->dbg, LEVEL_2, "\t%%%%remat2 kept: %+F\n", remat));
+                               }
                        } else {
-                               DBG((si->dbg, LEVEL_2, "\t%%%%remat2 kept: %+F\n", remat));
+                               if(remat_op->attr.remat.pre) {
+                                       DBG((si->dbg, LEVEL_2, "\t**INVERSE remat kept: %+F\n", remat));
+                               } else {
+                                       DBG((si->dbg, LEVEL_2, "\t%%%%INVERSE remat2 kept: %+F\n", remat));
+                               }
                        }
                }
        }
@@ -2300,12 +2371,47 @@ insert_spill(spill_ilp_t * si, const ir_node * irn, const ir_node * value, const
        defs->spills = spill;
 
 #ifdef KEEPALIVE_SPILLS
-       keep_alive(spill);
+       pset_insert_ptr(si->spills, spill);
 #endif
 
        return spill;
 }
 
+/**
+ * @param before   The Phi node which has to be spilled
+ */
+static ir_node *
+insert_mem_phi(spill_ilp_t * si, const ir_node * phi)
+{
+       ir_node   *mem_phi;
+       ir_node  **ins;
+       defs_t    *defs;
+       int        i,
+                          n;
+
+       NEW_ARR_A(ir_node*, ins, get_irn_arity(phi));
+
+       for(i=0,n=get_irn_arity(phi); i<n; ++i) {
+               ins[i] = si->m_unknown;
+       }
+
+       mem_phi =  new_r_Phi(si->chordal_env->irg, get_nodes_block(phi), get_irn_arity(phi), ins, mode_M);
+
+       defs = set_insert_def(si->values, phi);
+       assert(defs);
+
+       /* enter into the linked list */
+       set_irn_link(mem_phi, defs->spills);
+       defs->spills = mem_phi;
+
+       sched_add_after(phi, mem_phi);
+
+#ifdef KEEPALIVE_SPILLS
+       pset_insert_ptr(si->spills, mem_phi);
+#endif
+
+       return mem_phi;
+}
 
 /**
  * Add remat to list of defs, destroys link field!
@@ -2326,6 +2432,7 @@ insert_remat(spill_ilp_t * si, ir_node * remat)
        defs->remats = remat;
 }
 
+#if 0
 static void
 collect_spills(spill_ilp_t * si, ir_node * value, pset * spills, pset * visited)
 {
@@ -2355,15 +2462,26 @@ collect_spills(spill_ilp_t * si, ir_node * value, pset * spills, pset * visited)
 //             assert(0 && "Phi operand not spilled");
        }
 }
+#endif
 
 static pset *
 get_spills_for_value(spill_ilp_t * si, ir_node * value)
 {
        pset     *spills = pset_new_ptr_default();
-       pset     *visited = pset_new_ptr_default();
+//     pset     *visited = pset_new_ptr_default();
 
-       collect_spills(si, value, spills, visited);
-       del_pset(visited);
+//     collect_spills(si, value, spills, visited);
+//     del_pset(visited);
+       ir_node  *next;
+       defs_t   *defs;
+
+       defs = set_find_def(si->values, value);
+
+       if(defs && defs->spills) {
+               for(next = defs->spills; next; next = get_irn_link(next)) {
+                       pset_insert_ptr(spills, next);
+               }
+       }
 
        return spills;
 }
@@ -2383,6 +2501,7 @@ insert_reload(spill_ilp_t * si, const ir_node * value, const ir_node * after)
 
        defs = set_find_def(si->values, value);
        /* get a spill of this value */
+#if 0
        if((!defs || !defs->spills) && is_Phi(value)) {
                pset  *spills;
 
@@ -2394,9 +2513,13 @@ insert_reload(spill_ilp_t * si, const ir_node * value, const ir_node * after)
                if(!defs) {
                        defs = set_insert_def(si->values, value);
                }
+               defs->spills = spill;
+               set_irn_link(spill, NULL);
        } else {
                spill = defs->spills;
        }
+#endif
+       spill = defs->spills;
        assert(spill && "no spill placed before reload");
 
        reload = be_reload(arch_env, si->cls, after, get_irn_mode(value), spill);
@@ -2406,7 +2529,6 @@ insert_reload(spill_ilp_t * si, const ir_node * value, const ir_node * after)
        defs->remats = reload;
 
        return reload;
-
 }
 
 static void
@@ -2420,12 +2542,14 @@ walker_spill_placer(ir_node * bb, void * data) {
        set_foreach(spill_bb->ilp, spill) {
                lpp_name_t    *name;
 
-               assert(spill->spill > 0);
-
                if(is_Phi(spill->irn) && get_nodes_block(spill->irn) == bb) {
                        name = si->lpp->vars[spill->mem_in];
                        if(!is_zero(name->value)) {
-                               DBG((si->dbg, LEVEL_2, "\t >>spilled Phi %+F\n", spill->irn));
+                               ir_node   *mem_phi;
+
+                               mem_phi = insert_mem_phi(si, spill->irn);
+
+                               DBG((si->dbg, LEVEL_2, "\t >>spilled Phi %+F -> %+F\n", spill->irn, mem_phi));
                        }
                }
 
@@ -2472,6 +2596,43 @@ walker_spill_placer(ir_node * bb, void * data) {
        del_pset(spills_to_do);
 }
 
+static void
+phim_fixer(spill_ilp_t *si) {
+       defs_t  *defs;
+
+       set_foreach(si->values, defs) {
+               const ir_node  *phi = defs->value;
+               ir_node  *phi_m = NULL;
+               ir_node  *next = defs->spills;
+               int       i,
+                                 n;
+
+               if(!is_Phi(phi)) continue;
+
+               while(next) {
+                       if(is_Phi(next) && get_irn_mode(next) == mode_M) {
+                               phi_m = next;
+                               break;
+                       } else {
+                               next = get_irn_link(next);
+                       }
+               }
+               if(!phi_m) continue;
+
+               for(i=0,n=get_irn_arity(phi); i<n; ++i) {
+                       const ir_node  *value = get_irn_n(phi, i);
+                       defs_t         *val_defs = set_find_def(si->values, value);
+
+                       /* get a spill of this value */
+                       ir_node      *spill = val_defs->spills;
+
+                       assert(spill && "no spill placed before PhiM");
+
+                       set_irn_n(phi_m, i, spill);
+               }
+       }
+}
+
 static void
 walker_reload_placer(ir_node * bb, void * data) {
        spill_ilp_t   *si = (spill_ilp_t*)data;
@@ -2503,25 +2664,34 @@ walker_reload_placer(ir_node * bb, void * data) {
                                        if(!is_zero(name->value)) {
                                                ir_node    *reload;
                                                ir_node    *insert_pos = irn;
-                                               ir_node    *prev = sched_prev(insert_pos);
-                                               op_t       *prev_op = get_irn_link(prev);
+                                               ir_node    *prev = insert_pos;
+                                               op_t       *prev_op;
+
+                                               do {
+                                                       prev = sched_prev(prev);
+                                               } while(be_is_Spill(prev));
+
+                                               prev_op = get_irn_link(prev);
 
                                                /* insert reload before pre-remats */
-                                               while(!sched_is_end(prev) && !be_is_Reload(prev) && !be_is_Spill(prev)
+                                               while(!sched_is_end(prev) && !be_is_Reload(prev) && !is_Phi(prev)
                                                                && prev_op->is_remat && prev_op->attr.remat.pre) {
                                                        insert_pos = prev;
 
-                                                       prev = sched_prev(insert_pos);
+                                                       do {
+                                                               prev = sched_prev(prev);
+                                                       } while(be_is_Spill(prev));
+
                                                        prev_op = get_irn_link(prev);
+
                                                }
 
                                                reload = insert_reload(si, arg, insert_pos);
 
                                                set_irn_n(irn, i, reload);
 
-
-#ifdef KEEPALIVE_SPILLS
-                                               keep_alive(reload);
+#ifdef KEEPALIVE_RELOADS
+                                               pset_insert_ptr(si->spills, reload);
 #endif
                                        }
                                }
@@ -2529,36 +2699,39 @@ walker_reload_placer(ir_node * bb, void * data) {
                }
        }
 
-       i=0;
-       live_foreach(bb, li) {
-               ir_node        *irn = (ir_node *) li->irn;
+       /* reloads at end of block */
+       if(spill_bb->reloads) {
+               i=0;
+               live_foreach(bb, li) {
+                       ir_node        *irn = (ir_node *) li->irn;
 
-               if (live_is_end(li) && has_reg_class(si, irn) && !pset_find_ptr(si->all_possible_remats, irn)) {
-                       lpp_name_t    *name;
+                       if (live_is_end(li) && has_reg_class(si, irn) && !pset_find_ptr(si->all_possible_remats, irn)) {
+                               lpp_name_t    *name;
 
-                       name = si->lpp->vars[spill_bb->reloads[i]];
-                       if(!is_zero(name->value)) {
-                               ir_node    *reload;
-                               ir_node    *insert_pos = bb;
-                               ir_node    *prev = sched_prev(insert_pos);
-                               op_t       *prev_op = get_irn_link(prev);
-
-                               /* insert reload before pre-remats */
-                               while(!sched_is_end(prev) && !be_is_Reload(prev) && !be_is_Spill(prev)
-                                               && prev_op->is_remat && prev_op->attr.remat.pre) {
-                                       insert_pos = prev;
-
-                                       prev = sched_prev(insert_pos);
-                                       prev_op = get_irn_link(prev);
-                               }
+                               name = si->lpp->vars[spill_bb->reloads[i]];
+                               if(!is_zero(name->value)) {
+                                       ir_node    *reload;
+                                       ir_node    *insert_pos = bb;
+                                       ir_node    *prev = sched_prev(insert_pos);
+                                       op_t       *prev_op = get_irn_link(prev);
+
+                                       /* insert reload before pre-remats */
+                                       while(!sched_is_end(prev) && !be_is_Reload(prev) && !be_is_Spill(prev)
+                                                       && prev_op->is_remat && prev_op->attr.remat.pre) {
+                                               insert_pos = prev;
+
+                                               prev = sched_prev(insert_pos);
+                                               prev_op = get_irn_link(prev);
+                                       }
 
-                               reload = insert_reload(si, irn, insert_pos);
+                                       reload = insert_reload(si, irn, insert_pos);
 
-#ifdef KEEPALIVE_SPILLS
-                               keep_alive(reload);
+#ifdef KEEPALIVE_RELOADS
+                                       pset_insert_ptr(si->spills, reload);
 #endif
+                               }
+                               ++i;
                        }
-                       ++i;
                }
        }
 
@@ -2573,12 +2746,17 @@ walker_collect_used(ir_node * irn, void * data)
        lc_bitset_set(used, get_irn_idx(irn));
 }
 
+struct kill_helper {
+       lc_bitset_t  *used;
+       spill_ilp_t  *si;
+};
+
 static void
 walker_kill_unused(ir_node * bb, void * data)
 {
-       lc_bitset_t     *used = data;
-       const ir_node   *bad = get_irg_bad(get_irn_irg(bb));
-       ir_node         *irn;
+       struct kill_helper *kh = data;
+       const ir_node      *bad = get_irg_bad(get_irn_irg(bb));
+       ir_node            *irn;
 
 
        for(irn=sched_first(bb); !sched_is_end(irn);) {
@@ -2586,15 +2764,16 @@ walker_kill_unused(ir_node * bb, void * data)
                int          i,
                                         n;
 
-               if(!lc_bitset_is_set(used, get_irn_idx(irn))) {
-                       assert(!be_is_Spill(irn) && !be_is_Reload(irn) && "something is fishy, spill or remat is unused");
+               if(!lc_bitset_is_set(kh->used, get_irn_idx(irn))) {
+                       if(be_is_Spill(irn) || be_is_Reload(irn)) {
+                               DBG((kh->si->dbg, LEVEL_1, "\t SUBOPTIMAL! %+F IS UNUSED (cost: %g)\n", irn, get_cost(kh->si, irn)*execution_frequency(kh->si, bb)));
+                               assert(lpp_get_sol_state(kh->si->lpp) != lpp_optimal && "optimal solution is suboptimal?");
+                       }
 
                        sched_remove(irn);
 
                        set_nodes_block(irn, bad);
                        for (i = 0, n = get_irn_arity(irn); i < n; ++i) {
-                               ir_node             *arg = get_irn_n(irn, i);
-
                                set_irn_n(irn, i, bad);
                        }
                }
@@ -2605,12 +2784,15 @@ walker_kill_unused(ir_node * bb, void * data)
 static void
 kill_all_unused_values_in_schedule(spill_ilp_t * si)
 {
-       lc_bitset_t   *used = lc_bitset_malloc(get_irg_last_idx(si->chordal_env->irg));
+       struct kill_helper kh;
+
+       kh.used = lc_bitset_malloc(get_irg_last_idx(si->chordal_env->irg));
+       kh.si = si;
 
-       irg_walk_graph(si->chordal_env->irg, walker_collect_used, NULL, used);
-       irg_block_walk_graph(si->chordal_env->irg, walker_kill_unused, NULL, used);
+       irg_walk_graph(si->chordal_env->irg, walker_collect_used, NULL, kh.used);
+       irg_block_walk_graph(si->chordal_env->irg, walker_kill_unused, NULL, &kh);
 
-       lc_bitset_free(used);
+       lc_bitset_free(kh.used);
 }
 
 static void
@@ -2628,6 +2810,9 @@ rewire_uses(spill_ilp_t * si)
 {
        dom_front_info_t     *dfi = be_compute_dominance_frontiers(si->chordal_env->irg);
        defs_t               *defs;
+       pset                 *ignore = pset_new_ptr(1);
+
+       pset_insert_ptr(ignore, get_irg_end(si->chordal_env->irg));
 
        /* then fix uses of spills */
        set_foreach(si->values, defs) {
@@ -2636,30 +2821,29 @@ rewire_uses(spill_ilp_t * si)
                ir_node  *next = defs->remats;
                int remats = 0;
 
-               if(next) {
-                       reloads = pset_new_ptr_default();
+               reloads = pset_new_ptr_default();
 
-                       while(next) {
-                               if(be_is_Reload(next)) {
-                                       pset_insert_ptr(reloads, next);
-                               } else {
-                                       ++remats;
-                               }
-                               next = get_irn_link(next);
+               while(next) {
+                       if(be_is_Reload(next)) {
+                               pset_insert_ptr(reloads, next);
+                       } else {
+                               ++remats;
                        }
+                       next = get_irn_link(next);
+               }
 
-                       spills = get_spills_for_value(si, defs->value);
-                       DBG((si->dbg, LEVEL_2, "\t  %d remats, %d reloads, and %d spills for value %+F\n", remats, pset_count(reloads), pset_count(spills), defs->value));
-                       if(pset_count(spills) > 1) {
-                               assert(pset_count(reloads) > 0);
-//                             print_irn_pset(spills);
-//                             print_irn_pset(reloads);
-                               be_ssa_constr_set_uses(dfi, spills, reloads);
-                       }
+               spills = get_spills_for_value(si, defs->value);
+               DBG((si->dbg, LEVEL_2, "\t  %d remats, %d reloads, and %d spills for value %+F\n", remats, pset_count(reloads), pset_count(spills), defs->value));
+               if(pset_count(spills) > 1) {
+                       //assert(pset_count(reloads) > 0);
+                       //                              print_irn_pset(spills);
+                       //                              print_irn_pset(reloads);
 
-                       del_pset(reloads);
-                       del_pset(spills);
+                       be_ssa_constr_set_ignore(dfi, spills, ignore);
                }
+
+               del_pset(reloads);
+               del_pset(spills);
        }
 
        /* first fix uses of remats and reloads */
@@ -2699,7 +2883,9 @@ writeback_results(spill_ilp_t * si)
 
        DBG((si->dbg, LEVEL_1, "Applying results\n"));
        delete_unnecessary_remats(si);
+       si->m_unknown = new_r_Unknown(si->chordal_env->irg, mode_M);
        irg_block_walk_graph(si->chordal_env->irg, walker_spill_placer, NULL, si);
+       phim_fixer(si);
        irg_block_walk_graph(si->chordal_env->irg, walker_reload_placer, NULL, si);
 
        /* clean the remat info! there are still back-edges leading there! */
@@ -2707,6 +2893,8 @@ writeback_results(spill_ilp_t * si)
 
        rewire_uses(si);
 
+       connect_all_spills_with_keep(si);
+
        del_set(si->values);
 }
 
@@ -2731,12 +2919,12 @@ static void
 walker_reload_mover(ir_node * bb, void * data)
 {
        spill_ilp_t   *si = data;
-       ir_node           *irn;
+       ir_node           *tmp;
 
-       sched_foreach(bb, irn) {
-               if(be_is_Reload(irn) && has_reg_class(si, irn)) {
-                       ir_node       *reload = irn;
-                       ir_node       *irn = irn;
+       sched_foreach(bb, tmp) {
+               if(be_is_Reload(tmp) && has_reg_class(si, tmp)) {
+                       ir_node       *reload = tmp;
+                       ir_node       *irn = tmp;
 
                        /* move reload upwards */
 
@@ -2750,7 +2938,7 @@ walker_reload_mover(ir_node * bb, void * data)
                                while(pressure < si->n_regs) {
                                        if(sched_is_end(irn) || (be_is_Reload(irn) && has_reg_class(si, irn))) break;
 
-                                       set_irn_link(irn, (void*)(pressure+1));
+                                       set_irn_link(irn, INT_TO_PTR(pressure+1));
                                        DBG((si->dbg, LEVEL_5, "new regpressure before %+F: %d\n", irn, pressure+1));
                                        irn = sched_prev(irn);
 
@@ -2796,9 +2984,12 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
        si.lpp = new_lpp(problem_name, lpp_minimize);
        si.remat_info = new_set(cmp_remat_info, 4096);
        si.all_possible_remats = pset_new_ptr_default();
+       si.spills = pset_new_ptr_default();
        si.inverse_ops = pset_new_ptr_default();
 #ifndef EXECFREQ_LOOPDEPH
        si.execfreqs = compute_execfreq(chordal_env->irg);
+#else
+       si.execfreqs = NULL;
 #endif
 #ifdef KEEPALIVE
        si.keep = NULL;
@@ -2855,7 +3046,9 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
 
 #ifdef SOLVE
        DBG((si.dbg, LEVEL_1, "\tSolving %F\n", chordal_env->irg));
+#ifdef ILP_TIMEOUT
        lpp_set_time_limit(si.lpp, ILP_TIMEOUT);
+#endif
 
 #ifdef SOLVE_LOCAL
        lpp_solve_cplex(si.lpp);
@@ -2865,7 +3058,7 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
        assert(lpp_is_sol_valid(si.lpp)
               && "solution of ILP must be valid");
 
-       DBG((si.dbg, LEVEL_1, "\t%s: iterations: %d, solution time: %g, objective function: %g\n", problem_name, si.lpp->iterations, si.lpp->sol_time, si.lpp->objval));
+       DBG((si.dbg, LEVEL_1, "\t%s: iterations: %d, solution time: %g, objective function: %g\n", problem_name, si.lpp->iterations, si.lpp->sol_time, is_zero(si.lpp->objval)?0.0:si.lpp->objval));
 
 #ifdef DUMP_SOLUTION
        {
@@ -2890,6 +3083,10 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
 
        kill_all_unused_values_in_schedule(&si);
 
+#if defined(KEEPALIVE_SPILLS) || defined(KEEPALIVE_RELOADS)
+       be_dump(chordal_env->irg, "-spills-placed", dump_ir_block_graph);
+#endif
+
        be_liveness(chordal_env->irg);
        irg_block_walk_graph(chordal_env->irg, walker_pressure_annotator, NULL, &si);
 
@@ -2907,8 +3104,9 @@ be_spill_remat(const be_chordal_env_t * chordal_env)
        free_dom(chordal_env->irg);
        del_pset(si.inverse_ops);
        del_pset(si.all_possible_remats);
+       del_pset(si.spills);
 #ifndef EXECFREQ_LOOPDEPH
-       del_set(si.execfreqs);
+       free_execfreq(si.execfreqs);
 #endif
        free_lpp(si.lpp);
        obstack_free(&obst, NULL);