amd64: small changes w.r.t. stack alignment.
[libfirm] / ir / be / bespillbelady2.c
index 58fd6e4..4b2386f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
+ * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
  *
  * This file is part of libFirm.
  *
@@ -32,9 +32,7 @@
  *   capacity of the blocks to let global variables live through
  *   them.
  */
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include <math.h>
 #include <limits.h>
 #include "irprintf.h"
 #include "execfreq.h"
 #include "dfs_t.h"
-#include "xmalloc.h"
 
 #include "beutil.h"
-#include "bearch_t.h"
-#include "bespillbelady.h"
-#include "besched_t.h"
+#include "bearch.h"
+#include "besched.h"
 #include "beirgmod.h"
 #include "belive_t.h"
-#include "benode_t.h"
+#include "benode.h"
 #include "bechordal_t.h"
-#include "bespilloptions.h"
+#include "bespill.h"
 #include "beloopana.h"
-#include "beirg_t.h"
+#include "beirg.h"
 #include "bemodule.h"
+#include "bespillutil.h"
 
-#include <libcore/lc_opts.h>
-#include <libcore/lc_opts_enum.h>
-#include <libcore/lc_timing.h>
+#include "lc_opts.h"
+#include "lc_opts_enum.h"
 
 #define DBG_SPILL     1
 #define DBG_WSETS     2
@@ -128,15 +124,16 @@ typedef struct _belady_env_t {
        be_lv_t *lv;
        ir_exec_freq *ef;
 
-       ir_node **blocks;   /**< Array of all blocks. */
-       int n_blocks;       /**< Number of blocks in the graph. */
-       int n_regs;                     /**< number of regs in this reg-class */
-       workset_t *ws;          /**< the main workset used while processing a block. ob-allocated */
-       ir_node *instr;         /**< current instruction */
-       int instr_nr;           /**< current instruction number (relative to block start) */
+       ir_node **blocks;            /**< Array of all blocks. */
+       int n_blocks;                /**< Number of blocks in the graph. */
+       int n_regs;                              /**< number of regs in this reg-class */
+       workset_t *ws;                   /**< the main workset used while processing a block. ob-allocated */
+       ir_node *instr;                  /**< current instruction */
+       int instr_nr;                    /**< current instruction number (relative to block start) */
 
-       spill_env_t *senv;      /**< see bespill.h */
-       bitset_t *spilled;  /**< bitset to keep all the irns which have already been spilled. */
+       spill_env_t *senv;               /**< see bespill.h */
+       bitset_t *spilled;           /**< bitset to keep all the irns which have already been spilled. */
+       ir_nodeset_t *extra_spilled; /**< All nodes for which a special spill location has been computed. */
 } belady_env_t;
 
 
@@ -147,11 +144,11 @@ static int loc_compare(const void *a, const void *b)
        return (p->time > q->time) - (p->time < q->time);
 }
 
-static INLINE void workset_print(const workset_t *w)
+static inline void workset_print(const workset_t *w)
 {
        int i;
 
-       for(i = 0; i < w->len; ++i) {
+       for (i = 0; i < w->len; ++i) {
                ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
        }
 }
@@ -159,22 +156,18 @@ static INLINE void workset_print(const workset_t *w)
 /**
  * Alloc a new workset on obstack @p ob with maximum size @p max
  */
-static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
-       workset_t *res;
-       size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
-       res = obstack_alloc(ob, size);
-       memset(res, 0, size);
-       return res;
+static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob)
+{
+       return OALLOCFZ(ob, workset_t, vals, env->n_regs);
 }
 
 /**
  * Alloc a new instance on obstack and make it equal to @param ws
  */
-static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
-       workset_t *res;
-       size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
-       res = obstack_alloc(ob, size);
-       memcpy(res, ws, size);
+static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws)
+{
+       workset_t *res = OALLOCF(ob, workset_t, vals, env->n_regs);
+       memcpy(res, ws, sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]));
        return res;
 }
 
@@ -182,7 +175,8 @@ static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, wo
  * Do NOT alloc anything. Make @param tgt equal to @param src.
  * returns @param tgt for convenience
  */
-static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
+static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src)
+{
        size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
        memcpy(tgt, src, size);
        return tgt;
@@ -193,7 +187,8 @@ static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset
  * @param count locations given at memory @param locs.
  * Set the length of @param ws to count.
  */
-static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
+static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
+{
        workset->len = count;
        memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
 }
@@ -202,16 +197,17 @@ static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t
  * Inserts the value @p val into the workset, iff it is not
  * already contained. The workset must not be full.
  */
-static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
+static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val)
+{
        int i;
        /* check for current regclass */
-       if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
+       if (!arch_irn_consider_in_reg_alloc(env->cls, val)) {
                // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
                return;
        }
 
        /* check if val is already contained */
-       for(i=0; i<ws->len; ++i)
+       for (i=0; i<ws->len; ++i)
                if (ws->vals[i].irn == val)
                        return;
 
@@ -223,16 +219,18 @@ static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val
 /**
  * Removes all entries from this workset
  */
-static INLINE void workset_clear(workset_t *ws) {
+static inline void workset_clear(workset_t *ws)
+{
        ws->len = 0;
 }
 
 /**
  * Removes the value @p val from the workset if present.
  */
-static INLINE void workset_remove(workset_t *ws, ir_node *val) {
+static inline void workset_remove(workset_t *ws, ir_node *val)
+{
        int i;
-       for(i=0; i<ws->len; ++i) {
+       for (i=0; i<ws->len; ++i) {
                if (ws->vals[i].irn == val) {
                        ws->vals[i] = ws->vals[--ws->len];
                        return;
@@ -240,9 +238,10 @@ static INLINE void workset_remove(workset_t *ws, ir_node *val) {
        }
 }
 
-static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
+static inline int workset_get_index(const workset_t *ws, const ir_node *val)
+{
        int i;
-       for(i=0; i<ws->len; ++i) {
+       for (i=0; i<ws->len; ++i) {
                if (ws->vals[i].irn == val)
                        return i;
        }
@@ -256,7 +255,7 @@ static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
  * @p v  A variable to put the current value in
  * @p i  An integer for internal use
  */
-#define workset_foreach(ws, v, i)      for(i=0; \
+#define workset_foreach(ws, v, i)      for (i=0; \
                                                                                v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
                                                                                ++i)
 
@@ -292,21 +291,22 @@ typedef struct _block_info_t {
                                                           real (non-phi) node. At the beginning
                                                           of the global pass, this is 0. */
        struct list_head br_head; /**< List head for all bring_in variables. */
+       int free_at_jump;         /**< registers free at jump. */
 
 } block_info_t;
 
-static INLINE void *new_block_info(belady_env_t *bel, int id)
+static inline void *new_block_info(belady_env_t *bel, int id)
 {
        ir_node      *bl  = bel->blocks[id];
-       block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
-       memset(res, 0, sizeof(res[0]));
+       block_info_t *res = OALLOCZ(&bel->ob, block_info_t);
        res->first_non_in = NULL;
-       res->last_ins = NULL;
-       res->bel = bel;
-       res->bl  = bl;
-       res->id  = id;
+       res->last_ins     = NULL;
+       res->bel          = bel;
+       res->bl           = bl;
+       res->id           = id;
        res->exec_freq    = get_block_execfreq(bel->ef, bl);
-       res->reload_cost  = bel->arch->isa->reload_cost * res->exec_freq;
+       res->reload_cost  = bel->arch->reload_cost * res->exec_freq;
+       res->free_at_jump = bel->n_regs;
        INIT_LIST_HEAD(&res->br_head);
        set_irn_link(bl, res);
        return res;
@@ -315,7 +315,7 @@ static INLINE void *new_block_info(belady_env_t *bel, int id)
 #define get_block_info(block)        ((block_info_t *)get_irn_link(block))
 #define set_block_info(block, info)  set_irn_link(block, info)
 
-static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
+static inline ir_node *block_info_get_last_ins(block_info_t *bi)
 {
        if (!bi->last_ins)
                bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
@@ -334,7 +334,7 @@ typedef struct _next_use_t {
                                                                 or NULL. */
 } next_use_t;
 
-static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)
+static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old)
 {
        (void) phase;
        (void) irn;
@@ -348,7 +348,7 @@ static void build_next_uses(block_info_t *bi)
 
        sched_renumber(bi->bl);
 
-       phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
+       phase_init(&bi->next_uses, bi->bel->irg, next_use_init);
        sched_foreach_reverse(bi->bl, irn) {
                int i;
 
@@ -377,7 +377,7 @@ static void build_next_uses(block_info_t *bi)
 
 #define get_current_use(bi, irn)        phase_get_irn_data(&(bi)->next_uses, (irn))
 
-static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
+static inline void advance_current_use(block_info_t *bi, const ir_node *irn)
 {
        next_use_t *use = get_current_use(bi, irn);
 
@@ -432,17 +432,18 @@ struct _bring_in_t {
        block_info_t *bi;          /**< The block to which bring in should happen. */
        int pressure_so_far;       /**< The maximal pressure till the first use of irn in bl. */
        ir_node *first_use;        /**< The first user of irn in bl. */
-       sched_timestep_t use_step; /**< Schedule sttep of the first use. */
+       sched_timestep_t use_step; /**< Schedule step of the first use. */
 
        int is_remat : 1;          /**< Is rematerializable. */
        int sect_pressure;         /**< Offset to maximum pressure in block. */
        struct list_head list;
+       struct list_head sect_list;
+       bring_in_t *sect_head;
 };
 
-static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
+static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
 {
-       bring_in_t *br    = obstack_alloc(&bi->bel->ob, sizeof(br[0]));
-
+       bring_in_t *br      = OALLOC(&bi->bel->ob, bring_in_t);
        br->irn             = irn;
        br->bi              = bi;
        br->first_use       = use->irn;
@@ -450,8 +451,10 @@ static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const nex
        br->is_remat        = be_is_rematerializable(bi->bel->senv, irn, use->irn);
        br->pressure_so_far = bi->pressure;
        br->sect_pressure   = bi->front_pressure;
+       br->sect_head       = br;
 
        INIT_LIST_HEAD(&br->list);
+       INIT_LIST_HEAD(&br->sect_list);
        list_add_tail(&br->list, &bi->br_head);
        return br;
 }
@@ -493,17 +496,17 @@ static int bring_in_cmp(const void *a, const void *b)
        return (fq > fp) - (fq < fp);
 }
 
-static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
+static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
 {
        belady_env_t *env          = bi->bel;
        sched_timestep_t curr_step = sched_get_time_step(env->instr);
        next_use_t *use            = get_current_use(bi, irn);
-       int flags                  = arch_irn_get_flags(env->arch, irn);
+       int flags                  = arch_irn_get_flags(irn);
 
-       assert(!(flags & arch_irn_flags_ignore));
+       assert(!arch_irn_is_ignore(irn));
 
-       /* We have to keep nonspillable nodes in the workingset */
-       if(flags & arch_irn_flags_dont_spill)
+       /* We have to keep non-spillable nodes in the working set */
+       if (flags & arch_irn_flags_dont_spill)
                return 0;
 
        if (!is_usage && use && use->step == curr_step)
@@ -527,7 +530,7 @@ static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, i
        return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
 }
 
-static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
+static inline int is_local_phi(const ir_node *bl, const ir_node *irn)
 {
        return is_Phi(irn) && get_nodes_block(irn) == bl;
 }
@@ -540,10 +543,10 @@ static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
  * @param irn  The node in question.
  * @return     1, if node is something transported into @p bl, 0 if not.
  * @note       The function will only give correct answers in the case
- *             where @p irn is unsed in the block @p bl which is always
+ *             where @p irn is unused in the block @p bl which is always
  *             the case in our usage scenario.
  */
-static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
+static inline int is_transport_in(const ir_node *bl, const ir_node *irn)
 {
        return get_nodes_block(irn) != bl || is_Phi(irn);
 }
@@ -557,10 +560,11 @@ static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
  * @p is_usage indicates that the values in new_vals are used (not defined)
  * In this case reloads must be performed
  */
-static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
+static void displace(block_info_t *bi, workset_t *new_vals, int is_usage)
+{
        belady_env_t *env       = bi->bel;
        workset_t    *ws        = env->ws;
-       ir_node     **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
+       ir_node     **to_insert = ALLOCAN(ir_node*, env->n_regs);
 
        int i, len, max_allowed, demand, iter;
        ir_node *val;
@@ -604,7 +608,7 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
                                }
                        }
                } else {
-                       assert(is_usage || "Defined value already in workset?!?");
+                       assert(is_usage && "Defined value already in workset?!?");
                        DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
                }
        }
@@ -653,7 +657,8 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
  * whether it is used from a register or is reloaded
  * before the use.
  */
-static void belady(belady_env_t *env, int id) {
+static void belady(belady_env_t *env, int id)
+{
        block_info_t *block_info = new_block_info(env, id);
        const ir_node *block     = block_info->bl;
 
@@ -676,7 +681,7 @@ static void belady(belady_env_t *env, int id) {
                int i, arity;
                assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
 
-               /* projs are handled with the tuple value.
+               /* Projs are handled with the tuple value.
                 * Phis are no real instr (see insert_starters())
                 * instr_nr does not increase */
                if (is_Proj(irn) || is_Phi(irn))
@@ -691,7 +696,7 @@ static void belady(belady_env_t *env, int id) {
 
                /* allocate all values _used_ by this instruction */
                workset_clear(new_vals);
-               for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
+               for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
                        workset_insert(env, new_vals, get_irn_n(irn, i));
                }
                DBG((dbg, DBG_DECIDE, "\t* uses\n"));
@@ -703,7 +708,7 @@ static void belady(belady_env_t *env, int id) {
                 * augmenting the pressure. Note, that a variable is dead
                 * if it has no further use in this block and is *not* live end
                 */
-               for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
+               for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
                        ir_node *op = get_irn_n(irn, i);
                        next_use_t *use = get_current_use(block_info, op);
 
@@ -716,7 +721,7 @@ static void belady(belady_env_t *env, int id) {
 
                /* allocate all values _defined_ by this instruction */
                workset_clear(new_vals);
-               if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
+               if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */
                        const ir_edge_t *edge;
 
                        foreach_out_edge(irn, edge) {
@@ -729,10 +734,17 @@ static void belady(belady_env_t *env, int id) {
                DBG((dbg, DBG_DECIDE, "\t* defs\n"));
                displace(block_info, new_vals, 0);
 
+               if (is_op_forking(get_irn_op(env->instr))) {
+                       for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
+                               ir_node *op = get_irn_n(env->instr, i);
+                               block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op);
+                       }
+               }
+
                env->instr_nr++;
        }
 
-       phase_free(&block_info->next_uses);
+       phase_deinit(&block_info->next_uses);
 
        /* Remember end-workset for this block */
        block_info->ws_end = workset_clone(env, &env->ob, env->ws);
@@ -805,14 +817,14 @@ typedef struct {
        irn_action_t  *ia_top;
 } rollback_info_t;
 
-static INLINE block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
+static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
 {
        int id = bi->id;
        assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
        return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
 }
 
-static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
+static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
 {
        block_state_t *bs = get_block_state(ges, bi);
        return bs ? bs->end_state : bi->ws_end;
@@ -821,7 +833,7 @@ static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info
 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
 {
        block_state_t *bs = get_block_state(ges, bi);
-       block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
+       block_state_t *nw = OALLOC(&ges->obst, block_state_t);
 
        nw->next_intern = bs;
        nw->next        = ges->bs_top;
@@ -844,7 +856,7 @@ static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
 
 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
 {
-       irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
+       irn_action_t *ia = OALLOC(&ges->obst, irn_action_t);
 
        ia->irn  = irn;
        ia->bl   = bl;
@@ -854,7 +866,7 @@ static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const
        return ia;
 }
 
-static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
+static inline rollback_info_t trans_begin(global_end_state_t *ges)
 {
        rollback_info_t rb;
        rb.obst_level = obstack_base(&ges->obst);
@@ -863,7 +875,7 @@ static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
        return rb;
 }
 
-static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
+static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
 {
        block_state_t *bs;
 
@@ -951,7 +963,7 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir
         */
 
        {
-               int n_regs = bi->bel->n_regs;
+               int n_regs = bi->free_at_jump;
                int len  = workset_get_length(end);
                int slot = -1;
                int i;
@@ -983,7 +995,7 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir
 
                /*
                 * finally there is some room. we can at least reload the value.
-                * but we will try to let ot live through anyhow.
+                * but we will try to let it live through anyhow.
                 */
                if (slot >= 0) {
                        irn_action_t *vs    = new_irn_action(ges, irn, bi->bl);
@@ -1054,7 +1066,6 @@ static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, d
 
        if (is_transport_in(bl, irn)) {
                int i, n           = get_irn_arity(bl);
-               ir_node **nodes    = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
                rollback_info_t rb = trans_begin(ges);
 
                glob_costs = 0.0;
@@ -1064,10 +1075,10 @@ static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, d
                        double c;
 
                        /*
-                        * there might by unknwons as operands of phis in that case
+                        * there might be Unknowns as operands of Phis; in that case
                         * we set the costs to zero, since they won't get spilled.
                         */
-                       if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
+                       if (arch_irn_consider_in_reg_alloc(env->cls, op))
                                c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
                        else
                                c = 0.0;
@@ -1101,9 +1112,18 @@ static void materialize_and_commit_end_state(global_end_state_t *ges)
        for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
                switch (ia->act) {
                        case irn_act_live_through:
-                               if (is_local_phi(ia->bl, ia->irn)) {
-                                       bitset_add_irn(ges->succ_phis, ia->irn);
-                                       DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
+                               {
+                                       block_info_t *bi = get_block_info(ia->bl);
+                                       bring_in_t *iter;
+
+                                       if (is_local_phi(ia->bl, ia->irn)) {
+                                               bitset_add_irn(ges->succ_phis, ia->irn);
+                                               DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
+                                       }
+
+                                       list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
+                                               ++iter->sect_pressure;
+                                       ++bi->front_pressure;
                                }
                                break;
                        case irn_act_reload:
@@ -1128,7 +1148,6 @@ static void materialize_and_commit_end_state(global_end_state_t *ges)
                        DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
                                                bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
                        bi->pressure = bs->pressure;
-                       /* TODO: commit front pressure */
                        bitset_set(ges->committed, bi->id);
                }
        }
@@ -1195,7 +1214,7 @@ static int get_block_max_pressure(const block_info_t *bi)
  */
 static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
 {
-       block_info_t *bi    = br->bi;
+       block_info_t *bi          = br->bi;
        ir_node *irn              = br->irn;
        ir_node *bl               = bi->bl;
        belady_env_t *env         = ges->env;
@@ -1208,19 +1227,23 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
        assert(front_pressure <= k);
        assert(pressure_upto_use <= k);
 
-       DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %u\n",
+       DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
                                irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));
 
+       // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));
+
        /*
-        * if we cannot bring the value to the use, let's see ifit would be worthwhile
+        * if we cannot bring the value to the use, let's see if it would be worthwhile
         * to bring the value to the beginning of the block to have a better spill
         * location.
         *
         * better _spilled_here will return a node where the value can be spilled after
         * or NULL if this block does not provide a better spill location.
         */
-       if (pressure_upto_use >= k && front_pressure < k)
+#if 1
+       if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
                better_spill_loc = better_spilled_here(br);
+#endif
 
        /*
         * If either we can bring the value to the use or we should try
@@ -1247,7 +1270,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
                 *
                 * If the second is larger than the first,
                 * we have to increment the total block pressure and hence
-                * save the old pressure to restire it in case of failing to
+                * save the old pressure to restore it in case of failing to
                 * bring the variable into the block in a register.
                 */
                trans = trans_begin(ges);
@@ -1269,7 +1292,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
                 *
                 * following actions can be taken:
                 * a) commit changes
-                * b) mark phi as succeded if node was phi
+                * b) mark phi as succeeded if node was phi
                 * c) insert reload at use location
                 * d) give a spill location hint
                 *
@@ -1283,6 +1306,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
 
                /* the costs were acceptable... */
                if (bring_in_costs < local_costs) {
+                       int check = 0;
                        bring_in_t *iter;
 
                        /*
@@ -1295,37 +1319,42 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
                        if (is_local_phi(bl, irn))
                                bitset_add_irn(ges->succ_phis, irn);
 
-                       pressure_inc = bi->pressure - pressure_inc;
-                       assert(pressure_inc >= 0);
-
-                       DBG((dbg, DBG_GLOBAL, "\t-> bring it in\n"));
+                       DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc));
 
                        /* second half of case 2 */
                        if (pressure_upto_use >= k) {
                                DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
                                                        br->first_use, better_spill_loc));
-                               be_add_reload2(env->senv, irn, br->first_use, better_spill_loc, env->cls, 1);
+                               be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
+                               be_add_spill(env->senv, irn, better_spill_loc);
+                               ir_nodeset_insert(env->extra_spilled, irn);
                        }
 
                        /*
-                        * go from the last bring in use to the first and add all the variabled
+                        * go from the last bring in use to the first and add all the variables
                         * which additionally live through the block to their pressure.
                         * at the point were the actually treated use is, we have to increase
-                        * the pressure by one more as the nrought in value starts to count.
+                        * the pressure by one more as the brought in value starts to count.
                         * Finally, adjust the front pressure as well.
                         */
+                       pressure_inc = 0;
                        list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
                                if (iter == br)
                                        pressure_inc += pressure_upto_use < k;
                                iter->sect_pressure += pressure_inc;
+                               check = MAX(check, iter->sect_pressure);
+                               DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
                        }
                        bi->front_pressure += pressure_inc;
+                       assert(MAX(check, bi->front_pressure) <= bi->pressure);
+                       DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));
                }
 
                /* case 3: nothing worked. insert normal reload and rollback. */
                else {
                        DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
                        be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
+                       bitset_add_irn(env->spilled, irn);
                        trans_rollback(ges, &trans);
                }
        }
@@ -1334,6 +1363,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
        else {
                DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use));
                be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
+               bitset_add_irn(env->spilled, irn);
        }
 
        DBG((dbg, DBG_GLOBAL, "\n"));
@@ -1363,10 +1393,15 @@ static bring_in_t **determine_global_order(belady_env_t *env)
        return res;
 }
 
+
+
+
 static void global_assign(belady_env_t *env)
 {
+       ir_nodeset_iterator_t iter;
        global_end_state_t ges;
        bring_in_t **br;
+       ir_node *irn;
        int i, j;
 
        /*
@@ -1381,8 +1416,8 @@ static void global_assign(belady_env_t *env)
        ges.version      = ver_make_newer(ver_oldest);
        ges.succ_phis    = bitset_irg_obstack_alloc(&ges.obst, env->irg);
        ges.committed    = bitset_obstack_alloc(&ges.obst, env->n_blocks);
-       ges.bs_tops      = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0])      * env->n_blocks);
-       ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
+       ges.bs_tops      = OALLOCN(&ges.obst, block_state_t*, env->n_blocks);
+       ges.bs_tops_vers = OALLOCN(&ges.obst, unsigned,       env->n_blocks);
 
        /* invalidate all state stack pointer versions */
        for (i = 0; i < env->n_blocks; ++i) {
@@ -1394,7 +1429,7 @@ static void global_assign(belady_env_t *env)
                        workset_set_version(bi->ws_end, j, ver_youngest);
        }
 
-       /* determine ordeer and optimize them */
+       /* determine order and optimize them */
        for (br = determine_global_order(env); *br; ++br)
                optimize_variable(&ges, *br);
 
@@ -1410,11 +1445,15 @@ static void global_assign(belady_env_t *env)
                        if (!is_Phi(irn))
                                break;
 
-                       if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
+                       if (arch_irn_consider_in_reg_alloc(env->cls, irn)
                                        && !bitset_contains_irn(ges.succ_phis, irn))
                                be_spill_phi(env->senv, irn);
                }
        }
+
+       /* check dominance for specially spilled nodes. */
+       foreach_ir_nodeset (env->extra_spilled, irn, iter)
+               make_spill_locations_dominate_irn(env->senv, irn);
 }
 
 static void collect_blocks(ir_node *bl, void *data)
@@ -1432,7 +1471,7 @@ static void collect_blocks(ir_node *bl, void *data)
  * @param birg  The backend graph
  * @param cls   The register class to spill
  */
-void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
+static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
 {
        ir_graph *irg = be_get_birg_irg(birg);
        belady_env_t env;
@@ -1440,7 +1479,7 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
 
        /* some special classes contain only ignore regs, nothing to do then */
        n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
-       if(n_regs == 0)
+       if (n_regs == 0)
                return;
 
        be_clear_links(irg);
@@ -1457,6 +1496,7 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
        env.senv       = be_new_spill_env(birg);
        env.ef         = be_get_birg_exec_freq(birg);
        env.spilled    = bitset_irg_obstack_alloc(&env.ob, irg);
+       env.extra_spilled = ir_nodeset_new(64);
        env.n_blocks   = 0;
 
        irg_block_walk_graph(irg, NULL, collect_blocks, &env);
@@ -1475,15 +1515,26 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
 
        global_assign(&env);
 
+       /* check dominance for specially spilled nodes. */
+       {
+               ir_nodeset_iterator_t iter;
+               ir_node *irn;
+
+               foreach_ir_nodeset (env.extra_spilled, irn, iter)
+                       make_spill_locations_dominate_irn(env.senv, irn);
+       }
+
        /* Insert spill/reload nodes into the graph and fix usages */
        be_insert_spills_reloads(env.senv);
 
        /* clean up */
        be_delete_spill_env(env.senv);
+       ir_nodeset_del(env.extra_spilled);
 
        obstack_free(&env.ob, NULL);
 }
 
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);
 void be_init_spillbelady2(void)
 {
        lc_opt_entry_t *be_grp    = lc_opt_get_grp(firm_opt_get_root(), "be");
@@ -1498,5 +1549,3 @@ void be_init_spillbelady2(void)
        be_register_spiller("belady2", &belady_spiller);
        FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
 }
-
-BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);