- Split bearch.h correctly into bearch.h and bearch_t.h
[libfirm] / ir / be / beschedrss.c
index 6575100..ec7385a 100644 (file)
@@ -23,6 +23,7 @@
 #include "ircons_t.h"
 #include "irphase_t.h"
 #include "irgwalk.h"
+#include "irdump.h"
 #include "irtools.h"
 #include "irbitset.h"
 #include "irprintf.h"
 #include "height.h"
 
 #include "beabi.h"
+#include "bemodule.h"
 #include "benode_t.h"
 #include "besched_t.h"
+#include "beirg_t.h"
+
+#include <libcore/lc_opts.h>
+#include <libcore/lc_opts_enum.h>
+
 
 #define ARR_LEN_SAFE(arr) ((arr) != NULL ? ARR_LEN((arr)) : 0)
 
 
 #define BLOCK_IDX_MAP(rss, irn) bsearch_for_index(get_irn_idx((irn)), (rss)->idx_map, ARR_LEN_SAFE((rss)->idx_map), 1)
 
+/* the rss options */
+typedef struct _rss_opts_t {
+       int dump_flags;
+} rss_opts_t;
+
 /* Represents a child with associated costs */
 typedef struct _child {
        ir_node *irn;
@@ -59,15 +71,15 @@ typedef struct _rss_edge {
 
 /* Represents a connected bipartite component. */
 typedef struct _cbc {
-       nodeset *parents;       /**< = S  a set of value producers */
-       nodeset *children;      /**< = T  a set of value consumers */
+       ir_nodeset_t parents;       /**< = S  a set of value producers */
+       ir_nodeset_t children;      /**< = T  a set of value consumers */
        pset    *kill_edges;    /**< = E  a set of edges (t in T, s in S) such as each s in S gets killed by at least one t in T */
        int     nr;             /**< a deterministic index for set insertion (used as hash) */
 } cbc_t;
 
 /* Represents a disjoint value DAG. */
 typedef struct _dvg {
-       nodeset *nodes;
+       ir_nodeset_t nodes;
        pset    *edges;
 } dvg_t;
 
@@ -82,32 +94,22 @@ typedef struct _rss_irn {
        ir_node **consumer;         /**< Sorted consumer array (needed for faster access) */
 
        plist_t  *parent_list;      /**< List of parents */
-       ir_node **parents;          /**< Sorted parent array (needed for faster access) */
+       plist_t  *pkiller_list;     /**< List of potential killers */
 
        plist_t  *descendant_list;  /**< List of descendants */
        ir_node **descendants;      /**< Sorted descendant array (needed for faster access) */
 
-       plist_t  *pkiller_list;     /**< List of potential killers */
-       ir_node **pkillers;         /**< Sorted pkiller array (needed for faster access) */
-
-#if 0
-       plist_t  *dvg_desc_list;    /**< List of all descendants in the DVG */
-       ir_node **dvg_desc;         /**< Sorted dvg descendant array (needed for faster access) */
-
-       plist_t  *dvg_pkiller_list; /**< List of potential killers in the DVG */
-       ir_node **dvg_pkiller;      /**< Sorted dvg pkiller array (needed for faster access) */
-
-       plist_t  *dvg_user_list;    /**< List of users in the disjoint value DAG DVG */
-#endif
-       plist_t  *kill_value_list;  /**< List of values getting potentially killed by this node */
-
        ir_node  *killer;           /**< The selected unique killer */
        ir_node  *irn;              /**< The corresponding firm node to this rss_irn */
 
        chain_t  *chain;            /**< The chain, this node is associated to */
 
+       unsigned desc_walk;         /**< visited flag for collecting descendants */
+       unsigned kill_count;        /**< number of nodes killed by this one */
+
        unsigned live_out : 1;      /**< irn has consumers outside of it's block */
        unsigned visited  : 1;      /**< visited flag for bipartite decomposition */
+       unsigned havecons : 1;      /**< visited flag collect consumer */
        unsigned handled  : 1;      /**< flag indicating whether or not the list structures have been build */
        unsigned dumped   : 1;      /**< flag indication whether or not this node was dumped */
 } rss_irn_t;
@@ -119,10 +121,11 @@ typedef struct _serialization {
        rss_edge_t *edge;    /* the edge selected for the serialization */
        int        omega1;   /* estimated: available regs - RS reduction */
        int        omega2;   /* increase in critical path length */
+       int        new_killer;
 } serialization_t;
 
 typedef struct _rss {
-       phase_t          ph;              /**< Phase to hold some data */
+       ir_phase          ph;              /**< Phase to hold some data */
        heights_t        *h;              /**< The current height object */
        ir_graph         *irg;            /**< The irg to preprocess */
        plist_t          *nodes;          /**< The list of interesting nodes */
@@ -132,6 +135,9 @@ typedef struct _rss {
        ir_node          *block;          /**< The current block in progress. */
        int              *idx_map;        /**< Mapping irn indices to per block indices */
        unsigned         max_height;      /**< maximum height in the current block */
+       rss_opts_t       *opts;           /**< The options */
+       be_lv_t          *liveness;       /**< The liveness information for this irg */
+       pset             *live_block;     /**< Values alive at end of block */
        const arch_register_class_t *cls; /**< The current register class */
        DEBUG_ONLY(firm_dbg_module_t *dbg);
 } rss_t;
@@ -142,19 +148,59 @@ typedef struct _rss {
  * We need some special nodes:
  * a source and a sink for all live-in and live-out values of a block
  */
-
-static enum {
+enum {
        iro_rss_Source,
        iro_rss_Sink,
        iro_rss_last
 };
 
+/** The opcode of the rss_Source node once allocated. */
+static ir_op *op_rss_Source;
+/** The opcode of the rss_Sink node once allocated. */
+static ir_op *op_rss_Sink;
+
+/** The rss_Source node of the current graph. */
 static ir_node *_source = NULL;
+/** The rss_Sink node of the current graph. */
 static ir_node *_sink   = NULL;
 
 #define is_Source(irn) ((irn) == _source)
 #define is_Sink(irn)   ((irn) == _sink)
 
+enum {
+       RSS_DUMP_NONE  = 0,
+       RSS_DUMP_CBC   = 1 << 0,
+       RSS_DUMP_PKG   = 1 << 1,
+       RSS_DUMP_KILL  = 1 << 2,
+       RSS_DUMP_DVG   = 1 << 3,
+       RSS_DUMP_MAXAC = 1 << 4,
+       RSS_DUMP_ALL   = (RSS_DUMP_MAXAC << 1) - 1,
+};
+
+static rss_opts_t rss_options = {
+       RSS_DUMP_NONE,
+};
+
+static const lc_opt_enum_int_items_t dump_items[] = {
+       { "none",  RSS_DUMP_NONE  },
+       { "cbc",   RSS_DUMP_CBC   },
+       { "pkg",   RSS_DUMP_PKG   },
+       { "kill",  RSS_DUMP_KILL  },
+       { "dvg",   RSS_DUMP_DVG   },
+       { "maxac", RSS_DUMP_MAXAC },
+       { "all",   RSS_DUMP_ALL   },
+       { NULL, 0 }
+};
+
+static lc_opt_enum_int_var_t dump_var = {
+       &rss_options.dump_flags, dump_items
+};
+
+static const lc_opt_table_entry_t rss_option_table[] = {
+       LC_OPT_ENT_ENUM_MASK("dump", "dump phases", &dump_var),
+       { NULL }
+};
+
 /******************************************************************************
  *  _          _                    __                  _   _
  * | |        | |                  / _|                | | (_)
@@ -167,15 +213,19 @@ static ir_node *_sink   = NULL;
  ******************************************************************************/
 
 /**
- * Acquire opcodes and create source and sink nodes.
+ * Acquire opcodes if needed and create source and sink nodes.
  */
 static void init_rss_special_nodes(ir_graph *irg) {
-       ir_node *block         = get_irg_start_block(irg);
-       int     iro_rss_base   = get_next_ir_opcodes(iro_rss_last);
-       ir_op   *op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
-       ir_op   *op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
-       _source                = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
-       _sink                  = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
+       ir_node *block;
+
+       if (op_rss_Source == NULL) {
+               int iro_rss_base = get_next_ir_opcodes(iro_rss_last);
+               op_rss_Source = new_ir_op(iro_rss_base + iro_rss_Source, "rss_Source", op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
+               op_rss_Sink   = new_ir_op(iro_rss_base + iro_rss_Sink,   "rss_Sink",   op_pin_state_pinned, irop_flag_none, oparity_zero, 0, 0, NULL);
+       }
+       block   = get_irg_start_block(irg);
+       _source = new_ir_node(NULL, irg, block, op_rss_Source, mode_ANY, 0, NULL);
+       _sink   = new_ir_node(NULL, irg, block, op_rss_Sink, mode_ANY, 0, NULL);
 }
 
 static int cmp_int(const void *a, const void *b) {
@@ -255,19 +305,24 @@ static ir_node **build_sorted_array_from_list(plist_t *irn_list, struct obstack
  *
  *****************************************************/
 
-static void dump_nodeset(nodeset *ns, const char *prefix) {
+#ifdef DEBUG_libfirm
+static void dump_nodeset(ir_nodeset_t *ns, const char *prefix) {
+       ir_nodeset_iterator_t iter;
        ir_node *irn;
-       foreach_nodeset(ns, irn) {
-               ir_printf("%s%+F\n", prefix, irn);
+
+       ir_nodeset_iterator_init(&iter, ns);
+       while( (irn = ir_nodeset_iterator_next(&iter)) != NULL ) {
+               ir_fprintf(stderr, "%s%+F\n", prefix, irn);
        }
 }
+#endif
 
 static void build_file_name(rss_t *rss, const char *suffix, size_t suf_len, char *buf, size_t len) {
        const char *irg_name;
 
        memset(buf, 0, len);
        irg_name = get_entity_name(get_irg_entity(rss->irg));
-       snprintf(buf, len - suf_len, "%s-%s-block-%d",
+       snprintf(buf, len - suf_len, "%s-%s-block-%ld",
                irg_name, arch_register_class_name(rss->cls), get_irn_node_nr(rss->block));
        strcat(buf, suffix);
 }
@@ -288,14 +343,15 @@ static void debug_vcg_dump_bipartite(rss_t *rss) {
        fprintf(f, "manhattan_edges: yes\n\n");
 
        foreach_pset(rss->cbc_set, cbc) {
+               ir_nodeset_iterator_t iter;
                ir_node    *n;
                rss_edge_t *ke;
 
                fprintf(f, "graph: { titel: \"cbc %d\" label: \"cbc %d\" status:clustered color:yellow\n", cbc->nr, cbc->nr);
-               foreach_nodeset(cbc->parents, n) {
+               foreach_ir_nodeset(&cbc->parents, n, iter) {
                        ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
                }
-               foreach_nodeset(cbc->children, n) {
+               foreach_ir_nodeset(&cbc->children, n, iter) {
                        ir_fprintf(f, "node: { title: \"n%d_%d\" label: \"%+F\" }\n", get_irn_node_nr(n), cbc->nr, n);
                }
                foreach_pset(cbc->kill_edges, ke) {
@@ -323,11 +379,13 @@ static void debug_vcg_dump_kill(rss_t *rss) {
        fprintf(f, "layoutalgorithm: mindepth\n");
        fprintf(f, "manhattan_edges: yes\n\n");
 
-       /* first: reset dumped flag of all nodes */
-       foreach_plist(rss->nodes, el) {
-               ir_node   *irn  = plist_element_get_value(el);
-               rss_irn_t *rirn = get_rss_irn(rss, irn);
-               rirn->dumped = 0;
+       {
+               /* reset dump_flag */
+               ir_node *irn;
+               foreach_phase_irn(&rss->ph, irn) {
+                       rss_irn_t *node = get_rss_irn(rss, irn);
+                       node->dumped = 0;
+               }
        }
 
        /* dump all nodes and their killers */
@@ -355,7 +413,7 @@ static void debug_vcg_dump_kill(rss_t *rss) {
 }
 
 /* Dumps the potential killing DAG (PKG) as vcg. */
-static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
+static void debug_vcg_dump_pkg(rss_t *rss, ir_nodeset_t *max_ac, int iteration) {
        FILE              *f;
        char              file_name[256];
        char              *suffix   = alloca(32);
@@ -367,7 +425,7 @@ static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
                snprintf(suffix, 32, "%s", suffix1);
        }
        else {
-               snprintf(suffix, 32, "-%0.2d%s", iteration, suffix2);
+               snprintf(suffix, 32, "-%02d%s", iteration, suffix2);
        }
 
        build_file_name(rss, suffix, strlen(suffix) + 1, file_name, sizeof(file_name));
@@ -378,6 +436,15 @@ static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
        fprintf(f, "layoutalgorithm: mindepth\n");
        fprintf(f, "manhattan_edges: yes\n\n");
 
+       {
+               /* reset dump_flag */
+               ir_node *irn;
+               foreach_phase_irn(&rss->ph, irn) {
+                       rss_irn_t *node = get_rss_irn(rss, irn);
+                       node->dumped = 0;
+               }
+       }
+
        foreach_plist(rss->nodes, el) {
                ir_node   *irn  = plist_element_get_value(el);
                rss_irn_t *rirn = get_rss_irn(rss, irn);
@@ -385,22 +452,30 @@ static void debug_vcg_dump_pkg(rss_t *rss, nodeset *max_ac, int iteration) {
                plist_element_t *k_el;
 
                /* dump selected saturating values in yellow */
-               if (max_ac && nodeset_find(max_ac, irn))
+               if (max_ac && ir_nodeset_contains(max_ac, irn))
                        c1 = "color:yellow";
 
-               ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
-               rirn->dumped = 1;
+               if (! rirn->dumped) {
+                       if (rirn->chain)
+                               ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(irn), irn, rirn->chain->nr, c1);
+                       else
+                               ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(irn), irn, c1);
+                       rirn->dumped = 1;
+               }
 
                foreach_plist(rirn->pkiller_list, k_el) {
                        ir_node   *pkiller = plist_element_get_value(k_el);
                        rss_irn_t *pk_rirn = get_rss_irn(rss, pkiller);
                        char      *c2      = "";
 
-                       if (max_ac && nodeset_find(max_ac, pkiller))
+                       if (max_ac && ir_nodeset_contains(max_ac, pkiller))
                                c2 = "color:yellow";
 
                        if (! pk_rirn->dumped) {
-                               ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
+                               if (pk_rirn->chain)
+                                       ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F   c%d\" %s }\n", get_irn_node_nr(pkiller), pkiller, pk_rirn->chain->nr, c2);
+                               else
+                                       ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" %s }\n", get_irn_node_nr(pkiller), pkiller, c2);
                                pk_rirn->dumped = 1;
                        }
                        ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
@@ -416,6 +491,7 @@ static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
        static const char suffix[] = "-RSS-DVG.vcg";
        FILE       *f;
        char       file_name[256];
+       ir_nodeset_iterator_t iter;
        ir_node    *irn;
        rss_edge_t *edge;
 
@@ -428,15 +504,12 @@ static void debug_vcg_dump_dvg(rss_t *rss, dvg_t *dvg) {
        fprintf(f, "manhattan_edges: yes\n\n");
 
        /* dump all nodes */
-       foreach_nodeset(dvg->nodes, irn) {
+       foreach_ir_nodeset(&dvg->nodes, irn, iter) {
                ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
        }
 
        /* dump all edges */
        foreach_pset(dvg->edges, edge) {
-               rss_irn_t *src = get_rss_irn(rss, edge->src);
-               rss_irn_t *tgt = get_rss_irn(rss, edge->tgt);
-
                ir_fprintf(f, "edge: { sourcename: \"n%d\" targetname: \"n%d\" }\n",
                        get_irn_node_nr(edge->src), get_irn_node_nr(edge->tgt));
        }
@@ -451,6 +524,7 @@ static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
        static const char suffix[] = "-RSS-DVG-PKG.vcg";
        FILE       *f;
        char       file_name[256];
+       ir_nodeset_iterator_t iter;
        ir_node    *irn;
 
        build_file_name(rss, suffix, sizeof(suffix), file_name, sizeof(file_name));
@@ -462,12 +536,12 @@ static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
        fprintf(f, "manhattan_edges: yes\n\n");
 
        /* dump all nodes */
-       foreach_nodeset(dvg->nodes, irn) {
+       foreach_ir_nodeset(&dvg->nodes, irn, iter) {
                ir_fprintf(f, "node: { title: \"n%d\" label: \"%+F\" }\n", get_irn_node_nr(irn), irn);
        }
 
        /* dump all edges */
-       foreach_nodeset(dvg->nodes, irn) {
+       foreach_ir_nodeset(&dvg->nodes, irn, iter) {
                rss_irn_t       *node = get_rss_irn(rss, irn);
                plist_element_t *el;
 
@@ -485,36 +559,30 @@ static void debug_vcg_dump_dvg_pkiller(rss_t *rss, dvg_t *dvg) {
 /**
  * In case there is no rss information for irn, initialize it.
  */
-static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
+static void *init_rss_irn(ir_phase *ph, ir_node *irn, void *old) {
        rss_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
 
-       res->descendant_list  = plist_obstack_new(phase_obst(ph));
-       res->descendants      = NULL;
+       res->descendant_list = plist_obstack_new(phase_obst(ph));
+       res->descendants     = NULL;
 
-       res->consumer_list    = plist_obstack_new(phase_obst(ph));
-       res->consumer         = NULL;
+       res->consumer_list   = plist_obstack_new(phase_obst(ph));
+       res->consumer        = NULL;
 
-       res->pkiller_list     = plist_obstack_new(phase_obst(ph));
-       res->pkillers         = NULL;
+       res->pkiller_list    = plist_obstack_new(phase_obst(ph));
 
-       res->parent_list      = plist_obstack_new(phase_obst(ph));
-       res->parents          = NULL;
+       res->parent_list     = plist_obstack_new(phase_obst(ph));
 
-       //res->dvg_desc_list    = plist_obstack_new(phase_obst(ph));
-       //res->dvg_desc         = NULL;
+       res->killer          = NULL;
+       res->irn             = irn;
+       res->chain           = NULL;
 
-       res->kill_value_list  = plist_obstack_new(phase_obst(ph));
-       //res->dvg_user_list    = plist_obstack_new(phase_obst(ph));
-       //res->dvg_pkiller_list = plist_obstack_new(phase_obst(ph));
-
-       res->killer           = NULL;
-       res->irn              = irn;
-       res->chain            = NULL;
-
-       res->live_out         = 0;
-       res->visited          = 0;
-       res->handled          = 0;
-       res->dumped           = 0;
+       res->kill_count      = 0;
+       res->desc_walk       = 0;
+       res->live_out        = 0;
+       res->visited         = 0;
+       res->handled         = 0;
+       res->dumped          = 0;
+       res->havecons        = 0;
 
        return res;
 }
@@ -522,11 +590,20 @@ static void *init_rss_irn(phase_t *ph, ir_node *irn, void *old) {
 /**
  * Collect all nodes data dependent on current node.
  */
-static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink) {
+static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *got_sink, unsigned cur_desc_walk) {
        const ir_edge_t *edge;
-       ir_node         *block = rss->block;
-       ir_edge_kind_t  ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
-       int i;
+       rss_irn_t       *cur_node = get_rss_irn(rss, irn);
+       ir_node         *block    = rss->block;
+       ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
+       int             i;
+
+       if (cur_node->desc_walk >= cur_desc_walk)
+               return;
+       cur_node->desc_walk = cur_desc_walk;
+
+       /* Stop at Barriers */
+       if (be_is_Barrier(irn))
+               return;
 
        /* loop over normal and dependency edges */
        for (i = 0; i < 2; ++i) {
@@ -537,20 +614,38 @@ static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *
                        if (arch_irn_is(rss->arch_env, user, ignore))
                                continue;
 
-                       /* check if user lives in block and is not a control flow node */
-                       if (get_nodes_block(user) == block && get_irn_mode(user) != mode_X) {
-                               /* skip mode_T nodes */
-                               if (get_irn_mode(user) != mode_T && ! plist_has_value(rirn->descendant_list, user)) {
-                                       plist_insert_back(rirn->descendant_list, user);
-                                       DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
-                               }
-                               collect_descendants(rss, rirn, user, got_sink);
+                       /*
+                               (a) mode_X means Jump            -> out_edge
+                               (b) Phi as user of a normal node -> out-edge
+                       */
+                       if (get_irn_mode(user) == mode_X || is_Phi(user)) {
+                               if (! *got_sink)
+                                       goto force_sink;
+                               else
+                                       continue;
                        }
-                       else if (! *got_sink) {
-                               /* user lives out of block: add sink as descendant if not already done */
-                               plist_insert_back(rirn->descendant_list, _sink);
-                               *got_sink = 1;
-                               DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
+
+                       if (is_Proj(user)) {
+
+                               //if (arch_get_irn_reg_class(rss->arch_env, user, -1) == rss->cls)
+                                       collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
+                       }
+                       else {
+                               /* check if user lives in block and is not a control flow node */
+                               if (get_nodes_block(user) == block) {
+                                       if (! plist_has_value(rirn->descendant_list, user)) {
+                                               plist_insert_back(rirn->descendant_list, user);
+                                               DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", user));
+                                       }
+                                       collect_descendants(rss, rirn, user, got_sink, cur_desc_walk);
+                               }
+                               else if (! *got_sink) {
+force_sink:
+                                       /* user lives out of block: add sink as descendant if not already done */
+                                       plist_insert_back(rirn->descendant_list, _sink);
+                                       *got_sink = 1;
+                                       DBG((rss->dbg, LEVEL_2, "\t\tdescendant %+F\n", _sink));
+                               }
                        }
                }
        }
@@ -559,34 +654,13 @@ static void collect_descendants(rss_t *rss, rss_irn_t *rirn, ir_node *irn, int *
 /**
  * Handles a single consumer.
  */
-static int collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int got_sink) {
-       ir_node        *block   = rss->block;
-       ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
-       int            i;
-
-       if (get_nodes_block(consumer) == block) {
-               /* the consumer of a mode_T node are it's projs */
-               if (get_irn_mode(consumer) == mode_T) {
-                       const ir_edge_t *cons_edge;
-
-                       DBG((rss->dbg, LEVEL_2, "\t\tmode_T consumer %+F skipped\n", consumer));
-
-                       for (i = 0; i < 2; ++i) {
-                               foreach_out_edge_kind(consumer, cons_edge, ekind[i]) {
-                                       ir_node *cons_proj = get_edge_src_irn(cons_edge);
-
-                                       assert(get_nodes_block(cons_proj) == block && "Proj in wrong block!");
+static void collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *consumer, int *got_sink) {
+       ir_node *block = rss->block;
 
-                                       /* skip ignore nodes, as they do not really contribute to register pressure */
-                                       if (arch_irn_is(rss->arch_env, cons_proj, ignore))
-                                               continue;
+       assert(! is_Proj(consumer) && "Cannot handle Projs");
 
-                                       plist_insert_back(rss_irn->consumer_list, cons_proj);
-                                       DBG((rss->dbg, LEVEL_3, "\t\t\treal consumer %+F\n", cons_proj));
-                               }
-                       }
-               }
-               else if (! arch_irn_is(rss->arch_env, consumer, ignore)) {
+       if (! is_Phi(consumer) && ! is_Block(consumer) && get_nodes_block(consumer) == block) {
+               if (! arch_irn_is(rss->arch_env, consumer, ignore) && ! plist_has_value(rss_irn->consumer_list, consumer)) {
                        plist_insert_back(rss_irn->consumer_list, consumer);
                        DBG((rss->dbg, LEVEL_2, "\t\tconsumer %+F\n", consumer));
                }
@@ -594,97 +668,62 @@ static int collect_single_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *cons
        else {
                rss_irn->live_out = 1;
                DBG((rss->dbg, LEVEL_2, "\t\tlive out %+F", consumer));
-               if (! got_sink) {
+               if (! *got_sink) {
                        plist_insert_back(rss_irn->consumer_list, _sink);
-                       got_sink = 1;
+                       *got_sink = 1;
                        DB((rss->dbg, LEVEL_2, ", %+F added instead", _sink));
                }
                DB((rss->dbg, LEVEL_2, "\n"));
        }
-
-       return got_sink;
 }
 
 /**
  * Collect all nodes consuming the value(s) produced by current node.
  */
-static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn) {
+static void collect_consumer(rss_t *rss, rss_irn_t *rss_irn, ir_node *irn, int *got_sink) {
        const ir_edge_t *edge;
-       int             got_sink = 0;
        int             i;
-       ir_edge_kind_t  ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
+       ir_edge_kind_t  ekind[2]  = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
+       rss_irn_t       *cur_node = get_rss_irn(rss, irn);
+
+       if (cur_node->havecons)
+               return;
+       cur_node->havecons = 1;
 
        for (i = 0; i < 2; ++i) {
                foreach_out_edge_kind(irn, edge, ekind[i]) {
                        ir_node *consumer = get_edge_src_irn(edge);
-                       got_sink = collect_single_consumer(rss, rss_irn, consumer, got_sink);
-               }
-       }
-}
-
-#if 0
-/**
- * We need to build the consumer and descendant list for _source.
- */
-static void collect_node_info_source(rss_t *rss) {
-       const ir_edge_t *edge;
-       rss_irn_t       *rirn  = get_rss_irn(rss, _source);
-       ir_node         *block = rss->block;
-
-       if (rirn->handled)
-               return;
-
-       foreach_out_edge(block, edge) {
-               ir_node *irn = get_edge_src_irn(edge);
-               int     i;
-
-               for (i = get_irn_arity(n) - 1; i >= 0; --i) {
 
+                       if (is_Proj(consumer)) {
+                               //if (arch_get_irn_reg_class(rss->arch_env, consumer, -1) == rss->cls)
+                                       collect_consumer(rss, rss_irn, consumer, got_sink);
+                       }
+                       else
+                               collect_single_consumer(rss, rss_irn, consumer, got_sink);
                }
        }
 }
 
-static void reset_node_info(rss_irn_t *rss_irn) {
-       /* Beware: array data resides on phase obstack, so it gets removed automatically */
-
-       plist_clear(rss_irn->consumer_list);
-       rss_irn->consumer = NULL;
-
-       plist_clear(rss_irn->parent_list);
-       rss_irn->parents = NULL;
-
-       plist_clear(rss_irn->descendant_list);
-       rss_irn->descendants = NULL;
-
-       plist_clear(rss_irn->pkiller_list);
-       rss_irn->pkillers = NULL;
-
-       plist_clear(rss_irn->kill_value_list);
-
-       rss_irn->killer   = NULL;
-       rss_irn->live_out = 0;
-       rss_irn->visited  = 0;
-       rss_irn->handled  = 0;
-}
-#endif /* if 0 */
-
 /**
  * Collects all consumer and descendant of a irn.
  */
 static void collect_node_info(rss_t *rss, ir_node *irn) {
-       rss_irn_t *rss_irn = get_rss_irn(rss, irn);
-       int       got_sink;
+       static unsigned cur_desc_walk = 0;
+       rss_irn_t       *rss_irn      = get_rss_irn(rss, irn);
+       int             got_sink;
 
-       assert(get_irn_mode(irn) != mode_T && "Cannot handle mode_T nodes.");
+       assert(! is_Proj(irn) && "Cannot handle Projs.");
 
        /* check if node info is already available */
        if (rss_irn->handled)
-               phase_reinit_single_irn_data(&rss->ph, irn);
+               return;
+               //phase_reinit_single_irn_data(&rss->ph, irn);
 
        DBG((rss->dbg, LEVEL_1, "\tcomputing consumers of %+F:\n", irn));
 
        /* collect all consumer */
-       collect_consumer(rss, rss_irn, irn);
+       got_sink = 0;
+       collect_consumer(rss, rss_irn, irn, &got_sink);
 
        /* build sorted consumer array */
        rss_irn->consumer = build_sorted_array_from_list(rss_irn->consumer_list, phase_obst(&rss->ph));
@@ -693,7 +732,7 @@ static void collect_node_info(rss_t *rss, ir_node *irn) {
 
        /* collect descendants */
        got_sink = 0;
-       collect_descendants(rss, rss_irn, irn, &got_sink);
+       collect_descendants(rss, rss_irn, irn, &got_sink, ++cur_desc_walk);
 
        /* build sorted descendant array */
        rss_irn->descendants = build_sorted_array_from_list(rss_irn->descendant_list, phase_obst(&rss->ph));
@@ -769,16 +808,6 @@ static void update_node_info(rss_t *rss, ir_node *irn, ir_node *pk_irn) {
                node->descendants = build_sorted_array_from_list(node->descendant_list, phase_obst(&rss->ph));
        }
 
-#if 0
-       /* check, if pkiller is a potential killer of irn */
-       if (is_potential_killer(rss, pkiller, node)) {
-               if (! plist_has_value(node->pkiller_list, pk_irn))
-                       plist_insert_back(node->pkiller_list, pk_irn);
-               if (! plist_has_value(pkiller->kill_value_list, irn))
-                       plist_insert_back(pkiller->kill_value_list, irn);
-               DBG((rss->dbg, LEVEL_1, "\tpotential killer %+F added to %+F\n", pk_irn, irn));
-       }
-#endif
 }
 
 /**
@@ -802,8 +831,7 @@ static void compute_pkill_set(rss_t *rss) {
                        if (is_potential_killer(rss, v, u)) {
                                if (! plist_has_value(u->pkiller_list, v_irn))
                                        plist_insert_back(u->pkiller_list, v_irn);
-                               if (! plist_has_value(v->kill_value_list, u_irn))
-                                       plist_insert_back(v->kill_value_list, u_irn);
+                               v->kill_count++;
                                DBG((rss->dbg, LEVEL_2, "\t\tpotential killer %+F\n", v_irn));
                        }
                }
@@ -811,10 +839,8 @@ static void compute_pkill_set(rss_t *rss) {
                u->killer = _sink;
        }
 
-       DEBUG_ONLY(
-               if (firm_dbg_get_mask(rss->dbg))
-                       debug_vcg_dump_pkg(rss, NULL, 0);
-       )
+       if (rss->opts->dump_flags & RSS_DUMP_PKG)
+               debug_vcg_dump_pkg(rss, NULL, 0);
 }
 
 /**
@@ -844,17 +870,19 @@ static void build_kill_edges(rss_t *rss, pset *epk) {
        }
 }
 
+#ifdef DEBUG_libfirm
 /* print the given cbc for debugging purpose */
 static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
+       ir_nodeset_iterator_t iter;
        ir_node    *n;
        rss_edge_t *ke;
 
        DBG((mod, LEVEL_3, "\t\tS = set of parents:\n"));
-       foreach_nodeset(cbc->parents, n) {
+       foreach_ir_nodeset(&cbc->parents, n, iter) {
                DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
        }
        DBG((mod, LEVEL_3, "\t\tT = set of children:\n"));
-       foreach_nodeset(cbc->children, n) {
+       foreach_ir_nodeset(&cbc->children, n, iter) {
                DBG((mod, LEVEL_3, "\t\t\t%+F\n", n));
        }
        DBG((mod, LEVEL_3, "\t\tE = Edges from producers to consumers\n"));
@@ -862,6 +890,7 @@ static void debug_print_cbc(firm_dbg_module_t *mod, cbc_t *cbc) {
                DBG((mod, LEVEL_3, "\t\t\t%+F -> %+F\n", ke->src, ke->tgt));
        }
 }
+#endif
 
 /**
  * Construct the bipartite decomposition.
@@ -883,12 +912,14 @@ static void compute_bipartite_decomposition(rss_t *rss) {
                rss_irn_t *u       = get_rss_irn(rss, u_irn);
                int       p_change = 1;
                int       c_change = 1;
+               int       vrfy_ok  = 1;
 
                cbc_t           *cbc;
                plist_element_t *el2;
                rss_edge_t      *k_edge;
                rss_edge_t      *kedge_root = NULL;
                ir_node         *t_irn, *s_irn;
+               ir_nodeset_iterator_t iter;
 
                if (u->visited || u_irn == _sink)
                        continue;
@@ -899,8 +930,8 @@ static void compute_bipartite_decomposition(rss_t *rss) {
                cbc->nr = cur_num++;
 
                /* initialize S_cb */
-               cbc->parents = new_nodeset(5);
-               nodeset_insert(cbc->parents, u_irn);
+               ir_nodeset_init_size(&cbc->parents, 5);
+               ir_nodeset_insert(&cbc->parents, u_irn);
                DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (init)\n", u_irn));
 
                /* E_cb = empty */
@@ -909,9 +940,9 @@ static void compute_bipartite_decomposition(rss_t *rss) {
                /* each parent gets killed by at least one value from children */
 
                /* T_cb = PK_successors(u) */
-               cbc->children = new_nodeset(5);
+               ir_nodeset_init_size(&cbc->children, 5);
                foreach_plist(u->pkiller_list, el2) {
-                       nodeset_insert(cbc->children, plist_element_get_value(el2));
+                       ir_nodeset_insert(&cbc->children, plist_element_get_value(el2));
                        DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (init)\n", plist_element_get_value(el2)));
                }
 
@@ -921,55 +952,53 @@ static void compute_bipartite_decomposition(rss_t *rss) {
                        until the sets don't change any more
                */
                while (p_change || c_change) {
+                       ir_nodeset_iterator_t iter;
                        p_change = c_change = 0;
 
                        /* accumulate parents */
-                       foreach_nodeset(cbc->children, t_irn) {
-                               rss_irn_t *t = get_rss_irn(rss, t_irn);
-                               plist_element_t *kvl_el;
-
+                       foreach_ir_nodeset(&cbc->children, t_irn, iter) {
                                foreach_pset(epk, k_edge) {
                                        ir_node *val = k_edge->src;
 
-                                       if (k_edge->src == t_irn && ! nodeset_find(cbc->parents, k_edge->src)) {
-                                               nodeset_insert(cbc->parents, val);
+                                       if (k_edge->tgt == t_irn && ! ir_nodeset_contains(&cbc->parents, val)) {
+                                               ir_nodeset_insert(&cbc->parents, val);
                                                p_change = 1;
-                                               DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents\n", val));
+                                               DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to parents (killed by %+F)\n", val, t_irn));
                                        }
                                }
                        }
 
                        /* accumulate children */
-                       foreach_nodeset(cbc->parents, s_irn) {
-                               rss_irn_t *s = get_rss_irn(rss, s_irn);
-                               plist_element_t *pkl_el;
-
-                               foreach_plist(s->pkiller_list, pkl_el) {
-                                       ir_node *val = plist_element_get_value(pkl_el);
+                       foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
+                               foreach_pset(epk, k_edge) {
+                                       ir_node *val = k_edge->tgt;
 
-                                       if (! nodeset_find(cbc->children, val)) {
-                                               nodeset_insert(cbc->children, val);
+                                       if (k_edge->src == s_irn  && ! ir_nodeset_contains(&cbc->children, val)) {
+                                               ir_nodeset_insert(&cbc->children, val);
                                                c_change = 1;
-                                               DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children\n", val));
+                                               DBG((rss->dbg, LEVEL_3, "\t\t\t%+F added to children (kills %+F)\n", val, s_irn));
                                        }
                                }
                        }
                }
 
                /* mark all parent values as visited */
-               foreach_nodeset(cbc->parents, s_irn) {
+               foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
                        rss_irn_t *s = get_rss_irn(rss, s_irn);
                        s->visited = 1;
                        /* assure bipartite property */
-                       if (nodeset_find(cbc->children, s_irn)) {
-                               nodeset_remove(cbc->children, s_irn);
+#if 0
+                       if (ir_nodeset_contains(&cbc->children, s_irn)) {
+                               ir_nodeset_remove(&cbc->children, s_irn);
                                DBG((rss->dbg, LEVEL_3, "\t\t\t%+F removed from to children\n", s_irn));
                        }
+#endif
                }
 
                /* update edges */
                foreach_pset(epk, k_edge) {
-                       if (nodeset_find(cbc->parents, k_edge->src) && nodeset_find(cbc->children, k_edge->tgt)) {
+                       if (ir_nodeset_contains(&cbc->parents, k_edge->src) &&
+                           ir_nodeset_contains(&cbc->children, k_edge->tgt)) {
                                pset_insert(cbc->kill_edges, k_edge, HASH_RSS_EDGE(k_edge));
                                /*
                                        Link all k_edges which are about to be removed together.
@@ -985,13 +1014,38 @@ static void compute_bipartite_decomposition(rss_t *rss) {
                        pset_remove(epk, kedge_root, HASH_RSS_EDGE(kedge_root));
                }
 
+               /* verify the cbc */
+               foreach_ir_nodeset(&cbc->parents, s_irn, iter) {
+                       int is_killed = 0;
+
+                       foreach_pset(cbc->kill_edges, k_edge) {
+                               if (k_edge->src == s_irn) {
+                                       is_killed = 1;
+                                       pset_break(cbc->kill_edges);
+                                       break;
+                               }
+                       }
+
+                       if (! is_killed) {
+                               vrfy_ok = 0;
+                               ir_fprintf(stderr, "Warning: parent %+F is not killed in current cbc\n", s_irn);
+                       }
+               }
+               assert(vrfy_ok && "Verification of CBC failed");
+
                /* add the connected bipartite component */
-               pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
-               DBG((rss->dbg, LEVEL_1, "\tbipartite component %d inserted:\n", cbc->nr));
-               DEBUG_ONLY(debug_print_cbc(rss->dbg, cbc));
+               if (ir_nodeset_size(&cbc->parents) > 0 &&
+                   ir_nodeset_size(&cbc->children) > 0 &&
+                       pset_count(cbc->kill_edges) > 0)
+                       pset_insert(rss->cbc_set, cbc, (unsigned)cbc->nr);
+               DBG((rss->dbg, LEVEL_2, "\tbipartite component %d inserted:\n", cbc->nr));
+               DEBUG_ONLY(
+                       if (firm_dbg_get_mask(rss->dbg) & LEVEL_2)
+                               debug_print_cbc(rss->dbg, cbc);
+               );
        }
 
-       if (firm_dbg_get_mask(rss->dbg))
+       if (rss->opts->dump_flags & RSS_DUMP_CBC)
                debug_vcg_dump_bipartite(rss);
 
        del_pset(epk);
@@ -1000,13 +1054,14 @@ static void compute_bipartite_decomposition(rss_t *rss) {
 /**
  * Select the child with the maximum cost.
  */
-static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_t *t, cbc_t *cbc) {
+static child_t *select_child_max_cost(rss_t *rss, ir_nodeset_t *x, ir_nodeset_t *y, child_t *t, cbc_t *cbc) {
        ir_node *child;
+       ir_nodeset_iterator_t iter;
        float   max_cost = -1.0f;
 
        DBG((rss->dbg, LEVEL_2, "\t\tcomputing children costs:\n"));
 
-       foreach_nodeset(cbc->children, child) {
+       foreach_ir_nodeset(&cbc->children, child, iter) {
                rss_irn_t  *r_child             = get_rss_irn(rss, child);
                int         num_unkilled_parents = 0;
                int         num_descendants;
@@ -1015,13 +1070,13 @@ static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_
 
                /* get the number of unkilled parents */
                foreach_pset(cbc->kill_edges, k_edge) {
-                       if (k_edge->tgt == child && nodeset_find(x, k_edge->src))
+                       if (k_edge->tgt == child && ir_nodeset_contains(x, k_edge->src))
                                ++num_unkilled_parents;
                }
 
                cost = (float)num_unkilled_parents;
 
-               num_descendants = plist_count(r_child->descendant_list) + nodeset_count(y);
+               num_descendants = plist_count(r_child->descendant_list) + ir_nodeset_size(y);
 
                if (num_descendants > 0)
                        cost /= (float)num_descendants;
@@ -1041,29 +1096,29 @@ static child_t *select_child_max_cost(rss_t *rss, nodeset *x, nodeset *y, child_
 /**
  * Remove all parents from x which are killed by t_irn.
  */
-static void remove_covered_parents(rss_t *rss, nodeset *x, ir_node *t_irn, cbc_t *cbc) {
+static void remove_covered_parents(rss_t *rss, ir_nodeset_t *x, ir_node *t_irn, cbc_t *cbc) {
        rss_irn_t  *t = get_rss_irn(rss, t_irn);
        rss_edge_t *k_edge;
 
        DBG((rss->dbg, LEVEL_2, "\t\tremoving parents covered by %+F:\n", t_irn));
 
        foreach_pset(cbc->kill_edges, k_edge) {
-               if (k_edge->tgt == t_irn && nodeset_find(x, k_edge->src)) {
-                       nodeset_remove(x, k_edge->src);
+               if (k_edge->tgt == t_irn && ir_nodeset_contains(x, k_edge->src)) {
+                       ir_nodeset_remove(x, k_edge->src);
                        plist_insert_back(t->parent_list, k_edge->src);
                        DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", k_edge->src));
                }
        }
 }
 
-static void update_cumulated_descendent_values(rss_t *rss, nodeset *y, ir_node *t_irn) {
+static void update_cumulated_descendent_values(rss_t *rss, ir_nodeset_t *y, ir_node *t_irn) {
        rss_irn_t *t = get_rss_irn(rss, t_irn);
        plist_element_t *el;
 
        DBG((rss->dbg, LEVEL_2, "\t\tupdating cumulated descendant value of %+F:\n", t_irn));
 
        foreach_plist(t->descendant_list, el) {
-               nodeset_insert(y, plist_element_get_value(el));
+               ir_nodeset_insert(y, plist_element_get_value(el));
                DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", plist_element_get_value(el)));
        }
 }
@@ -1083,28 +1138,32 @@ static void compute_killing_function(rss_t *rss) {
        /* for all bipartite components do: */
        foreach_pset(rss->cbc_set, cbc) {
                ir_node *p;
-               nodeset *x       = new_nodeset(10);
-               nodeset *y       = new_nodeset(10);
+               ir_nodeset_t x;
+               ir_nodeset_t y;
+               ir_nodeset_iterator_t iter;
                child_t **sks    = NEW_ARR_F(child_t *, 20);
                int     cur_len  = 0;
                int     cur_size = 20;
                int     i;
 
+               ir_nodeset_init_size(&x, 10);
+               ir_nodeset_init_size(&y, 10);
+
                DBG((rss->dbg, LEVEL_1, "\tcomputing SKS for cbc %d:\n", cbc->nr));
                DBG((rss->dbg, LEVEL_2, "\t\tinitializing parents X:\n"));
 
                /* X = S_cb (all parents are initially uncovered) */
-               foreach_nodeset(cbc->parents, p) {
-                       nodeset_insert(x, p);
+               foreach_ir_nodeset(&cbc->parents, p, iter) {
+                       ir_nodeset_insert(&x, p);
                        DBG((rss->dbg, LEVEL_3, "\t\t\t%+F\n", p));
                }
 
                /* while X not empty */
-               while (nodeset_count(x) > 0) {
+               while (ir_nodeset_size(&x) > 0) {
                        child_t *t = obstack_alloc(&obst, sizeof(*t));
                        memset(t, 0, sizeof(*t));
 
-                       t = select_child_max_cost(rss, x, y, t, cbc);
+                       t = select_child_max_cost(rss, &x, &y, t, cbc);
 
                        if (cur_len >= cur_size) {
                                ARR_EXTO(child_t *, sks, cur_size * 2);
@@ -1114,8 +1173,8 @@ static void compute_killing_function(rss_t *rss) {
                        DBG((rss->dbg, LEVEL_2, "\t\tinsert child %+F (%.3f) into SKS at pos %d\n", t->irn, t->cost, cur_len));
 
                        sks[cur_len++] = t;
-                       remove_covered_parents(rss, x, t->irn, cbc);
-                       update_cumulated_descendent_values(rss, y, t->irn);
+                       remove_covered_parents(rss, &x, t->irn, cbc);
+                       update_cumulated_descendent_values(rss, &y, t->irn);
                }
 
                ARR_SHRINKLEN(sks, cur_len);
@@ -1148,15 +1207,13 @@ static void compute_killing_function(rss_t *rss) {
                        }
                }
 
-               del_nodeset(x);
-               del_nodeset(y);
+               ir_nodeset_destroy(&x);
+               ir_nodeset_destroy(&y);
                DEL_ARR_F(sks);
        }
 
-       DEBUG_ONLY(
-               if (firm_dbg_get_mask(rss->dbg))
-                       debug_vcg_dump_kill(rss);
-       );
+       if (rss->opts->dump_flags & RSS_DUMP_KILL)
+               debug_vcg_dump_kill(rss);
 
        del_pset(rss->cbc_set);
        obstack_free(&obst, NULL);
@@ -1166,26 +1223,33 @@ static void compute_killing_function(rss_t *rss) {
  * Adds the edge src -> tgt to the dvg. Checks if reverse edge is already there (asserts).
  */
 static INLINE void add_dvg_edge(rss_t *rss, dvg_t *dvg, ir_node *src, ir_node *tgt, int have_source) {
-       rss_edge_t *dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
+       rss_edge_t *dvg_edge;
        rss_edge_t key;
 
        if (! have_source)
-               nodeset_insert(dvg->nodes, src);
+               ir_nodeset_insert(&dvg->nodes, src);
+       else
+               assert(ir_nodeset_contains(&dvg->nodes, src) && "Wrong assumption");
 
-       nodeset_insert(dvg->nodes, tgt);
-
-       /* add an edge to our killer */
-       dvg_edge->src  = src;
-       dvg_edge->tgt  = tgt;
-       dvg_edge->next = NULL;
+       ir_nodeset_insert(&dvg->nodes, tgt);
 
        key.src = tgt;
        key.tgt = src;
        assert(pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key)) == NULL && "DVG must be acyclic!");
 
-       /* add the edge to the DVG */
-       DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
-       pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
+       key.src = src;
+       key.tgt = tgt;
+       if (NULL != pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
+               /* add the edge to the DVG */
+               dvg_edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*dvg_edge));
+
+               dvg_edge->src  = src;
+               dvg_edge->tgt  = tgt;
+               dvg_edge->next = NULL;
+
+               DBG((rss->dbg, LEVEL_3, "\t\tadd edge %+F -> %+F\n", src, tgt));
+               pset_insert(dvg->edges, dvg_edge, HASH_RSS_EDGE(dvg_edge));
+       }
 }
 
 /**
@@ -1230,7 +1294,7 @@ static void compute_dvg(rss_t *rss, dvg_t *dvg) {
                                rss_edge_t key;
 
                                /* insert the user into the DVG and append it to the user list of u */
-                               nodeset_insert(dvg->nodes, v_irn);
+                               ir_nodeset_insert(&dvg->nodes, v_irn);
                                if (! plist_has_value(u->dvg_user_list, v_irn))
                                        plist_insert_back(u->dvg_user_list, v_irn);
 
@@ -1251,23 +1315,45 @@ static void compute_dvg(rss_t *rss, dvg_t *dvg) {
 #endif /* if 0 */
        }
 
-       DEBUG_ONLY(
-               if (firm_dbg_get_mask(rss->dbg))
-                       debug_vcg_dump_dvg(rss, dvg);
-       );
+       if (rss->opts->dump_flags & RSS_DUMP_DVG)
+               debug_vcg_dump_dvg(rss, dvg);
 }
 
 /**
  * Updates the dvg structure when a serialization edge from src -> tgt is added.
  */
 static void update_dvg(rss_t *rss, dvg_t *dvg, rss_irn_t *src, rss_irn_t *tgt) {
-       int i;
+       int i, j, idx;
+       rss_edge_t *edge;
+       rss_edge_t **arr = alloca(pset_count(dvg->edges) * sizeof(arr[0]));
 
-       add_dvg_edge(rss, dvg, tgt->irn, src->irn, 1);
+       /*
+               Add an edge from serialization target to serialization src:
+               src cannot be alive with target
+       */
+       add_dvg_edge(rss, dvg, tgt->irn, src->irn, 0);
 
+       /* Add edges to src's descendants as well, they also getting serialized. */
        for (i = ARR_LEN_SAFE(src->descendants) - 1; i >= 0; --i) {
                add_dvg_edge(rss, dvg, tgt->irn, src->descendants[i], 1);
        }
+
+       /* We also have to add edges from targets predecessors in dvg */
+       idx = 0;
+       /* We cannot insert elements into set over which we iterate ... */
+       foreach_pset(dvg->edges, edge) {
+               if (edge->tgt == tgt->irn) {
+                       arr[idx++] = edge;
+               }
+       }
+
+       for (i = 0; i < idx; ++i) {
+               edge = arr[i];
+               add_dvg_edge(rss, dvg, edge->src, src->irn, 1);
+               for (j = ARR_LEN_SAFE(src->descendants) - 1; j >= 0; --j) {
+                       add_dvg_edge(rss, dvg, edge->src, src->descendants[j], 1);
+               }
+       }
 }
 
 #if 0
@@ -1300,9 +1386,10 @@ static void accumulate_dvg_descendant_values(rss_t *rss, rss_irn_t *root, plist_
  * Needs the descendant list for all user as sorted array.
  */
 static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
+       ir_nodeset_iterator_t iter;
        ir_node *irn;
 
-       foreach_nodeset(dvg->nodes, irn) {
+       foreach_nodeset(&dvg->nodes, irn, iter) {
                rss_irn_t       *node = get_rss_irn(rss, irn);
                plist_element_t *el, *el2;
 
@@ -1343,13 +1430,14 @@ static void build_dvg_pkiller_list(rss_t *rss, dvg_t *dvg) {
  * This is a reimplementation of the MAXIMAL_ANTI_CHAIN function
  * from the DDG library 1.1 (DAG.cpp).
  */
-static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
-       int         n               = nodeset_count(dvg->nodes);
+static ir_nodeset_t *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration) {
+       int         n               = ir_nodeset_size(&dvg->nodes);
        int         *assignment     = alloca(n * sizeof(assignment[0]));
        int         *assignment_rev = alloca(n * sizeof(assignment_rev[0]));
        int         *idx_map        = alloca(n * sizeof(idx_map[0]));
        hungarian_problem_t *bp;
-       nodeset     *values, *temp;
+       ir_nodeset_t *values, *temp;
+       ir_nodeset_iterator_t iter;
        ir_node     *u_irn;
        int         i, j, cost, cur_chain, res;
        rss_edge_t  *dvg_edge;
@@ -1369,7 +1457,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                and build a sorted index map for their irn indices.
        */
        i = 0;
-       foreach_nodeset(dvg->nodes, u_irn) {
+       foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
                idx_map[i++] = get_irn_idx(u_irn);
        }
        qsort(idx_map, n, sizeof(idx_map[0]), cmp_int);
@@ -1389,7 +1477,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                path in the DVG from u to v, ie. connect all descendants(v) to v.
                desc_dvg(v) = accumulated descendants of all z in dvg_user_list(v)
        */
-       foreach_nodeset(dvg->nodes, u_irn) {
+       foreach_ir_nodeset(&dvg->nodes, u_irn, iter) {
                rss_irn_t *u        = get_rss_irn(rss, u_irn);
                int       idx_u_irn = MAP_IDX(u_irn);
 
@@ -1437,7 +1525,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                The maximum cardinality bipartite matching gives us the minimal
                chain partition, which corresponds to the maximum anti chains.
        */
-       res = hungarian_solve(bp, assignment, &cost);
+       res = hungarian_solve(bp, assignment, &cost, 1);
        assert(res == 0 && "Bipartite matching failed!");
 
        hungarian_free(bp);
@@ -1457,7 +1545,8 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                DBG((rss->dbg, LEVEL_3, "\t\t\t%3d -> %3d         %3d -> %3d\n", i, assignment[i], i, assignment_rev[i]));
        }
 
-       values    = new_nodeset(10);
+       values    = xmalloc(sizeof(values[0]));
+       ir_nodeset_init_size(values, 10);
        cur_chain = 0;
        /* Construction of the minimal chain partition */
        for (j = 0; j < n; ++j) {
@@ -1470,7 +1559,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                        int       source;
 
                        /* there was no source for j -> we have a source of a new chain */
-                       nodeset_insert(values, xj_irn);
+                       ir_nodeset_insert(values, xj_irn);
 
                        c->elements = plist_obstack_new(phase_obst(&rss->ph));
                        c->nr       = cur_chain++;
@@ -1507,26 +1596,35 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                chain such, such it is parallel with the others.
        */
        DBG((rss->dbg, LEVEL_1, "\t\tcomputing set of saturation values (MAX AC)\n"));
-       DBG((rss->dbg, LEVEL_2, "\t\tstarting with:\n"));
-       dump_nodeset(values, "\t\t\t");
+       DBG((rss->dbg, LEVEL_3, "\t\tstarting with:\n"));
+
+       DEBUG_ONLY(
+               if (firm_dbg_get_mask(rss->dbg) & LEVEL_3)
+                       dump_nodeset(values, "\t\t\t");
+       )
+
        temp = NULL;
        do {
                /*
                        We need an explicit array for the values as
                        we cannot iterate multiple times over the same
                        set at the same time. :-(((((
+                       TODO Matze: now we can, so rewrite this...
                */
-               int     n         = nodeset_count(values);
+               int     n         = ir_nodeset_size(values);
                int     i         = 0;
                ir_node **val_arr = NEW_ARR_F(ir_node *, n);
 
-               foreach_nodeset(values, u_irn)
+               foreach_ir_nodeset(values, u_irn, iter)
                        val_arr[i++] = u_irn;
 
-               if (temp)
-                       del_nodeset(temp);
+               if (temp) {
+                       ir_nodeset_destroy(temp);
+                       free(temp);
+               }
 
-               temp = new_nodeset(10);
+               temp = xmalloc(sizeof(temp[0]));
+               ir_nodeset_init_size(temp, 10);
 
                /* Select all nodes from current value set, having another node in the set as descendant. */
                for (i = 0; i < n; ++i) {
@@ -1541,8 +1639,8 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
 
                                        if (pset_find(dvg->edges, &key, HASH_RSS_EDGE(&key))) {
                                                /* v[j] is descendant of u -> remove u and break */
-                                               nodeset_insert(temp, u->irn);
-                                               nodeset_remove(values, u->irn);
+                                               ir_nodeset_insert(temp, u->irn);
+                                               ir_nodeset_remove(values, u->irn);
 
                                                DBG((rss->dbg, LEVEL_3, "\t\t\tremoving %+F from values, adding it to temp\n", u->irn));
 
@@ -1553,7 +1651,7 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                }
 
                /* Try to insert the chain predecessor of all selected u's */
-               foreach_nodeset(temp, u_irn) {
+               foreach_ir_nodeset(temp, u_irn, iter) {
                        rss_irn_t       *u  = get_rss_irn(rss, u_irn);
                        chain_t         *c  = u->chain;
                        plist_element_t *el = plist_find_value(c->elements, u_irn);
@@ -1561,25 +1659,30 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
                        assert(el && "Missing element in chain!");
 
                        /* If u has predecessor in chain: insert the predecessor */
-                       if (el = plist_element_get_prev(el)) {
-                               nodeset_insert(values, plist_element_get_value(el));
+                       if (el == plist_element_get_prev(el)) {
+                               ir_nodeset_insert(values, plist_element_get_value(el));
                                DBG((rss->dbg, LEVEL_3, "\t\t\tadding %+F to values\n", plist_element_get_value(el)));
                        }
                }
 
 
                DEL_ARR_F(val_arr);
-       } while (nodeset_count(temp) > 0);
+       } while (ir_nodeset_size(temp) > 0);
 
        DBG((rss->dbg, LEVEL_2, "\t\tfinal set:\n"));
        DEBUG_ONLY(
                if (firm_dbg_get_mask(rss->dbg) & LEVEL_2) {
                        dump_nodeset(values, "\t\t\t");
-                       debug_vcg_dump_pkg(rss, values, iteration);
                }
        );
 
-       del_nodeset(temp);
+       if (rss->opts->dump_flags & RSS_DUMP_MAXAC)
+               debug_vcg_dump_pkg(rss, values, iteration);
+
+       if(temp != NULL) {
+               ir_nodeset_destroy(temp);
+               free(temp);
+       }
 
        return values;
 
@@ -1589,8 +1692,8 @@ static nodeset *compute_maximal_antichain(rss_t *rss, dvg_t *dvg, int iteration)
 /**
  * Computes the best serialization between two nodes of sat_vals.
  */
-static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodeset *sat_vals, serialization_t *ser, int num_regs) {
-       int        n                    = nodeset_count(sat_vals);
+static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nodeset_t *sat_vals, serialization_t *ser, int num_regs) {
+       int        n                    = ir_nodeset_size(sat_vals);
        int        n_idx                = ARR_LEN_SAFE(rss->idx_map);
        int        i                    = 0;
        ir_node    **val_arr            = alloca(n * sizeof(val_arr[0]));
@@ -1598,16 +1701,17 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
        bitset_t   *bs_vdesc            = bitset_alloca(n_idx);
        bitset_t   *bs_tmp              = bitset_alloca(n_idx);
        bitset_t   *bs_ukilldesc        = bitset_alloca(n_idx);
-       unsigned   best_benefit         = UINT_MAX;
-       unsigned   best_omega2          = UINT_MAX;
-       unsigned   best_benefit_omega20 = UINT_MAX;
-       int        has_positive_omega1  = 0;
+       int        best_benefit         = INT_MAX;
+       int        best_omega2          = INT_MAX;
+       int        best_benefit_omega20 = INT_MAX;
+       int        has_omega1           = 0;
        int        j, k;
        ir_node    *irn;
+       ir_nodeset_iterator_t iter;
        rss_edge_t min_benefit_edge;
        rss_edge_t min_omega20_edge;
-       rss_irn_t  *ser_u_omega1, *ser_v_omega1;
-       rss_irn_t  *ser_u_omega20, *ser_v_omega20;
+       rss_irn_t  *ser_u_omega1 = NULL, *ser_v_omega1 = NULL;
+       rss_irn_t  *ser_u_omega20 = NULL, *ser_v_omega20 = NULL;
 
        DBG((rss->dbg, LEVEL_1, "\tcomputing admissible serializations:\n"));
 
@@ -1617,7 +1721,7 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                set at the same time. :-(((((
        */
 
-       foreach_nodeset(sat_vals, irn) {
+       foreach_ir_nodeset(sat_vals, irn, iter) {
                val_arr[i++] = irn;
                bitset_set(bs_sv, BLOCK_IDX_MAP(rss, irn));
        }
@@ -1637,20 +1741,27 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
        In this case there is no serialization which could
        reduce the registerpressure
 */
-#define IS_UNSERIALIZABLE_NODE(rss_node)           \
-       ((plist_count(rss_node->pkiller_list) == 1) && \
-       is_Sink(rss_node->killer)                   && \
-       (plist_count(rss_node->kill_value_list) == 0))
+#define IS_UNSERIALIZABLE_NODE(rss_node)                  \
+       (                                                     \
+               (                                                 \
+                       (plist_count(rss_node->pkiller_list) == 1) && \
+                       is_Sink(rss_node->killer)                  && \
+                       (rss_node->kill_count                == 0)    \
+               )                            ||                   \
+               be_is_Barrier(rss_node->irn) ||                   \
+               be_is_Keep(rss_node->irn)                         \
+       )
 
        /* for all u in sat_vals */
        for (i = 0; i < n; ++i) {
-               rss_irn_t       *u       = get_rss_irn(rss, val_arr[i]);
-               int             u_height = get_irn_height(rss->h, val_arr[i]);
+               rss_irn_t       *u = get_rss_irn(rss, val_arr[i]);
                plist_element_t *el;
 
                /* ignore nodes where serialization does not help */
-               if (IS_UNSERIALIZABLE_NODE(u))
+               if (IS_UNSERIALIZABLE_NODE(u)) {
+                       DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", u->irn));
                        continue;
+               }
 
                /* accumulate all descendants of all pkiller(u) */
                bitset_clear_all(bs_ukilldesc);
@@ -1674,12 +1785,15 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                        ir_node   *v_irn   = val_arr[j];
                        rss_irn_t *v       = get_rss_irn(rss, v_irn);
                        unsigned  v_height = get_irn_height(rss->h, v_irn);
-                       unsigned  omega1, omega2, is_pkiller;
+                       int       omega1, omega2, is_pkiller;
 
                        /* v cannot be serialized with itself
                         * ignore nodes where serialization does not help */
-                       if (i == j || IS_UNSERIALIZABLE_NODE(v))
+                       if (i == j || IS_UNSERIALIZABLE_NODE(v)) {
+                               if (i != j)
+                                       DBG((rss->dbg, LEVEL_3, "\t\t\t%+F considered unserializable\n", v->irn));
                                continue;
+                       }
 
                        /* get descendants of v */
                        bitset_clear_all(bs_vdesc);
@@ -1690,14 +1804,14 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                        }
 
                        /* if v is in pkiller(u) */
-                       is_pkiller = plist_has_value(u->pkiller_list, val_arr[j]);
+                       is_pkiller = plist_has_value(u->pkiller_list, v_irn);
 
                        /* for all vv in pkiller(u) */
                        foreach_plist(u->pkiller_list, el) {
                                ir_node *vv_irn  = plist_element_get_value(el);
                                int     add_edge;
 
-                               if (is_Sink(vv_irn))
+                               if (is_Sink(vv_irn) || is_cfop(vv_irn))
                                        continue;
 
                                if (is_pkiller)
@@ -1711,8 +1825,8 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                                */
 
                                if (add_edge) {
-                                       unsigned vv_height = get_irn_height(rss->h, vv_irn);
-                                       unsigned mu1, mu2, critical_path_cost;
+                                       int vv_height = get_irn_height(rss->h, vv_irn);
+                                       int mu1, mu2, critical_path_cost;
 
                                        /*
                                                mu1 = | descendants(v) cut sat_vals |
@@ -1733,13 +1847,11 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                                                mu2 = 0;
                                        }
 
-                                       //assert(mu1 >= mu2);
-
                                        /* omega1 = mu1 - mu2 */
-                                       omega1 = mu2 >= mu2 ? mu1 - mu2 : 0;
+                                       omega1 = mu1 - mu2;
 
-                                       if (omega1 > 0)
-                                               has_positive_omega1 = 1;
+                                       if (omega1 != 0)
+                                               has_omega1 = 1;
 
                                        /* omega2 = increase of critical path */
                                        critical_path_cost =
@@ -1754,25 +1866,27 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                                        omega2 = critical_path_cost > rss->max_height ? critical_path_cost - rss->max_height : 0;
 
                                        /* this keeps track of the edge with the best benefit */
-                                       if (num_regs - omega1 < best_benefit) {
+                                       if (omega1 >= num_regs - n && omega1 < best_benefit) {
                                                min_benefit_edge.src = v_irn;
                                                min_benefit_edge.tgt = vv_irn;
 
                                                ser_u_omega1 = u;
                                                ser_v_omega1 = v;
 
-                                               best_benefit = num_regs - omega1;
+                                               best_benefit    = omega1;
+                                               ser->new_killer = is_pkiller;
                                        }
 
                                        /* this keeps track of the edge with the best omega1 costs where omega2 == 0 */
-                                       if (omega2 == 0 && (num_regs - omega1 < best_benefit_omega20)) {
+                                       if (omega2 == 0 && omega1 >= num_regs - n && omega1 < best_benefit_omega20) {
                                                min_omega20_edge.src = v_irn;
                                                min_omega20_edge.tgt = vv_irn;
 
                                                ser_u_omega20 = u;
                                                ser_v_omega20 = v;
 
-                                               best_benefit_omega20 = num_regs - omega1;
+                                               best_benefit_omega20 = omega1;
+                                               ser->new_killer      = is_pkiller;
                                        }
 
                                        best_omega2 = MIN(best_omega2, omega2);
@@ -1784,7 +1898,7 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, nodese
                } /* for all v in sat_vals */
        } /* for all u in sat_vals */
 
-       if (! has_positive_omega1)
+       if (! has_omega1)
                return NULL;
 
        if (best_omega2 == 0) {
@@ -1818,14 +1932,16 @@ static void perform_value_serialization_heuristic(rss_t *rss) {
        bitset_t *abi_ign_bs     = bitset_alloca(arch_register_class_n_regs(rss->cls));
        int      available_regs, iteration;
        dvg_t    dvg;
-       nodeset  *sat_vals;
+       ir_nodeset_t *sat_vals;
        pset *ser_set = new_pset(cmp_rss_edges, 20);
 
        /* available_regs = R = |arch_non_ignore_regs cut ~abi_ignore_regs| */
        arch_put_non_ignore_regs(rss->arch_env, rss->cls, arch_nonign_bs);
        be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
        bitset_andnot(arch_nonign_bs, abi_ign_bs);
-       available_regs = bitset_popcnt(arch_nonign_bs);
+       available_regs  = bitset_popcnt(arch_nonign_bs);
+       //num_live = pset_count(rss->live_block);
+       //available_regs -= num_live < available_regs ? num_live : 0;
 
        DBG((rss->dbg, LEVEL_1, "\n\t#available regs: %d\n\n", available_regs));
 
@@ -1834,7 +1950,7 @@ static void perform_value_serialization_heuristic(rss_t *rss) {
                V    = set of all nodes we are currently interested in
                E_dv = there is an edge from u to v iff v is a descendant of killer(u), forall u, v in V
        */
-       dvg.nodes = new_nodeset(plist_count(rss->nodes));
+       ir_nodeset_init_size(&dvg.nodes, plist_count(rss->nodes));
        dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
        compute_dvg(rss, &dvg);
 
@@ -1846,16 +1962,15 @@ static void perform_value_serialization_heuristic(rss_t *rss) {
        DBG((rss->dbg, LEVEL_1, "\tcomputing maximal antichain:\n"));
        iteration = 0;
        sat_vals  = compute_maximal_antichain(rss, &dvg, iteration++);
-       while (sat_vals && (nodeset_count(sat_vals) > available_regs)) {
+       while (sat_vals && (ir_nodeset_size(sat_vals) > available_regs)) {
                serialization_t *ser, best_ser;
                rss_edge_t      *edge = obstack_alloc(phase_obst(&rss->ph), sizeof(*edge));
-               rss_irn_t       *tgt, *src;
                ir_node         *dep_src, *dep_tgt;
 
                best_ser.edge = edge;
                ser = compute_best_admissible_serialization(rss, sat_vals, &best_ser, available_regs);
 
-               DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", nodeset_count(sat_vals), available_regs));
+               DBG((rss->dbg, LEVEL_1, "\tcurrent register saturation %d, target %d\n", ir_nodeset_size(sat_vals), available_regs));
 
                if (! ser) {
                        DBG((rss->dbg, LEVEL_1, "\tno RS improving serialization found, breaking at iteration %d\n", iteration));
@@ -1875,7 +1990,10 @@ static void perform_value_serialization_heuristic(rss_t *rss) {
                /* update the dvg */
                update_dvg(rss, &dvg, ser->v, ser->u);
                update_dvg(rss, &dvg, get_rss_irn(rss, ser->edge->src), get_rss_irn(rss, ser->edge->tgt));
-               del_nodeset(sat_vals);
+               if(sat_vals != NULL) {
+                       ir_nodeset_destroy(sat_vals);
+                       free(sat_vals);
+               }
 
                dep_src = skip_Proj(ser->edge->src);
                dep_tgt = ser->edge->tgt;
@@ -1887,23 +2005,12 @@ static void perform_value_serialization_heuristic(rss_t *rss) {
                /* TODO: try to find a cheaper way for updating height information */
                rss->max_height = heights_recompute_block(rss->h, rss->block);
 
-#if 0
-               src = get_rss_irn(rss, ser->edge->src);
-               tgt = get_rss_irn(rss, ser->edge->tgt);
-               src->killer = tgt->irn;
-               del_nodeset(dvg.nodes);
-               del_pset(dvg.edges);
-               dvg.nodes = new_nodeset(plist_count(rss->nodes));
-               dvg.edges = new_pset(cmp_rss_edges, plist_count(rss->nodes) * 5);
-               compute_dvg(rss, &dvg);
-#endif /* if 0 */
-
                /* Recompute the antichain for next serialization */
                DBG((rss->dbg, LEVEL_1, "\tre-computing maximal antichain:\n"));
                sat_vals = compute_maximal_antichain(rss, &dvg, iteration++);
        }
 
-       del_nodeset(dvg.nodes);
+       ir_nodeset_destroy(&dvg.nodes);
        del_pset(dvg.edges);
 }
 
@@ -1938,23 +2045,51 @@ static void process_block(ir_node *block, void *env) {
                rss->cls = cls;
                DBG((rss->dbg, LEVEL_1, "register class %s\n", arch_register_class_name(cls)));
 
+               /* Get all live value at end of Block having current register class */
+               rss->live_block = pset_new_ptr(10);
+               be_liveness_end_of_block(rss->liveness, rss->arch_env, rss->cls, rss->block, rss->live_block);
+
                /* reset the list of interesting nodes */
                plist_clear(rss->nodes);
                plist_insert_back(rss->nodes, _sink);
 
                /* collect all nodes having a certain register class */
                foreach_out_edge(block, edge) {
-                       ir_node *irn = get_edge_src_irn(edge);
+                       ir_node *irn  = get_edge_src_irn(edge);
+                       ir_mode *mode = get_irn_mode(irn);
 
-                       if (get_irn_mode(irn) == mode_T || be_is_Keep(irn) || be_is_Barrier(skip_Proj(irn)))
+                       /*
+                               We skip:
+                               - mode_T nodes (the projs are asked)
+                               - mode_X nodes (control flow nodes are always scheduled last)
+                               - Keeps (they are always immediately scheduled)
+                               - Phi (same as Keep)
+                       */
+                       if (mode == mode_T || mode == mode_X || is_Phi(irn))
                                continue;
 
-                       if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
-                               plist_insert_back(rss->nodes, irn);
+                       /*
+                               In case of a proj, we skip
+                               - Barrier (they are a Barrier :)
+                               - Start
+                               - the Proj itself, as it's scheduled always with it's super node
+                       */
+                       if (is_Proj(irn)) {
+                               ir_node *pred = get_Proj_pred(irn);
+                               if (be_is_Barrier(pred) || is_Start(pred))
+                                       continue;
+                       }
 
-                               /* calculate the descendants and consumer for each node in the block */
-                               collect_node_info(rss, irn);
+                       /* calculate the descendants and consumer for each node in the block */
+                       collect_node_info(rss, skip_Proj(irn));
+
+                       if (be_is_Keep(irn))
+                               continue;
+
+                       if (! arch_irn_is(rss->arch_env, irn, ignore) && arch_get_irn_reg_class(rss->arch_env, irn, -1) == cls) {
+                               plist_insert_back(rss->nodes, skip_Proj(irn));
                        }
+                       //}
                }
 
                /* compute the potential killing set PK(G) */
@@ -1968,29 +2103,51 @@ static void process_block(ir_node *block, void *env) {
                        add the necessary dependencies to the irg.
                */
                perform_value_serialization_heuristic(rss);
+
+               del_pset(rss->live_block);
        }
 
        phase_free(&rss->ph);
 }
 
+/**
+ * Register the options.
+ */
+void be_init_schedrss(void) {
+       lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
+       lc_opt_entry_t *sched_grp = lc_opt_get_grp(be_grp, "sched");
+       lc_opt_entry_t *rss_grp = lc_opt_get_grp(sched_grp, "rss");
+
+       lc_opt_add_table(rss_grp, rss_option_table);
+}
+
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_schedrss);
+
 /**
  * Preprocess the irg for scheduling.
  */
 void rss_schedule_preparation(const be_irg_t *birg) {
+       ir_graph *irg = be_get_birg_irg(birg);
        rss_t rss;
 
        FIRM_DBG_REGISTER(rss.dbg, "firm.be.sched.rss");
 
-       firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
+       //firm_dbg_set_mask(rss.dbg, LEVEL_1 | LEVEL_2 | LEVEL_3);
 
-       init_rss_special_nodes(birg->irg);
+       init_rss_special_nodes(irg);
 
-       rss.irg      = birg->irg;
-       rss.arch_env = birg->main_env->arch_env;
+       rss.irg      = irg;
+       rss.arch_env = be_get_birg_arch_env(birg);
        rss.abi      = birg->abi;
-       rss.h        = heights_new(birg->irg);
+       rss.h        = heights_new(irg);
        rss.nodes    = plist_new();
-       irg_block_walk_graph(birg->irg, NULL, process_block, &rss);
+       rss.opts     = &rss_options;
+       rss.liveness = be_liveness(irg);
+       irg_block_walk_graph(irg, NULL, process_block, &rss);
        heights_free(rss.h);
        plist_free(rss.nodes);
+       be_liveness_free(rss.liveness);
+
+       if (birg->main_env->options->dump_flags & DUMP_SCHED)
+               be_dump(rss.irg, "-rss", dump_ir_block_graph);
 }