bescripts: Copy all common node attributes into the constructor variants.
[libfirm] / ir / be / bepbqpcoloring.c
index 3ae587f..f9de569 100644 (file)
  * @brief       PBQP based register allocation.
  * @author      Thomas Bersch
  * @date        27.11.2009
- * @version     $Id: bechordal.c 26750 2009-11-27 09:37:43Z bersch $
  */
 
-/*     miscellaneous includes */
+/* miscellaneous includes */
 #include "config.h"
 
-#ifdef FIRM_KAPS
-
 #include "debug.h"
 #include "error.h"
 
 #include "irdom.h"
+#include "irdump.h"
 #include "iredges_t.h"
 #include "irprintf.h"
 #include "irgwalk.h"
+#include "irtools.h"
 #include "time.h"
+#include "execfreq_t.h"
+#include "bipartite.h"
 
 /* libfirm/ir/be includes */
 #include "bearch.h"
 #include "beutil.h"
 #include "plist.h"
 #include "pqueue.h"
+#include "becopyopt.h"
 
 /* pbqp includes */
 #include "kaps.h"
 #include "matrix.h"
 #include "vector.h"
 #include "vector_t.h"
-#include "heuristical.h"
+#include "heuristical_co.h"
+#include "heuristical_co_ld.h"
 #include "pbqp_t.h"
 #include "html_dumper.h"
 #include "pbqp_node_t.h"
 #include "pbqp_node.h"
+#include "pbqp_edge_t.h"
+
+#define TIMER                 0
+#define PRINT_RPEO            0
+#define USE_BIPARTIT_MATCHING 0
+#define DO_USEFUL_OPT         1
+
 
+static int use_exec_freq     = true;
+static int use_late_decision = false;
 
-typedef struct _be_pbqp_alloc_env_t {
-       pbqp                                            *pbqp_inst;             /**< PBQP instance for register allocation */
-       be_irg_t                        *birg;          /**< Back-end IRG session. */
-       ir_graph                        *irg;           /**< The graph under examination. */
-       const arch_register_class_t *cls;                       /**< Current processed register class */
+typedef struct be_pbqp_alloc_env_t {
+       pbqp_t                      *pbqp_inst;         /**< PBQP instance for register allocation */
+       ir_graph                    *irg;               /**< The graph under examination. */
+       const arch_register_class_t *cls;               /**< Currently processed register class */
        be_lv_t                     *lv;
-       bitset_t                    *ignored_regs;
-       pbqp_matrix                                     *ife_matrix_dummy;
-       pbqp_matrix                                     *aff_matrix_dummy;
-       plist_t                                         *rpeo;
-       unsigned                                        *restr_nodes;
-       be_chordal_env_t                        *env;
+       bitset_t                    *allocatable_regs;  /**< Register indices that may be assigned; an unset bit yields INF_COSTS in a node's cost vector */
+       pbqp_matrix_t               *ife_matrix_template; /**< Matrix that is copied for every interference edge */
+       pbqp_matrix_t               *aff_matrix_template; /**< Matrix used for affinity edges when use_exec_freq is off */
+       plist_t                     *rpeo;              /**< Node list; presumably the reverse perfect elimination order built by the block walker (confirm) */
+       unsigned                    *restr_nodes;       /**< Indexed by node index: number of registers marked INF_COSTS when the PBQP node was created */
+       unsigned                    *ife_edge_num;      /**< Indexed by node index: number of interferences recorded by insert_ife_edge() */
+       ir_execfreq_int_factors      execfreq_factors;  /**< Passed to get_block_execfreq_int() when weighting affinity edges */
+       be_chordal_env_t            *env;               /**< NOTE(review): looks like the enclosing chordal allocator environment - confirm */
 } be_pbqp_alloc_env_t;
 
 
-#define is_Reg_Phi(irn)                (is_Phi(irn) && mode_is_data(get_irn_mode(irn)))
-#define get_Perm_src(irn)      (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn)))
-#define is_Perm_Proj(irn)      (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn)))
-
-static inline int is_2addr_code(const arch_register_req_t *req)
-{
-       return (req->type & arch_register_req_type_should_be_same) != 0;
-}
+#define is_Reg_Phi(irn)                                        (is_Phi(irn) && mode_is_data(get_irn_mode(irn))) /* Phi node whose mode is a data mode */
+#define get_Perm_src(irn)                                      (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn))) /* operand of the Proj's predecessor at the Proj's own number */
+#define is_Perm_Proj(irn)                                      (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn))) /* Proj whose predecessor is a be Perm */
+#define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix))) /* adds an edge between both nodes that carries its own copy of template_matrix */
+#define get_free_regs(restr_nodes, cls, irn)                   (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)]) /* register count of cls minus the restricted count stored for irn */
 
+static const lc_opt_table_entry_t options[] = { /* boolean options "exec_freq" and "late_decision", bound to use_exec_freq / use_late_decision */
+       LC_OPT_ENT_BOOL("exec_freq", "use exec_freq",  &use_exec_freq),
+       LC_OPT_ENT_BOOL("late_decision", "use late decision for register allocation",  &use_late_decision),
+       LC_OPT_LAST
+};
 
 #if KAPS_DUMP
 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
 {
-       FILE *result;
-       char buf[1024];
-       size_t i, n;
-       char *tu_name;
-
-       n = strlen(env->birg->main_env->cup_name);
+       FILE       *result;
+       char        buf[1024];
+       size_t      i;
+       size_t      n;
+       char       *tu_name;
+       const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
+
+       n = strlen(cup_name);
        tu_name = XMALLOCN(char, n + 1);
-       strcpy(tu_name, env->birg->main_env->cup_name);
+       strcpy(tu_name, cup_name);
        for (i = 0; i < n; ++i)
                if (tu_name[i] == '.')
                        tu_name[i] = '_';
@@ -110,7 +127,7 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char
        ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
        xfree(tu_name);
        result = fopen(buf, "wt");
-       if(result == NULL) {
+       if (result == NULL) {
                panic("Couldn't open '%s' for writing.", buf);
        }
 
@@ -119,20 +136,25 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char
 #endif
 
 
-static unsigned create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) {
+static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn)
+{
        const arch_register_class_t *cls = pbqp_alloc_env->cls;
-       pbqp     *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
-       bitset_t *ignored_regs           = pbqp_alloc_env->ignored_regs;
+       pbqp_t   *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
+       bitset_t *allocatable_regs       = pbqp_alloc_env->allocatable_regs;
        unsigned  colors_n               = arch_register_class_n_regs(cls);
        unsigned  cntConstrains          = 0;
 
        /* create costs vector depending on register constrains */
-       struct vector *costs_vector = vector_alloc(pbqp_inst, colors_n);
+       vector_t *costs_vector = vector_alloc(pbqp_inst, colors_n);
 
        /* set costs depending on register constrains */
        unsigned idx;
-       for(idx = 0; idx < colors_n; idx++) {
-               if(bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) {
+       for (idx = 0; idx < colors_n; idx++) {
+               const arch_register_req_t *req = arch_get_irn_register_req(irn);
+               const arch_register_t     *reg = arch_register_for_index(cls, idx);
+               if (!bitset_is_set(allocatable_regs, idx)
+                   || !arch_reg_is_allocatable(req, reg)) {
+                       /* constrained */
                        vector_set(costs_vector, idx, INF_COSTS);
                        cntConstrains++;
                }
@@ -140,100 +162,165 @@ static unsigned create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *i
 
        /* add vector to pbqp node */
        add_node_costs(pbqp_inst, get_irn_idx(irn), costs_vector);
+       pbqp_alloc_env->restr_nodes[get_irn_idx(irn)] = cntConstrains;
+}
 
-       /* return number of free selectable registers */
-       return (colors_n - cntConstrains);
+/**
+ * Records an interference between src_node and trg_node.
+ *
+ * Nothing happens if the PBQP instance already holds an edge between the
+ * two nodes. Otherwise the interference counters of both nodes are
+ * incremented and, unless one of the shortcuts below applies, a copy of the
+ * interference matrix template is attached as a new edge.
+ */
+static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node)
+{
+       pbqp_t   *pbqp    = pbqp_alloc_env->pbqp_inst;
+       unsigned  src_idx = get_irn_idx(src_node);
+       unsigned  trg_idx = get_irn_idx(trg_node);
+
+       /* at most one edge per node pair */
+       if (get_edge(pbqp, src_idx, trg_idx) != NULL)
+               return;
+
+       /* both endpoints gain one interference */
+       pbqp_alloc_env->ife_edge_num[src_idx]++;
+       pbqp_alloc_env->ife_edge_num[trg_idx]++;
+
+#if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
+       /* Shortcut to speed up pbqp solving (possible because the matrix is known):
+        * a node with a single free register needs no edge, its register (the
+        * minimum of its cost vector) is simply forbidden for the neighbour. */
+       const arch_register_class_t *cls         = pbqp_alloc_env->cls;
+       unsigned                    *restr_nodes = pbqp_alloc_env->restr_nodes;
+       bool                         src_fixed   = get_free_regs(restr_nodes, cls, src_node) == 1;
+       bool                         trg_fixed   = get_free_regs(restr_nodes, cls, trg_node) == 1;
+
+       if (src_fixed && trg_fixed) {
+               /* both registers are determined already, they only have to differ */
+               assert(vector_get_min_index(get_node(pbqp, src_idx)->costs) !=
+                      vector_get_min_index(get_node(pbqp, trg_idx)->costs) &&
+                      "Interfering nodes must not have the same register!");
+               return;
+       }
+       if (src_fixed) {
+               unsigned reg = vector_get_min_index(get_node(pbqp, src_idx)->costs);
+               vector_set(get_node(pbqp, trg_idx)->costs, reg, INF_COSTS);
+               return;
+       }
+       if (trg_fixed) {
+               unsigned reg = vector_get_min_index(get_node(pbqp, trg_idx)->costs);
+               vector_set(get_node(pbqp, src_idx)->costs, reg, INF_COSTS);
+               return;
+       }
+#endif
+       /* general case: attach a private copy of the interference matrix */
+       insert_edge(pbqp, src_node, trg_node, pbqp_alloc_env->ife_matrix_template);
 }
 
-static void build_graph_walker(ir_node *irn, void *env) {
-       be_pbqp_alloc_env_t         *pbqp_alloc_env = env;
-       pbqp                                            *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
-       const arch_register_class_t *cls            = pbqp_alloc_env->cls;
-       const arch_register_req_t   *req            = arch_get_register_req_out(irn);
-       unsigned pos, max;
+/**
+ * Adds an affinity between src_node and trg_node to the PBQP instance.
+ *
+ * Nothing happens if an edge between the two nodes already exists. With
+ * use_exec_freq set, every off-diagonal entry of the cost matrix is the
+ * integer execution frequency of the block the copy would live in;
+ * otherwise the shared affinity matrix template is used.
+ *
+ * @param pbqp_alloc_env  allocation environment
+ * @param src_node        node the affinity originates from
+ * @param trg_node        node src_node should share a register with
+ * @param pos             if src_node is a Phi: index of the control flow
+ *                        predecessor block that supplies the frequency;
+ *                        not evaluated for other nodes
+ */
+static void insert_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos)
+{
+       pbqp_t                      *pbqp        = pbqp_alloc_env->pbqp_inst;
+       const arch_register_class_t *cls         = pbqp_alloc_env->cls;
+       unsigned                    *restr_nodes = pbqp_alloc_env->restr_nodes;
+       unsigned                     colors_n    = arch_register_class_n_regs(cls);
+       pbqp_matrix_t               *afe_matrix;
+
+       /* an already existing edge between the nodes is left untouched */
+       if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) != NULL)
+               return;
+
+       if (use_exec_freq) {
+               /* get exec_freq for copy_block */
+               ir_node *root_bl = get_nodes_block(src_node);
+               ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
+               int      res     = get_block_execfreq_int(&pbqp_alloc_env->execfreq_factors, copy_bl);
+               unsigned row;
+               unsigned col;
+
+               /* Allocate only when a private matrix is really needed; the
+                * matrix used to be allocated on every call, even if an edge
+                * existed or the template was used instead. */
+               afe_matrix = pbqp_matrix_alloc(pbqp, colors_n, colors_n);
+
+               /* create afe-matrix: different registers cost res, the diagonal
+                * keeps the value pbqp_matrix_alloc() gave it */
+               for (row = 0; row < colors_n; row++) {
+                       for (col = 0; col < colors_n; col++) {
+                               if (row != col)
+                                       pbqp_matrix_set(afe_matrix, row, col, (num)res);
+                       }
+               }
+       }
+       else {
+               afe_matrix = pbqp_alloc_env->aff_matrix_template;
+       }
+#if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
+       /* do useful optimization to speed up pbqp solving */
+       if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
+               /* both nodes have a single free register, an affinity cannot change anything */
+               return;
+       }
+       if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
+               /* fold the matrix column of the determined register into the other node's costs */
+               if (get_free_regs(restr_nodes, cls, src_node) == 1) {
+                       unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
+                       vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx);
+               }
+               else {
+                       unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
+                       vector_add_matrix_col(get_node(pbqp, get_irn_idx(src_node))->costs, afe_matrix, regIdx);
+               }
+               return;
+       }
+#endif
+       /* insert affinity edge (insert_edge() stores a copy of afe_matrix) */
+       insert_edge(pbqp, src_node, trg_node, afe_matrix);
+}
 
-       if (arch_irn_consider_in_reg_alloc(cls, irn))
-               return;
+static void create_affinity_edges(ir_node *irn, void *env) /* adds the affinity edges of irn; env must point to a be_pbqp_alloc_env_t */
+{
+       be_pbqp_alloc_env_t         *pbqp_alloc_env = (be_pbqp_alloc_env_t*)env;
+       const arch_register_class_t *cls            = pbqp_alloc_env->cls;
+       const arch_register_req_t   *req            = arch_get_irn_register_req(irn);
+       unsigned                     pos;
+       unsigned                     max;
 
        if (is_Reg_Phi(irn)) { /* Phis */
-               for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
+               for (pos = 0, max = get_irn_arity(irn); pos < max; ++pos) { /* one affinity per Phi operand */
                        ir_node *arg = get_irn_n(irn, pos);
-                       //add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
 
                        if (!arch_irn_consider_in_reg_alloc(cls, arg))
                                continue;
 
                        /* no edges to itself */
-                       if(irn == arg) {
+                       if (irn == arg) {
                                continue;
                        }
 
-                       if(get_edge(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg)) == NULL) {
-                               /* copy matrix */
-                               struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, pbqp_alloc_env->aff_matrix_dummy);
-                               /* add costs matrix to affinity edge */
-                               add_edge_costs(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg) , matrix);
-                       }
+                       insert_afe_edge(pbqp_alloc_env, irn, arg, pos); /* pos selects the predecessor block whose execution frequency weights the edge */
                }
        }
        else if (is_Perm_Proj(irn)) { /* Perms */
                ir_node *arg = get_Perm_src(irn);
-               //add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
-
                if (!arch_irn_consider_in_reg_alloc(cls, arg))
                        return;
 
-               if(get_edge(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg)) == NULL) {
-                       /* copy matrix */
-                       struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, pbqp_alloc_env->aff_matrix_dummy);
-                       /* add costs matrix to affinity edge */
-                       add_edge_costs(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg) , matrix);
-               }
-       }
-       else { /* 2-address code */
-               if (is_2addr_code(req)) {
-                       const unsigned other = req->other_same;
-                       int i;
-
-                       for (i = 0; 1U << i <= other; ++i) {
-                               if (other & (1U << i)) {
-                                       ir_node *other = get_irn_n(skip_Proj(irn), i);
-//                                     if (!arch_irn_is_ignore(other)) {
-                                               //add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
-                                               if (!arch_irn_consider_in_reg_alloc(cls, other))
-                                                       continue;
-
-                                               /* no edges to itself */
-                                               if(irn == other) {
-                                                       continue;
-                                               }
+               insert_afe_edge(pbqp_alloc_env, irn, arg, -1); /* pos is only evaluated for Phi sources, so -1 is never used as an index */
+       } else if (arch_register_req_is(req, should_be_same)) { /* 2-address code */
+               const unsigned other = req->other_same; /* bit mask of the operand positions that should share irn's register */
+               int            i;
 
-                                               if(get_edge(pbqp_inst, get_irn_idx(irn), get_irn_idx(other)) == NULL) {
-                                                       /* copy matrix */
-                                                       struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, pbqp_alloc_env->aff_matrix_dummy);
-                                                       /* add costs matrix to affinity edge */
-                                                       add_edge_costs(pbqp_inst, get_irn_idx(irn), get_irn_idx(other) , matrix);
-                                               }
-//                                     }
+               for (i = 0; 1U << i <= other; ++i) { /* visit every set bit of the mask */
+                       if (other & (1U << i)) {
+                               ir_node *other = get_irn_n(skip_Proj(irn), i); /* NOTE: shadows the bit mask 'other' of the enclosing scope */
+                               if (!arch_irn_consider_in_reg_alloc(cls, other))
+                                       continue;
+
+                               /* no edges to itself */
+                               if (irn == other) {
+                                       continue;
                                }
+
+                               insert_afe_edge(pbqp_alloc_env, irn, other, i); /* irn is no Phi on this path, so i is not evaluated as pos */
                        }
                }
        }
 }
 
-static void create_pbqp_coloring_inst(ir_node *block, void *data) {
-       be_pbqp_alloc_env_t         *pbqp_alloc_env     = data;
-       be_lv_t                     *lv                 = pbqp_alloc_env->lv;
-       const arch_register_class_t *cls                = pbqp_alloc_env->cls;
-       plist_t                                         *rpeo                           = pbqp_alloc_env->rpeo;
-       pbqp                                            *pbqp_inst                      = pbqp_alloc_env->pbqp_inst;
-       unsigned                                        *restr_nodes            = pbqp_alloc_env->restr_nodes;
-       pbqp_matrix                             *ife_matrix_dummy       = pbqp_alloc_env->ife_matrix_dummy;
-       pqueue_t                                        *queue                  = new_pqueue();
-       pqueue_t                                        *restr_nodes_queue      = new_pqueue();
-       plist_t                                         *temp_list              = plist_new();
-       ir_node                     *irn;
+static void create_pbqp_coloring_instance(ir_node *block, void *data)
+{
+       be_pbqp_alloc_env_t         *pbqp_alloc_env     = (be_pbqp_alloc_env_t*)data;
+       be_lv_t                     *lv                 = pbqp_alloc_env->lv;
+       const arch_register_class_t *cls                = pbqp_alloc_env->cls;
+       plist_t                     *rpeo               = pbqp_alloc_env->rpeo;
+       pbqp_t                      *pbqp_inst          = pbqp_alloc_env->pbqp_inst;
+       plist_t                     *temp_list          = plist_new();
+       plist_element_t             *el;
        ir_nodeset_t                 live_nodes;
+#if USE_BIPARTIT_MATCHING
+       int                         *assignment         = ALLOCAN(int, cls->n_regs);
+#else
+       unsigned                    *restr_nodes        = pbqp_alloc_env->restr_nodes;
+       pqueue_t                    *restr_nodes_queue  = new_pqueue();
+       pqueue_t                    *queue              = new_pqueue();
+       plist_t                     *sorted_list        = plist_new();
+       ir_node                     *last_element       = NULL;
+#endif
 
        /* first, determine the pressure */
        /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
@@ -245,111 +332,210 @@ static void create_pbqp_coloring_inst(ir_node *block, void *data) {
 
        /* create pbqp nodes, interference edges and reverse perfect elimination order */
        sched_foreach_reverse(block, irn) {
-               ir_node *live, *if_live;
-               ir_nodeset_iterator_t  iter, iter2;
+               be_foreach_value(irn, value,
+                       if (!arch_irn_consider_in_reg_alloc(cls, value))
+                               continue;
 
-               /* create nodes and interference edges */
-               foreach_ir_nodeset(&live_nodes, live, iter) {
                        /* create pbqp source node if it dosn't exist */
-                       if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
-                               restr_nodes[get_irn_idx(live)] = create_pbqp_node(pbqp_alloc_env, live);
+                       if (!get_node(pbqp_inst, get_irn_idx(value)))
+                               create_pbqp_node(pbqp_alloc_env, value);
+
+                       /* create nodes and interference edges */
+                       foreach_ir_nodeset(&live_nodes, live, iter) {
+                               /* create pbqp source node if it doesn't exist */
+                               if (!get_node(pbqp_inst, get_irn_idx(live)))
+                                       create_pbqp_node(pbqp_alloc_env, live);
+
+                               /* no edges to itself */
+                               if (value == live)
+                                       continue;
+
+                               insert_ife_edge(pbqp_alloc_env, value, live);
                        }
+               );
 
-                       iter2 = iter;
-                       for(if_live = ir_nodeset_iterator_next(&iter2); if_live != NULL; if_live = ir_nodeset_iterator_next(&iter2)) {
-                               /* create pbqp target node if it dosn't exist */
-                               if(get_node(pbqp_inst, get_irn_idx(if_live)) == NULL) {
-                                       restr_nodes[get_irn_idx(if_live)] = create_pbqp_node(pbqp_alloc_env, if_live);
-                               }
-                               else {
-                                       /* no edges to itself */
-                                       if(live == if_live)
-                                               continue;
-                                       /* only one interference edge between two nodes */
-                                       if(get_edge(pbqp_inst, get_irn_idx(live), get_irn_idx(if_live)))
-                                               continue;
-                               }
+               /* get living nodes for next step */
+               if (!is_Phi(irn)) {
+                       be_liveness_transfer(cls, irn, &live_nodes);
+               }
 
-                               /* do useful optimization to improve pbqp solving (we can do this because we know our matrix) */
-                               if(restr_nodes[get_irn_idx(live)] == 1 && restr_nodes[get_irn_idx(if_live)] == 1) {
-                                       unsigned src_idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(live))->costs);
-                                       unsigned trg_idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(if_live))->costs);
-                                       assert(src_idx != trg_idx && "Interfering nodes could not have the same register!");
+#if USE_BIPARTIT_MATCHING
+               if (get_irn_mode(irn) == mode_T) {
+                       unsigned     clique_size         = 0;
+                       unsigned     n_alloc             = 0;
+                       pbqp_node   *clique[cls->n_regs];
+                       bipartite_t *bp                  = bipartite_new(cls->n_regs, cls->n_regs);
+
+                       /* add all proj after a perm to clique */
+                       foreach_out_edge(irn, edge) {
+                               ir_node *proj = get_edge_src_irn(edge);
+
+                               /* ignore node if it is not necessary for register allocation */
+                               if (!arch_irn_consider_in_reg_alloc(cls, proj))
                                        continue;
+
+                               /* insert pbqp node into temp rpeo list of this block */
+                               plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(proj)));
+
+                               if(is_Perm_Proj(proj)) {
+                                       /* add proj to clique */
+                                       pbqp_node *clique_member = get_node(pbqp_inst,proj->node_idx);
+                                       vector    *costs         = clique_member->costs;
+                                       unsigned   idx           = 0;
+
+                                       clique[clique_size] = clique_member;
+
+                                       for(idx = 0; idx < costs->len; idx++) {
+                                               if(costs->entries[idx].data != INF_COSTS) {
+                                                       bipartite_add(bp, clique_size, idx);
+                                               }
+                                       }
+
+                                       /* increase node counter */
+                                       clique_size++;
+                                       n_alloc++;
                                }
-                               if(restr_nodes[get_irn_idx(live)] == 1 || restr_nodes[get_irn_idx(if_live)] == 1) {
-                                       if(restr_nodes[get_irn_idx(live)] == 1) {
-                                               unsigned idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(live))->costs);
-                                               vector_set(get_node(pbqp_inst, get_irn_idx(if_live))->costs, idx, INF_COSTS);
+                       }
+
+                       if(clique_size > 0) {
+                               plist_element_t *listElement;
+                               foreach_plist(temp_list, listElement) {
+                                       pbqp_node *clique_candidate  = listElement->data;
+                                       unsigned   idx               = 0;
+                                       bool       isMember          = true;
+
+                                       /* clique size not bigger than register class size */
+                                       if(clique_size >= cls->n_regs) break;
+
+                                       for(idx = 0; idx < clique_size; idx++) {
+                                               pbqp_node *member = clique[idx];
+
+                                               if(member == clique_candidate) {
+                                                       isMember = false;
+                                                       break;
+                                               }
+
+                                               if(get_edge(pbqp_inst, member->index, clique_candidate->index) == NULL && get_edge(pbqp_inst, clique_candidate->index, member->index) == NULL) {
+                                                       isMember = false;
+                                                       break;
+                                               }
                                        }
-                                       else {
-                                               unsigned idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(if_live))->costs);
-                                               vector_set(get_node(pbqp_inst, get_irn_idx(live))->costs, idx, INF_COSTS);
+
+                                       /* goto next list element if current node is not a member of the clique */
+                                       if(!isMember) { continue; }
+
+                                       /* add candidate to clique */
+                                       clique[clique_size] = clique_candidate;
+
+                                       vector *costs = clique_candidate->costs;
+                                       for(idx = 0; idx < costs->len; idx++) {
+                                               if(costs->entries[idx].data != INF_COSTS) {
+                                                       bipartite_add(bp, clique_size, idx);
+                                               }
+                                       }
+
+                                       /* increase node counter */
+                                       clique_size++;
+                               }
+                       }
+
+                       /* solve bipartite matching */
+                       bipartite_matching(bp, assignment);
+
+                       /* assign colors */
+                       unsigned nodeIdx = 0;
+                       for(nodeIdx = 0; nodeIdx < clique_size; nodeIdx++) {
+                               vector *costs = clique[nodeIdx]->costs;
+                               int     idx;
+                               for(idx = 0; idx < (int)costs->len; idx++) {
+                                       if(assignment[nodeIdx] != idx) {
+                                               costs->entries[idx].data = INF_COSTS;
                                        }
-                                       continue;
                                }
+                               assert(assignment[nodeIdx] >= 0 && "there must have been a register assigned (node not register pressure faithful?)");
+                       }
 
-                               /* copy matrix */
-                               struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, ife_matrix_dummy);
-                               /* add costs matrix to interference edge */
-                               add_edge_costs(pbqp_inst, get_irn_idx(live), get_irn_idx(if_live) , matrix);
+                       /* free memory */
+                       bipartite_free(bp);
+               }
+               else {
+                       if (arch_irn_consider_in_reg_alloc(cls, irn)) {
+                               plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
                        }
                }
-
+#else
                /* order nodes for perfect elimination order */
                if (get_irn_mode(irn) == mode_T) {
-                       plist_element_t *first = plist_first(temp_list);
-                       const ir_edge_t *edge;
-
+                       bool allHaveIFEdges = true;
                        foreach_out_edge(irn, edge) {
                                ir_node *proj = get_edge_src_irn(edge);
                                if (!arch_irn_consider_in_reg_alloc(cls, proj))
                                        continue;
 
-                               // insert proj node into priority queue (descending by the number of interference edges)
-                               if(restr_nodes[get_irn_idx(proj)] <= 4/*bitset_is_set(restr_nodes, get_irn_idx(proj))*/) {
-                                       pqueue_put(restr_nodes_queue, proj, pbqp_node_get_degree(get_node(pbqp_inst, get_irn_idx(proj))));
+                               /* insert proj node into priority queue (descending by the number of interference edges) */
+                               if (get_free_regs(restr_nodes, cls, proj) <= 4) {
+                                       pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
                                }
                                else {
-                                       pqueue_put(queue,proj, pbqp_node_get_degree(get_node(pbqp_inst, get_irn_idx(proj))));
+                                       pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
                                }
 
+                               /* skip last step if there is no last_element */
+                               if(last_element == NULL)
+                                       continue;
+
+                               /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */
+                               if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) {
+                                       allHaveIFEdges = false; /* there is no if edge between proj and last_element */
+                               }
                        }
 
-                       /* first insert all restricted nodes */
-                       while(!pqueue_empty(restr_nodes_queue)) {
-                               if(first == NULL) {
-                                       plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
-                                       first = plist_first(temp_list);
-                               } else {
-                                       plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
+                       if(last_element != NULL && allHaveIFEdges) {
+                               if (get_free_regs(restr_nodes, cls, last_element) <= 4) {
+                                       pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
+                               }
+                               else {
+                                       pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
                                }
+                               plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx)));
+                               last_element = NULL;
+                       }
+
+                       /* first insert all restricted proj nodes */
+                       while (!pqueue_empty(restr_nodes_queue)) {
+                               ir_node *node = (ir_node*)pqueue_pop_front(restr_nodes_queue);
+                               plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node)));
                        }
 
                        /* insert proj nodes descending by their number of interference edges */
-                       while(!pqueue_empty(queue)) {
-                               if(first == NULL) {
-                                       plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
-                                       first = plist_first(temp_list);
-                               } else {
-                                       plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
-                               }
+                       while (!pqueue_empty(queue)) {
+                               ir_node *node = (ir_node*)pqueue_pop_front(queue);
+                               plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node)));
+                       }
+
+                       /* invert sorted list */
+                       foreach_plist(sorted_list, el) {
+                               plist_insert_front(temp_list, el->data);
                        }
+
+                       plist_clear(sorted_list);
+
                }
                else {
                        if (arch_irn_consider_in_reg_alloc(cls, irn)) {
+                               // remember last colorable node
+                               last_element = irn;
                                plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
                        }
+                       else {
+                               // node not colorable, so ignore it
+                               last_element = NULL;
+                       }
                }
-
-               /* get living nodes for next step */
-               if (!is_Phi(irn)) {
-                       be_liveness_transfer(cls, irn, &live_nodes);
-               }
+#endif
        }
 
-       /* insert nodes into reverse perfect elimination order */
-       plist_element_t *el;
+       /* add the temp rpeo list of this block to the global reverse perfect elimination order list*/
        foreach_plist(temp_list, el) {
                plist_insert_back(rpeo, el->data);
        }
@@ -357,61 +543,53 @@ static void create_pbqp_coloring_inst(ir_node *block, void *data) {
        /* free reserved memory */
        ir_nodeset_destroy(&live_nodes);
        plist_free(temp_list);
+#if USE_BIPARTIT_MATCHING
+#else
+       plist_free(sorted_list);
        del_pqueue(queue);
        del_pqueue(restr_nodes_queue);
+#endif
 }
 
-static void insert_perms(ir_node *block, void *data) {
-       /*
-        * Start silent in the start block.
-        * The silence remains until the first barrier is seen.
-        * Each other block is begun loud.
-        */
-       be_chordal_env_t *env    = data;
+static void insert_perms(ir_node *block, void *data) /* Walks the schedule of block and calls pre_process_constraints() for every node for which be_scan_insn() returns an instruction. */
+{
+       be_chordal_env_t *env    = (be_chordal_env_t*)data; /* data is an opaque pointer that is used as the chordal environment */
        ir_node          *irn;
-       int               silent = block == get_irg_start_block(get_irn_irg(block));
 
-       /*
-        * If the block is the start block search the barrier and
-        * start handling constraints from there.
-        */
        for (irn = sched_first(block); !sched_is_end(irn);) {
-               int silent_old = silent;        /* store old silent value */
-               if (be_is_Barrier(irn))
-                       silent = !silent;               /* toggle silent flag */
-
-               be_insn_t *insn         = chordal_scan_insn(env, irn);
-               irn                                     = insn->next_insn;
+               ir_node   *const next = sched_next(irn); /* successor is fetched before irn is processed; presumably because processing may alter the schedule - TODO confirm */
+               be_insn_t *      insn = be_scan_insn(env, irn);
+               if (insn) /* a NULL result means there is nothing to pre-process for this node */
+                       pre_process_constraints(env, &insn);
 
-               if (silent_old)
-                       continue;
-
-               if (!insn->has_constraints)
-                       continue;
-
-               pre_process_constraints(env, &insn);
+               irn = next; /* continue with the successor saved at the top of the iteration */
        }
 }
 
+static void be_pbqp_coloring(be_chordal_env_t *env) /* Colors the nodes of register class env->cls in env->irg: builds a PBQP instance, solves it and assigns the chosen register to every node in the rpeo list. */
+{
+       ir_graph                    *irg            = env->irg;
+       const arch_register_class_t *cls            = env->cls;
+       be_lv_t                     *lv             = NULL;
+       plist_element_t             *element        = NULL;
+       unsigned                     colors_n       = arch_register_class_n_regs(cls); /* number of registers in cls; used as both dimensions of the cost matrices */
+       be_pbqp_alloc_env_t          pbqp_alloc_env; /* no initializer: fields are assigned one by one below */
+       unsigned                     col;
+       unsigned                     row;
+       pbqp_matrix_t               *ife_matrix;
+       num                          solution;
+#if KAPS_DUMP
+       FILE                        *file_before;
+#endif
+#if TIMER
+       ir_timer_t *t_ra_pbqp_alloc_create     = ir_timer_new();
+       ir_timer_t *t_ra_pbqp_alloc_solve      = ir_timer_new();
+       ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_new(); /* NOTE(review): no release of these three timers is visible in this function - confirm */
 
-void be_pbqp_coloring(be_chordal_env_t *env) {
-       ir_graph                      *irg  = env->irg;
-       be_irg_t                      *birg = env->birg;
-       const arch_register_class_t   *cls  = env->cls;
-       unsigned colors_n                                   = arch_register_class_n_regs(cls);
-       be_pbqp_alloc_env_t pbqp_alloc_env;
-       unsigned idx, row, col;
-       be_lv_t *lv;
-
-//     ir_timer_t *t_ra_pbqp_alloc_create    = ir_timer_register("be_pbqp_alloc_create", "pbqp alloc create");
-//     ir_timer_t *t_ra_pbqp_alloc_solve     = ir_timer_register("be_pbqp_alloc_solve", "pbqp alloc solve");
-//     ir_timer_t *t_ra_pbqp_alloc_create_aff  = ir_timer_register("be_pbqp_alloc_create_aff", "pbqp alloc create aff");
-
-       lv = be_assure_liveness(birg);
-       be_liveness_assure_sets(lv);
-       be_liveness_assure_chk(lv);
-
-//     printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
+       printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
+#endif
+       be_assure_live_sets(irg); /* called before the liveness information is fetched on the next line */
+       lv = be_get_irg_liveness(irg);
 
        /* insert perms */
        assure_doms(irg);
@@ -421,104 +599,160 @@ void be_pbqp_coloring(be_chordal_env_t *env) {
        if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) {
                char buf[256];
                snprintf(buf, sizeof(buf), "-%s-constr", cls->name);
-               be_dump(irg, buf, dump_ir_block_graph_sched);
+               dump_ir_graph(irg, buf); /* buf holds the suffix "-<class name>-constr" built above */
        }
 
+       ir_calculate_execfreq_int_factors(&pbqp_alloc_env.execfreq_factors, irg); /* writes into pbqp_alloc_env.execfreq_factors; presumably integer execution-frequency factors for irg - TODO confirm */
+
        /* initialize pbqp allocation data structure */
-       pbqp_alloc_env.pbqp_inst    = alloc_pbqp(get_irg_last_idx(irg));                /* initialize pbqp instance */
-       pbqp_alloc_env.birg         = birg;
-       pbqp_alloc_env.cls          = cls;
-       pbqp_alloc_env.irg          = irg;
-       pbqp_alloc_env.lv           = lv;
-       pbqp_alloc_env.ignored_regs = bitset_malloc(colors_n);
-       pbqp_alloc_env.rpeo                     = plist_new();
-       pbqp_alloc_env.restr_nodes  = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
-       pbqp_alloc_env.env                      = env;
-       be_put_ignore_regs(birg, cls, pbqp_alloc_env.ignored_regs);                             /* get ignored registers */
-
-       /* create costs matrix for interference edges */
-       struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
+       pbqp_alloc_env.pbqp_inst        = alloc_pbqp(get_irg_last_idx(irg));  /* initialize pbqp instance */
+       pbqp_alloc_env.cls              = cls;
+       pbqp_alloc_env.irg              = irg;
+       pbqp_alloc_env.lv               = lv;
+       pbqp_alloc_env.allocatable_regs = bitset_malloc(colors_n); /* one bit per register of cls; filled by be_put_allocatable_regs() below */
+       pbqp_alloc_env.rpeo             = plist_new(); /* list of PBQP nodes; iterated below for affinity edges, solving and color assignment */
+       pbqp_alloc_env.restr_nodes      = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); /* get_irg_last_idx(irg) entries, i.e. indexable by node index */
+       pbqp_alloc_env.ife_edge_num     = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); /* same size as restr_nodes */
+       pbqp_alloc_env.env              = env;
+       be_put_allocatable_regs(irg, cls, pbqp_alloc_env.allocatable_regs);
+
+
+       /* create costs matrix template for interference edges */
+       ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
        /* set costs */
-       for(row = 0, col=0; row < colors_n; row++, col++)
+       for (row = 0, col = 0; row < colors_n; row++, col++) /* row == col throughout, so only the diagonal is set to INF_COSTS */
                pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
 
-       pbqp_alloc_env.ife_matrix_dummy = ife_matrix;
+       pbqp_alloc_env.ife_matrix_template = ife_matrix;
 
-       /* create costs matrix for affinity edges */
-       struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
-       /* set costs */
-       for(row = 0; row < colors_n; row++) {
-               for(col = 0; col < colors_n; col++) {
-                       if(row != col)
-                               pbqp_matrix_set(afe_matrix, row, col, 2);
+
+       if (!use_exec_freq) { /* NOTE(review): within this function aff_matrix_template is assigned only inside this branch */
+               /* create costs matrix template for affinity edges */
+               pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
+               /* set costs */
+               for (row = 0; row < colors_n; row++) {
+                       for (col = 0; col < colors_n; col++) {
+                               if (row != col) /* only off-diagonal entries get the cost 2 */
+                                       pbqp_matrix_set(afe_matrix, row, col, 2);
+                       }
                }
+               pbqp_alloc_env.aff_matrix_template = afe_matrix;
        }
-       pbqp_alloc_env.aff_matrix_dummy = afe_matrix;
 
 
        /* create pbqp instance */
-//     ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
+#if TIMER
+       ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
+#endif
        assure_doms(irg);
-       dom_tree_walk_irg(irg, create_pbqp_coloring_inst , NULL, &pbqp_alloc_env);
-//     ir_timer_stop(t_ra_pbqp_alloc_create);
+       dom_tree_walk_irg(irg, create_pbqp_coloring_instance , NULL, &pbqp_alloc_env); /* only the first callback is given; the second one is NULL */
+#if TIMER
+       ir_timer_stop(t_ra_pbqp_alloc_create);
+#endif
+
 
        /* set up affinity edges */
-//     ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
-       irg_walk_graph(irg, build_graph_walker, NULL, &pbqp_alloc_env);
-//     ir_timer_stop(t_ra_pbqp_alloc_create_aff);
+#if TIMER
+       ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
+#endif
+       foreach_plist(pbqp_alloc_env.rpeo, element) { /* visit every PBQP node recorded in rpeo */
+               pbqp_node_t *node = (pbqp_node_t*)element->data;
+               ir_node     *irn  = get_idx_irn(irg, node->index); /* map the PBQP node index back to its ir_node */
+
+               create_affinity_edges(irn, &pbqp_alloc_env);
+       }
+#if TIMER
+       ir_timer_stop(t_ra_pbqp_alloc_create_aff);
+#endif
+
 
 #if KAPS_DUMP
        // dump graph before solving pbqp
-       FILE *file_before = my_open(env, "", "-pbqp_coloring.html");
+       file_before = my_open(env, "", "-pbqp_coloring.html"); /* closed again in the cleanup section at the end */
        set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
 #endif
 
+       /* print out reverse perfect elimination order */
+#if PRINT_RPEO
+       {
+               plist_element_t *elements;
+               foreach_plist(pbqp_alloc_env.rpeo, elements) {
+                       pbqp_node_t *node = elements->data; /* NOTE(review): no cast here, unlike the other element->data reads in this function */
+                       printf(" %d(%ld);", node->index, get_idx_irn(irg, node->index)->node_nr);
+               }
+               printf("\n");
+       }
+#endif
+
        /* solve pbqp instance */
-//     ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
-       solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
-//     ir_timer_stop(t_ra_pbqp_alloc_solve);
-       num solution = get_solution(pbqp_alloc_env.pbqp_inst);
-       assert(solution != INF_COSTS && "No PBQP solution found");
+#if TIMER
+       ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
+#endif
+       if(use_late_decision) { /* selects the solver variant; both receive the instance and the rpeo list */
+               solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
+       }
+       else {
+               solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
+       }
+#if TIMER
+       ir_timer_stop(t_ra_pbqp_alloc_solve);
+#endif
 
-       plist_element_t *element;
+
+       solution = get_solution(pbqp_alloc_env.pbqp_inst);
+       if (solution == INF_COSTS) /* a solution of INF_COSTS is treated as "no solution" */
+               panic("No PBQP solution found");
+
+
+       /* assign colors */
        foreach_plist(pbqp_alloc_env.rpeo, element) {
-               pbqp_node *node                    = element->data;
-               idx                                    = node->index;
-               ir_node *irn               = get_idx_irn(irg, idx);
-               num color                  = get_node_solution(pbqp_alloc_env.pbqp_inst, idx);
-               const arch_register_t *reg = arch_register_for_index(cls, color);
+               pbqp_node_t           *node  = (pbqp_node_t*)element->data;
+               ir_node               *irn   = get_idx_irn(irg, node->index);
+               num                    color = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index); /* the solver's result for this node */
+               const arch_register_t *reg   = arch_register_for_index(cls, color); /* color is used as a register index within cls */
 
                arch_set_irn_register(irn, reg);
        }
 
-//     printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_create), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
-//     printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_solve), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
-//     printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_create_aff), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
+
+#if TIMER
+       printf("PBQP alloc create:     %10.3lf msec\n",
+              (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
+       printf("PBQP alloc solve:      %10.3lf msec\n",
+              (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
+       printf("PBQP alloc create aff: %10.3lf msec\n",
+              (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
+#endif
 
 
        /* free reserved memory */
 #if KAPS_DUMP
        fclose(file_before);
 #endif
-       bitset_free(pbqp_alloc_env.ignored_regs);
+       bitset_free(pbqp_alloc_env.allocatable_regs);
        free_pbqp(pbqp_alloc_env.pbqp_inst);
        plist_free(pbqp_alloc_env.rpeo);
        xfree(pbqp_alloc_env.restr_nodes);
+       xfree(pbqp_alloc_env.ife_edge_num);
 }
 
 
 /**
  * Initializes this module: adds the option table to the "pbqp" option group and registers be_pbqp_coloring as the chordal coloring named "pbqp".
  */
-void be_init_pbqp_coloring(void) {
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring)
+void be_init_pbqp_coloring(void)
+{
+       lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
+       lc_opt_entry_t *ra_grp       = lc_opt_get_grp(be_grp, "ra");
+       lc_opt_entry_t *chordal_grp  = lc_opt_get_grp(ra_grp, "chordal");
+       lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring");
+       lc_opt_entry_t *pbqp_grp     = lc_opt_get_grp(coloring_grp, "pbqp"); /* nested option groups: be -> ra -> chordal -> coloring -> pbqp */
 
        static be_ra_chordal_coloring_t coloring = { /* static; its address is passed to be_register_chordal_coloring() below, presumably kept after this function returns - TODO confirm */
                be_pbqp_coloring
        };
 
+       lc_opt_add_table(pbqp_grp, options); /* options is defined outside the part of the file visible here */
        be_register_chordal_coloring("pbqp", &coloring);
 }
-
-BE_REGISTER_MODULE_CONSTRUCTOR(be_pbqp_alloc);
-
-#endif