ignore last scheduled node on reordering after a perm if it is not colorable
[libfirm] / ir / be / bepbqpcoloring.c
index 28bd6d3..2e1f120 100644 (file)
 #include "vector.h"
 #include "vector_t.h"
 #include "heuristical_co.h"
+#include "heuristical_co_ld.h"
 #include "pbqp_t.h"
 #include "html_dumper.h"
 #include "pbqp_node_t.h"
 #include "pbqp_node.h"
 
-#define TIMER 0
+#define TIMER          1
+#define PRINT_RPEO     1
 
 
-static bool use_exec_freq = true;
+static int use_exec_freq               = true;
+static int use_late_decision   = true;
 
 typedef struct _be_pbqp_alloc_env_t {
        pbqp                                            *pbqp_inst;                     /**< PBQP instance for register allocation */
@@ -101,6 +104,7 @@ static inline int is_2addr_code(const arch_register_req_t *req)
 
 static const lc_opt_table_entry_t options[] = {
        LC_OPT_ENT_BOOL      ("exec_freq", "use exec_freq",  &use_exec_freq),
+       LC_OPT_ENT_BOOL      ("late_decision", "use late decision for register allocation",  &use_late_decision),
        LC_OPT_LAST
 };
 
@@ -122,7 +126,7 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char
        ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
        xfree(tu_name);
        result = fopen(buf, "wt");
-       if(result == NULL) {
+       if (result == NULL) {
                panic("Couldn't open '%s' for writing.", buf);
        }
 
@@ -144,8 +148,8 @@ static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn)
 
        /* set costs depending on register constrains */
        unsigned idx;
-       for(idx = 0; idx < colors_n; idx++) {
-               if(bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) {
+       for (idx = 0; idx < colors_n; idx++) {
+               if (bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) {
                        vector_set(costs_vector, idx, INF_COSTS);
                        cntConstrains++;
                }
@@ -163,21 +167,21 @@ static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_no
        pbqp_matrix                             *ife_matrix_template = pbqp_alloc_env->ife_matrix_template;
        unsigned                                        *restr_nodes         = pbqp_alloc_env->restr_nodes;
 
-       if(get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
+       if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
 
                /* increase ife edge counter */
                pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++;
                pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++;
 
                /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */
-               if(get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
+               if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
                        unsigned src_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
                        unsigned trg_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
                        assert(src_idx != trg_idx && "Interfering nodes could not have the same register!");
                        return;
                }
-               if(get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
-                       if(get_free_regs(restr_nodes, cls, src_node) == 1) {
+               if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
+                       if (get_free_regs(restr_nodes, cls, src_node) == 1) {
                                unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
                                vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS);
                        }
@@ -201,8 +205,8 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod
        pbqp_matrix                                     *afe_matrix       = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls));
        unsigned                                         colors_n                 = arch_register_class_n_regs(cls);
 
-       if(get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
-               if(use_exec_freq) {
+       if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
+               if (use_exec_freq) {
                        /* get exec_freq for copy_block */
                        ir_node *root_bl = get_nodes_block(src_node);
                        ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
@@ -210,9 +214,9 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod
 
                        /* create afe-matrix */
                        unsigned row, col;
-                       for(row = 0; row < colors_n; row++) {
-                               for(col = 0; col < colors_n; col++) {
-                                       if(row != col)
+                       for (row = 0; row < colors_n; row++) {
+                               for (col = 0; col < colors_n; col++) {
+                                       if (row != col)
                                                pbqp_matrix_set(afe_matrix, row, col, (num)res);
                                }
                        }
@@ -222,11 +226,11 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod
                }
 
                /* do useful optimization to speed up pbqp solving */
-               if(get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
+               if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
                        return;
                }
-               if(get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
-                       if(get_free_regs(restr_nodes, cls, src_node) == 1) {
+               if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
+                       if (get_free_regs(restr_nodes, cls, src_node) == 1) {
                                unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
                                vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx);
                        }
@@ -257,7 +261,7 @@ static void create_affinity_edges(ir_node *irn, void *env)
                                continue;
 
                        /* no edges to itself */
-                       if(irn == arg) {
+                       if (irn == arg) {
                                continue;
                        }
 
@@ -283,7 +287,7 @@ static void create_affinity_edges(ir_node *irn, void *env)
                                                continue;
 
                                        /* no edges to itself */
-                                       if(irn == other) {
+                                       if (irn == other) {
                                                continue;
                                        }
 
@@ -305,8 +309,11 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data)
        pqueue_t                                        *queue                  = new_pqueue();
        pqueue_t                                        *restr_nodes_queue      = new_pqueue();
        plist_t                                         *temp_list              = plist_new();
+       plist_t                                         *sorted_list            = plist_new();
        ir_node                     *irn;
        ir_nodeset_t                 live_nodes;
+       plist_element_t *el;
+       ir_node *last_element = NULL;
 
        /* first, determine the pressure */
        /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
@@ -329,19 +336,19 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data)
                                        continue;
 
                                /* create pbqp source node if it dosn't exist */
-                               if(get_node(pbqp_inst, get_irn_idx(proj)) == NULL) {
+                               if (get_node(pbqp_inst, get_irn_idx(proj)) == NULL) {
                                        create_pbqp_node(pbqp_alloc_env, proj);
                                }
 
                                /* create nodes and interference edges */
                                foreach_ir_nodeset(&live_nodes, live, iter) {
                                        /* create pbqp source node if it dosn't exist */
-                                       if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
+                                       if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
                                                create_pbqp_node(pbqp_alloc_env, live);
                                        }
 
                                        /* no edges to itself */
-                                       if(proj == live) {
+                                       if (proj == live) {
                                                continue;
                                        }
 
@@ -352,19 +359,19 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data)
                else {
                        if (arch_irn_consider_in_reg_alloc(cls, irn)) {
                                /* create pbqp source node if it dosn't exist */
-                               if(get_node(pbqp_inst, get_irn_idx(irn)) == NULL) {
+                               if (get_node(pbqp_inst, get_irn_idx(irn)) == NULL) {
                                        create_pbqp_node(pbqp_alloc_env, irn);
                                }
 
                                /* create nodes and interference edges */
                                foreach_ir_nodeset(&live_nodes, live, iter) {
                                        /* create pbqp source node if it dosn't exist */
-                                       if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
+                                       if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
                                                create_pbqp_node(pbqp_alloc_env, live);
                                        }
 
                                        /* no edges to itself */
-                                       if(irn == live) {
+                                       if (irn == live) {
                                                continue;
                                        }
 
@@ -381,52 +388,75 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data)
 
                /* order nodes for perfect elimination order */
                if (get_irn_mode(irn) == mode_T) {
-                       plist_element_t *first = plist_first(temp_list);
-                       const ir_edge_t *edge;
+                       bool allHaveIFEdges = true;
 
+                       const ir_edge_t *edge;
                        foreach_out_edge(irn, edge) {
                                ir_node *proj = get_edge_src_irn(edge);
                                if (!arch_irn_consider_in_reg_alloc(cls, proj))
                                        continue;
 
-                               // insert proj node into priority queue (descending by the number of interference edges)
-                               if(get_free_regs(restr_nodes, cls, proj) <= 4/*bitset_is_set(restr_nodes, get_irn_idx(proj))*/) {
+                               /* insert proj node into priority queue (descending by the number of interference edges) */
+                               if (get_free_regs(restr_nodes, cls, proj) <= 4) {
                                        pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
                                }
                                else {
-                                       pqueue_put(queue,proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
+                                       pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
+                               }
+
+                               /* skip last step if there is no last_element */
+                               if(last_element == NULL)
+                                       continue;
+
+                               /* check if proj has an interference edge to last_element (at this point the pbqp instance contains only interference edges) */
+                               if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) {
+                                       allHaveIFEdges = false; /* there is no if edge between proj and last_element */
                                }
                        }
 
-                       /* first insert all restricted nodes */
-                       while(!pqueue_empty(restr_nodes_queue)) {
-                               if(first == NULL) {
-                                       plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
-                                       first = plist_first(temp_list);
-                               } else {
-                                       plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
+                       if(last_element != NULL && allHaveIFEdges) {
+                               if (get_free_regs(restr_nodes, cls, last_element) <= 4) {
+                                       pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
                                }
+                               else {
+                                       pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
+                               }
+                               plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx)));
+                               last_element = NULL;
+                       }
+
+                       /* first insert all restricted proj nodes */
+                       while (!pqueue_empty(restr_nodes_queue)) {
+                               plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
                        }
 
                        /* insert proj nodes descending by their number of interference edges */
-                       while(!pqueue_empty(queue)) {
-                               if(first == NULL) {
-                                       plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
-                                       first = plist_first(temp_list);
-                               } else {
-                                       plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
-                               }
+                       while (!pqueue_empty(queue)) {
+                               plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
                        }
+
+                       /* invert sorted list */
+                       foreach_plist(sorted_list, el) {
+                               plist_insert_front(temp_list, el->data);
+                       }
+
+                       plist_clear(sorted_list);
+
                }
                else {
                        if (arch_irn_consider_in_reg_alloc(cls, irn)) {
+                               // remember last colorable node
+                               last_element = irn;
                                plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
                        }
+                       else {
+                               // node not colorable, so ignore it
+                               last_element = NULL;
+                       }
                }
        }
 
        /* insert nodes into reverse perfect elimination order */
-       plist_element_t *el;
        foreach_plist(temp_list, el) {
                plist_insert_back(rpeo, el->data);
        }
@@ -434,6 +464,7 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data)
        /* free reserved memory */
        ir_nodeset_destroy(&live_nodes);
        plist_free(temp_list);
+       plist_free(sorted_list);
        del_pqueue(queue);
        del_pqueue(restr_nodes_queue);
 }
@@ -471,15 +502,17 @@ static void insert_perms(ir_node *block, void *data)
        }
 }
 
-void be_pbqp_coloring(be_chordal_env_t *env)
+static void be_pbqp_coloring(be_chordal_env_t *env)
 {
-       ir_graph                      *irg  = env->irg;
-       be_irg_t                      *birg = env->birg;
-       const arch_register_class_t   *cls  = env->cls;
-       unsigned colors_n                                   = arch_register_class_n_regs(cls);
-       be_pbqp_alloc_env_t pbqp_alloc_env;
-       unsigned idx, row, col;
-       be_lv_t *lv;
+       ir_graph                        *irg                    = env->irg;
+       be_irg_t                        *birg                   = env->birg;
+       const arch_register_class_t *cls                        = env->cls;
+       be_lv_t                                         *lv                             = NULL;
+       plist_element_t                         *element                = NULL;
+       unsigned                                         colors_n               = arch_register_class_n_regs(cls);
+       be_pbqp_alloc_env_t              pbqp_alloc_env;
+       unsigned                                         row, col;
+
 
 #if TIMER
        ir_timer_t *t_ra_pbqp_alloc_create     = ir_timer_new();
@@ -521,19 +554,19 @@ void be_pbqp_coloring(be_chordal_env_t *env)
        /* create costs matrix template for interference edges */
        struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
        /* set costs */
-       for(row = 0, col=0; row < colors_n; row++, col++)
+       for (row = 0, col=0; row < colors_n; row++, col++)
                pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
 
        pbqp_alloc_env.ife_matrix_template = ife_matrix;
 
 
-       if(!use_exec_freq) {
+       if (!use_exec_freq) {
                /* create costs matrix template for affinity edges */
                struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
                /* set costs */
-               for(row = 0; row < colors_n; row++) {
-                       for(col = 0; col < colors_n; col++) {
-                               if(row != col)
+               for (row = 0; row < colors_n; row++) {
+                       for (col = 0; col < colors_n; col++) {
+                               if (row != col)
                                        pbqp_matrix_set(afe_matrix, row, col, 2);
                        }
                }
@@ -556,11 +589,10 @@ void be_pbqp_coloring(be_chordal_env_t *env)
 #if TIMER
        ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
 #endif
-       plist_element_t *el;
-       foreach_plist(pbqp_alloc_env.rpeo, el) {
-               pbqp_node *node                    = el->data;
-               idx                                    = node->index;
-               ir_node *irn               = get_idx_irn(irg, idx);
+       foreach_plist(pbqp_alloc_env.rpeo, element) {
+               pbqp_node       *node   = element->data;
+               ir_node         *irn    = get_idx_irn(irg, node->index);
+
                create_affinity_edges(irn, &pbqp_alloc_env);
        }
 #if TIMER
@@ -574,12 +606,27 @@ void be_pbqp_coloring(be_chordal_env_t *env)
        set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
 #endif
 
+       /* print out reverse perfect elimination order */
+#if PRINT_RPEO
+       plist_element_t *elements;
+       foreach_plist(pbqp_alloc_env.rpeo, elements) {
+               pbqp_node *node                    = elements->data;
+               printf(" %d(%lu);", node->index, get_idx_irn(irg, node->index)->node_nr);
+       }
+       printf("\n");
+#endif
+
 
        /* solve pbqp instance */
 #if TIMER
        ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
 #endif
-       solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
+       if(use_late_decision) {
+               solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
+       }
+       else {
+               solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
+       }
 #if TIMER
        ir_timer_stop(t_ra_pbqp_alloc_solve);
 #endif
@@ -588,13 +635,11 @@ void be_pbqp_coloring(be_chordal_env_t *env)
 
 
        /* assign colors */
-       plist_element_t *element;
        foreach_plist(pbqp_alloc_env.rpeo, element) {
-               pbqp_node *node                    = element->data;
-               idx                                    = node->index;
-               ir_node *irn               = get_idx_irn(irg, idx);
-               num color                  = get_node_solution(pbqp_alloc_env.pbqp_inst, idx);
-               const arch_register_t *reg = arch_register_for_index(cls, color);
+               pbqp_node                               *node   = element->data;
+               ir_node                                 *irn    = get_idx_irn(irg, node->index);
+               num                                      color  = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index);
+               const arch_register_t   *reg    = arch_register_for_index(cls, color);
 
                arch_set_irn_register(irn, reg);
        }
@@ -625,6 +670,7 @@ void be_pbqp_coloring(be_chordal_env_t *env)
 /**
  * Initializes this module.
  */
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring);
 void be_init_pbqp_coloring(void)
 {
        lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
@@ -641,6 +687,4 @@ void be_init_pbqp_coloring(void)
        be_register_chordal_coloring("pbqp", &coloring);
 }
 
-BE_REGISTER_MODULE_CONSTRUCTOR(be_pbqp_alloc);
-
 #endif