X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbepbqpcoloring.c;h=f9de569e8a10d3c23189449d05acff5265266151;hb=12aaa3fc53618991634823a6738f50360a54633b;hp=606aa257ae793023dc3d848a0f89a73fec91d5df;hpb=69a5cd48b51a38cf6abc517bca2feef9f3a6c433;p=libfirm diff --git a/ir/be/bepbqpcoloring.c b/ir/be/bepbqpcoloring.c index 606aa257a..f9de569e8 100644 --- a/ir/be/bepbqpcoloring.c +++ b/ir/be/bepbqpcoloring.c @@ -22,22 +22,23 @@ * @brief PBQP based register allocation. * @author Thomas Bersch * @date 27.11.2009 - * @version $Id: bechordal.c 26750 2009-11-27 09:37:43Z bersch $ */ -/* miscellaneous includes */ +/* miscellaneous includes */ #include "config.h" -#ifdef FIRM_KAPS - #include "debug.h" #include "error.h" #include "irdom.h" +#include "irdump.h" #include "iredges_t.h" #include "irprintf.h" #include "irgwalk.h" +#include "irtools.h" #include "time.h" +#include "execfreq_t.h" +#include "bipartite.h" /* libfirm/ir/be includes */ #include "bearch.h" @@ -62,59 +63,63 @@ #include "vector.h" #include "vector_t.h" #include "heuristical_co.h" +#include "heuristical_co_ld.h" #include "pbqp_t.h" #include "html_dumper.h" #include "pbqp_node_t.h" #include "pbqp_node.h" +#include "pbqp_edge_t.h" -#define TIMER 0 +#define TIMER 0 +#define PRINT_RPEO 0 +#define USE_BIPARTIT_MATCHING 0 +#define DO_USEFUL_OPT 1 -static bool use_exec_freq = true; +static int use_exec_freq = true; +static int use_late_decision = false; -typedef struct _be_pbqp_alloc_env_t { - pbqp *pbqp_inst; /**< PBQP instance for register allocation */ - be_irg_t *birg; /**< Back-end IRG session. */ - ir_graph *irg; /**< The graph under examination. */ - const arch_register_class_t *cls; /**< Current processed register class */ +typedef struct be_pbqp_alloc_env_t { + pbqp_t *pbqp_inst; /**< PBQP instance for register allocation */ + ir_graph *irg; /**< The graph under examination. */ + const arch_register_class_t *cls; /**< Current processed register class */ be_lv_t *lv; - bitset_t *ignored_regs; - pbqp_matrix *ife_matrix_template; - pbqp_matrix *aff_matrix_template; - plist_t *rpeo; - unsigned *restr_nodes; - unsigned *ife_edge_num; - be_chordal_env_t *env; + bitset_t *allocatable_regs; + pbqp_matrix_t *ife_matrix_template; + pbqp_matrix_t *aff_matrix_template; + plist_t *rpeo; + unsigned *restr_nodes; + unsigned *ife_edge_num; + ir_execfreq_int_factors execfreq_factors; + be_chordal_env_t *env; } be_pbqp_alloc_env_t; -#define is_Reg_Phi(irn) (is_Phi(irn) && mode_is_data(get_irn_mode(irn))) -#define get_Perm_src(irn) (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn))) -#define is_Perm_Proj(irn) (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn))) -#define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix))) -#define get_free_regs(restr_nodes, cls, irn) (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)]) - -static inline int is_2addr_code(const arch_register_req_t *req) -{ - return (req->type & arch_register_req_type_should_be_same) != 0; -} +#define is_Reg_Phi(irn) (is_Phi(irn) && mode_is_data(get_irn_mode(irn))) +#define get_Perm_src(irn) (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn))) +#define is_Perm_Proj(irn) (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn))) +#define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix))) +#define get_free_regs(restr_nodes, cls, irn) (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)]) static const lc_opt_table_entry_t options[] = { - LC_OPT_ENT_BOOL ("exec_freq", "use exec_freq", &use_exec_freq), + LC_OPT_ENT_BOOL("exec_freq", "use exec_freq", &use_exec_freq), + LC_OPT_ENT_BOOL("late_decision", "use late decision for register allocation", &use_late_decision), LC_OPT_LAST }; #if KAPS_DUMP static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix) { - FILE *result; - char buf[1024]; - size_t i, n; - char *tu_name; - - n = strlen(env->birg->main_env->cup_name); + FILE *result; + char buf[1024]; + size_t i; + size_t n; + char *tu_name; + const char *cup_name = be_get_irg_main_env(env->irg)->cup_name; + + n = strlen(cup_name); tu_name = XMALLOCN(char, n + 1); - strcpy(tu_name, env->birg->main_env->cup_name); + strcpy(tu_name, cup_name); for (i = 0; i < n; ++i) if (tu_name[i] == '.') tu_name[i] = '_'; @@ -122,7 +127,7 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix); xfree(tu_name); result = fopen(buf, "wt"); - if(result == NULL) { + if (result == NULL) { panic("Couldn't open '%s' for writing.", buf); } @@ -131,20 +136,25 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char #endif -static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) { +static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) +{ const arch_register_class_t *cls = pbqp_alloc_env->cls; - pbqp *pbqp_inst = pbqp_alloc_env->pbqp_inst; - bitset_t *ignored_regs = pbqp_alloc_env->ignored_regs; + pbqp_t *pbqp_inst = pbqp_alloc_env->pbqp_inst; + bitset_t *allocatable_regs = pbqp_alloc_env->allocatable_regs; unsigned colors_n = arch_register_class_n_regs(cls); unsigned cntConstrains = 0; /* create costs vector depending on register constrains */ - struct vector *costs_vector = vector_alloc(pbqp_inst, colors_n); + vector_t *costs_vector = vector_alloc(pbqp_inst, colors_n); /* set costs depending on register constrains */ unsigned idx; - for(idx = 0; idx < colors_n; idx++) { - if(bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) { + for (idx = 0; idx < colors_n; idx++) { + const arch_register_req_t *req = arch_get_irn_register_req(irn); + const arch_register_t *reg = arch_register_for_index(cls, idx); + if (!bitset_is_set(allocatable_regs, idx) + || !arch_reg_is_allocatable(req, reg)) { + /* constrained */ vector_set(costs_vector, idx, INF_COSTS); cntConstrains++; } @@ -157,26 +167,27 @@ static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node) { - pbqp *pbqp = pbqp_alloc_env->pbqp_inst; + pbqp_t *pbqp = pbqp_alloc_env->pbqp_inst; const arch_register_class_t *cls = pbqp_alloc_env->cls; - pbqp_matrix *ife_matrix_template = pbqp_alloc_env->ife_matrix_template; - unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; + pbqp_matrix_t *ife_matrix_template = pbqp_alloc_env->ife_matrix_template; + unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; - if(get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { + if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { /* increase ife edge counter */ pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++; pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++; +#if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */ - if(get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { - unsigned src_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs); - unsigned trg_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs); - assert(src_idx != trg_idx && "Interfering nodes could not have the same register!"); + if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { + assert(vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs) != + vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs) && + "Interfering nodes must not have the same register!"); return; } - if(get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { - if(get_free_regs(restr_nodes, cls, src_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1) { unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs); vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS); } @@ -186,32 +197,32 @@ static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_no } return; } - +#endif /* insert interference edge */ insert_edge(pbqp, src_node, trg_node, ife_matrix_template); } } -static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos) +static void insert_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos) { - pbqp *pbqp = pbqp_alloc_env->pbqp_inst; - const arch_register_class_t *cls = pbqp_alloc_env->cls; - unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; - pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls)); - unsigned colors_n = arch_register_class_n_regs(cls); - - if(get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { - if(use_exec_freq) { + pbqp_t *pbqp = pbqp_alloc_env->pbqp_inst; + const arch_register_class_t *cls = pbqp_alloc_env->cls; + unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; + pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls)); + unsigned colors_n = arch_register_class_n_regs(cls); + + if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { + if (use_exec_freq) { /* get exec_freq for copy_block */ ir_node *root_bl = get_nodes_block(src_node); ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl; - unsigned long res = get_block_execfreq_ulong(pbqp_alloc_env->birg->exec_freq, copy_bl); + int res = get_block_execfreq_int(&pbqp_alloc_env->execfreq_factors, copy_bl); /* create afe-matrix */ unsigned row, col; - for(row = 0; row < colors_n; row++) { - for(col = 0; col < colors_n; col++) { - if(row != col) + for (row = 0; row < colors_n; row++) { + for (col = 0; col < colors_n; col++) { + if (row != col) pbqp_matrix_set(afe_matrix, row, col, (num)res); } } @@ -219,13 +230,13 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod else { afe_matrix = pbqp_alloc_env->aff_matrix_template; } - +#if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING /* do useful optimization to speed up pbqp solving */ - if(get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { return; } - if(get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { - if(get_free_regs(restr_nodes, cls, src_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1) { unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs); vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx); } @@ -235,7 +246,7 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod } return; } - +#endif /* insert interference edge */ insert_edge(pbqp, src_node, trg_node, afe_matrix); } @@ -243,24 +254,25 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod static void create_affinity_edges(ir_node *irn, void *env) { - be_pbqp_alloc_env_t *pbqp_alloc_env = env; - const arch_register_class_t *cls = pbqp_alloc_env->cls; - const arch_register_req_t *req = arch_get_register_req_out(irn); - unsigned pos, max; + be_pbqp_alloc_env_t *pbqp_alloc_env = (be_pbqp_alloc_env_t*)env; + const arch_register_class_t *cls = pbqp_alloc_env->cls; + const arch_register_req_t *req = arch_get_irn_register_req(irn); + unsigned pos; + unsigned max; if (is_Reg_Phi(irn)) { /* Phis */ - for (pos=0, max=get_irn_arity(irn); posother_same; - int i; - - for (i = 0; 1U << i <= other; ++i) { - if (other & (1U << i)) { - ir_node *other = get_irn_n(skip_Proj(irn), i); - if (!arch_irn_consider_in_reg_alloc(cls, other)) - continue; - - /* no edges to itself */ - if(irn == other) { - continue; - } + insert_afe_edge(pbqp_alloc_env, irn, arg, -1); + } else if (arch_register_req_is(req, should_be_same)) { + const unsigned other = req->other_same; + int i; - inser_afe_edge(pbqp_alloc_env, irn, other, i); + for (i = 0; 1U << i <= other; ++i) { + if (other & (1U << i)) { + ir_node *other = get_irn_n(skip_Proj(irn), i); + if (!arch_irn_consider_in_reg_alloc(cls, other)) + continue; + + /* no edges to itself */ + if (irn == other) { + continue; } + + insert_afe_edge(pbqp_alloc_env, irn, other, i); } } } @@ -295,17 +304,23 @@ static void create_affinity_edges(ir_node *irn, void *env) static void create_pbqp_coloring_instance(ir_node *block, void *data) { - be_pbqp_alloc_env_t *pbqp_alloc_env = data; - be_lv_t *lv = pbqp_alloc_env->lv; - const arch_register_class_t *cls = pbqp_alloc_env->cls; - plist_t *rpeo = pbqp_alloc_env->rpeo; - pbqp *pbqp_inst = pbqp_alloc_env->pbqp_inst; - unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; - pqueue_t *queue = new_pqueue(); - pqueue_t *restr_nodes_queue = new_pqueue(); - plist_t *temp_list = plist_new(); - ir_node *irn; + be_pbqp_alloc_env_t *pbqp_alloc_env = (be_pbqp_alloc_env_t*)data; + be_lv_t *lv = pbqp_alloc_env->lv; + const arch_register_class_t *cls = pbqp_alloc_env->cls; + plist_t *rpeo = pbqp_alloc_env->rpeo; + pbqp_t *pbqp_inst = pbqp_alloc_env->pbqp_inst; + plist_t *temp_list = plist_new(); + plist_element_t *el; ir_nodeset_t live_nodes; +#if USE_BIPARTIT_MATCHING + int *assignment = ALLOCAN(int, cls->n_regs); +#else + unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; + pqueue_t *restr_nodes_queue = new_pqueue(); + pqueue_t *queue = new_pqueue(); + plist_t *sorted_list = plist_new(); + ir_node *last_element = NULL; +#endif /* first, determine the pressure */ /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */ @@ -317,115 +332,210 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) /* create pbqp nodes, interference edges and reverse perfect elimination order */ sched_foreach_reverse(block, irn) { - ir_node *live; - ir_nodeset_iterator_t iter; + be_foreach_value(irn, value, + if (!arch_irn_consider_in_reg_alloc(cls, value)) + continue; + /* create pbqp source node if it dosn't exist */ + if (!get_node(pbqp_inst, get_irn_idx(value))) + create_pbqp_node(pbqp_alloc_env, value); + + /* create nodes and interference edges */ + foreach_ir_nodeset(&live_nodes, live, iter) { + /* create pbqp source node if it dosn't exist */ + if (!get_node(pbqp_inst, get_irn_idx(live))) + create_pbqp_node(pbqp_alloc_env, live); + + /* no edges to itself */ + if (value == live) + continue; + + insert_ife_edge(pbqp_alloc_env, value, live); + } + ); + + /* get living nodes for next step */ + if (!is_Phi(irn)) { + be_liveness_transfer(cls, irn, &live_nodes); + } + +#if USE_BIPARTIT_MATCHING if (get_irn_mode(irn) == mode_T) { - const ir_edge_t *edge; + unsigned clique_size = 0; + unsigned n_alloc = 0; + pbqp_node *clique[cls->n_regs]; + bipartite_t *bp = bipartite_new(cls->n_regs, cls->n_regs); + + /* add all proj after a perm to clique */ foreach_out_edge(irn, edge) { ir_node *proj = get_edge_src_irn(edge); + + /* ignore node if it is not necessary for register allocation */ if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(proj)) == NULL) { - create_pbqp_node(pbqp_alloc_env, proj); - } + /* insert pbqp node into temp rpeo list of this block */ + plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(proj))); - /* create nodes and interference edges */ - foreach_ir_nodeset(&live_nodes, live, iter) { - /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) { - create_pbqp_node(pbqp_alloc_env, live); - } + if(is_Perm_Proj(proj)) { + /* add proj to clique */ + pbqp_node *clique_member = get_node(pbqp_inst,proj->node_idx); + vector *costs = clique_member->costs; + unsigned idx = 0; + + clique[clique_size] = clique_member; - /* no edges to itself */ - if(proj == live) { - continue; + for(idx = 0; idx < costs->len; idx++) { + if(costs->entries[idx].data != INF_COSTS) { + bipartite_add(bp, clique_size, idx); + } } - insert_ife_edge(pbqp_alloc_env, proj, live); + /* increase node counter */ + clique_size++; + n_alloc++; } } - } - else { - if (arch_irn_consider_in_reg_alloc(cls, irn)) { - /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(irn)) == NULL) { - create_pbqp_node(pbqp_alloc_env, irn); - } - /* create nodes and interference edges */ - foreach_ir_nodeset(&live_nodes, live, iter) { - /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) { - create_pbqp_node(pbqp_alloc_env, live); + if(clique_size > 0) { + plist_element_t *listElement; + foreach_plist(temp_list, listElement) { + pbqp_node *clique_candidate = listElement->data; + unsigned idx = 0; + bool isMember = true; + + /* clique size not bigger then register class size */ + if(clique_size >= cls->n_regs) break; + + for(idx = 0; idx < clique_size; idx++) { + pbqp_node *member = clique[idx]; + + if(member == clique_candidate) { + isMember = false; + break; + } + + if(get_edge(pbqp_inst, member->index, clique_candidate->index) == NULL && get_edge(pbqp_inst, clique_candidate->index, member->index) == NULL) { + isMember = false; + break; + } } - /* no edges to itself */ - if(irn == live) { - continue; + /* goto next list element if current node is not a member of the clique */ + if(!isMember) { continue; } + + /* add candidate to clique */ + clique[clique_size] = clique_candidate; + + vector *costs = clique_candidate->costs; + for(idx = 0; idx < costs->len; idx++) { + if(costs->entries[idx].data != INF_COSTS) { + bipartite_add(bp, clique_size, idx); + } } - /* insert interference edge */ - insert_ife_edge(pbqp_alloc_env, irn, live); + /* increase node counter */ + clique_size++; } } - } - /* get living nodes for next step */ - if (!is_Phi(irn)) { - be_liveness_transfer(cls, irn, &live_nodes); - } + /* solve bipartite matching */ + bipartite_matching(bp, assignment); + + /* assign colors */ + unsigned nodeIdx = 0; + for(nodeIdx = 0; nodeIdx < clique_size; nodeIdx++) { + vector *costs = clique[nodeIdx]->costs; + int idx; + for(idx = 0; idx < (int)costs->len; idx++) { + if(assignment[nodeIdx] != idx) { + costs->entries[idx].data = INF_COSTS; + } + } + assert(assignment[nodeIdx] >= 0 && "there must have been a register assigned (node not register pressure faithful?)"); + } + /* free memory */ + bipartite_free(bp); + } + else { + if (arch_irn_consider_in_reg_alloc(cls, irn)) { + plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn))); + } + } +#else /* order nodes for perfect elimination order */ if (get_irn_mode(irn) == mode_T) { - plist_element_t *first = plist_first(temp_list); - const ir_edge_t *edge; - + bool allHaveIFEdges = true; foreach_out_edge(irn, edge) { ir_node *proj = get_edge_src_irn(edge); if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - // insert proj node into priority queue (descending by the number of interference edges) - if(get_free_regs(restr_nodes, cls, proj) <= 4/*bitset_is_set(restr_nodes, get_irn_idx(proj))*/) { + /* insert proj node into priority queue (descending by the number of interference edges) */ + if (get_free_regs(restr_nodes, cls, proj) <= 4) { pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]); } else { - pqueue_put(queue,proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]); + pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]); + } + + /* skip last step if there is no last_element */ + if(last_element == NULL) + continue; + + /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */ + if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) { + allHaveIFEdges = false; /* there is no if edge between proj and last_element */ } } - /* first insert all restricted nodes */ - while(!pqueue_empty(restr_nodes_queue)) { - if(first == NULL) { - plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue)))); - first = plist_first(temp_list); - } else { - plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue)))); + if(last_element != NULL && allHaveIFEdges) { + if (get_free_regs(restr_nodes, cls, last_element) <= 4) { + pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]); + } + else { + pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]); } + plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx))); + last_element = NULL; + } + + /* first insert all restricted proj nodes */ + while (!pqueue_empty(restr_nodes_queue)) { + ir_node *node = (ir_node*)pqueue_pop_front(restr_nodes_queue); + plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node))); } /* insert proj nodes descending by their number of interference edges */ - while(!pqueue_empty(queue)) { - if(first == NULL) { - plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue)))); - first = plist_first(temp_list); - } else { - plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue)))); - } + while (!pqueue_empty(queue)) { + ir_node *node = (ir_node*)pqueue_pop_front(queue); + plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node))); + } + + /* invert sorted list */ + foreach_plist(sorted_list, el) { + plist_insert_front(temp_list, el->data); } + + plist_clear(sorted_list); + } else { if (arch_irn_consider_in_reg_alloc(cls, irn)) { + // remember last colorable node + last_element = irn; plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn))); } + else { + // node not colorable, so ignore it + last_element = NULL; + } } +#endif } - /* insert nodes into reverse perfect elimination order */ - plist_element_t *el; + /* add the temp rpeo list of this block to the global reverse perfect elimination order list*/ foreach_plist(temp_list, el) { plist_insert_back(rpeo, el->data); } @@ -433,53 +543,44 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) /* free reserved memory */ ir_nodeset_destroy(&live_nodes); plist_free(temp_list); +#if USE_BIPARTIT_MATCHING +#else + plist_free(sorted_list); del_pqueue(queue); del_pqueue(restr_nodes_queue); +#endif } static void insert_perms(ir_node *block, void *data) { - /* - * Start silent in the start block. - * The silence remains until the first barrier is seen. - * Each other block is begun loud. - */ - be_chordal_env_t *env = data; + be_chordal_env_t *env = (be_chordal_env_t*)data; ir_node *irn; - int silent = block == get_irg_start_block(get_irn_irg(block)); - /* - * If the block is the start block search the barrier and - * start handling constraints from there. - */ for (irn = sched_first(block); !sched_is_end(irn);) { - int silent_old = silent; /* store old silent value */ - if (be_is_Barrier(irn)) - silent = !silent; /* toggle silent flag */ + ir_node *const next = sched_next(irn); + be_insn_t * insn = be_scan_insn(env, irn); + if (insn) + pre_process_constraints(env, &insn); - be_insn_t *insn = chordal_scan_insn(env, irn); - irn = insn->next_insn; - - if (silent_old) - continue; - - if (!insn->has_constraints) - continue; - - pre_process_constraints(env, &insn); + irn = next; } } -void be_pbqp_coloring(be_chordal_env_t *env) +static void be_pbqp_coloring(be_chordal_env_t *env) { - ir_graph *irg = env->irg; - be_irg_t *birg = env->birg; - const arch_register_class_t *cls = env->cls; - unsigned colors_n = arch_register_class_n_regs(cls); - be_pbqp_alloc_env_t pbqp_alloc_env; - unsigned idx, row, col; - be_lv_t *lv; - + ir_graph *irg = env->irg; + const arch_register_class_t *cls = env->cls; + be_lv_t *lv = NULL; + plist_element_t *element = NULL; + unsigned colors_n = arch_register_class_n_regs(cls); + be_pbqp_alloc_env_t pbqp_alloc_env; + unsigned col; + unsigned row; + pbqp_matrix_t *ife_matrix; + num solution; +#if KAPS_DUMP + FILE *file_before; +#endif #if TIMER ir_timer_t *t_ra_pbqp_alloc_create = ir_timer_new(); ir_timer_t *t_ra_pbqp_alloc_solve = ir_timer_new(); @@ -487,9 +588,8 @@ void be_pbqp_coloring(be_chordal_env_t *env) printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg))); #endif - lv = be_assure_liveness(birg); - be_liveness_assure_sets(lv); - be_liveness_assure_chk(lv); + be_assure_live_sets(irg); + lv = be_get_irg_liveness(irg); /* insert perms */ assure_doms(irg); @@ -499,40 +599,40 @@ void be_pbqp_coloring(be_chordal_env_t *env) if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) { char buf[256]; snprintf(buf, sizeof(buf), "-%s-constr", cls->name); - be_dump(irg, buf, dump_ir_block_graph_sched); + dump_ir_graph(irg, buf); } + ir_calculate_execfreq_int_factors(&pbqp_alloc_env.execfreq_factors, irg); /* initialize pbqp allocation data structure */ - pbqp_alloc_env.pbqp_inst = alloc_pbqp(get_irg_last_idx(irg)); /* initialize pbqp instance */ - pbqp_alloc_env.birg = birg; - pbqp_alloc_env.cls = cls; - pbqp_alloc_env.irg = irg; - pbqp_alloc_env.lv = lv; - pbqp_alloc_env.ignored_regs = bitset_malloc(colors_n); - pbqp_alloc_env.rpeo = plist_new(); - pbqp_alloc_env.restr_nodes = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); - pbqp_alloc_env.ife_edge_num = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); - pbqp_alloc_env.env = env; - be_put_ignore_regs(birg, cls, pbqp_alloc_env.ignored_regs); /* get ignored registers */ + pbqp_alloc_env.pbqp_inst = alloc_pbqp(get_irg_last_idx(irg)); /* initialize pbqp instance */ + pbqp_alloc_env.cls = cls; + pbqp_alloc_env.irg = irg; + pbqp_alloc_env.lv = lv; + pbqp_alloc_env.allocatable_regs = bitset_malloc(colors_n); + pbqp_alloc_env.rpeo = plist_new(); + pbqp_alloc_env.restr_nodes = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); + pbqp_alloc_env.ife_edge_num = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); + pbqp_alloc_env.env = env; + be_put_allocatable_regs(irg, cls, pbqp_alloc_env.allocatable_regs); /* create costs matrix template for interference edges */ - struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); + ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); /* set costs */ - for(row = 0, col=0; row < colors_n; row++, col++) + for (row = 0, col = 0; row < colors_n; row++, col++) pbqp_matrix_set(ife_matrix, row, col, INF_COSTS); pbqp_alloc_env.ife_matrix_template = ife_matrix; - if(!use_exec_freq) { + if (!use_exec_freq) { /* create costs matrix template for affinity edges */ - struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); + pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); /* set costs */ - for(row = 0; row < colors_n; row++) { - for(col = 0; col < colors_n; col++) { - if(row != col) + for (row = 0; row < colors_n; row++) { + for (col = 0; col < colors_n; col++) { + if (row != col) pbqp_matrix_set(afe_matrix, row, col, 2); } } @@ -555,11 +655,10 @@ void be_pbqp_coloring(be_chordal_env_t *env) #if TIMER ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff); #endif - plist_element_t *el; - foreach_plist(pbqp_alloc_env.rpeo, el) { - pbqp_node *node = el->data; - idx = node->index; - ir_node *irn = get_idx_irn(irg, idx); + foreach_plist(pbqp_alloc_env.rpeo, element) { + pbqp_node_t *node = (pbqp_node_t*)element->data; + ir_node *irn = get_idx_irn(irg, node->index); + create_affinity_edges(irn, &pbqp_alloc_env); } #if TIMER @@ -569,42 +668,59 @@ void be_pbqp_coloring(be_chordal_env_t *env) #if KAPS_DUMP // dump graph before solving pbqp - FILE *file_before = my_open(env, "", "-pbqp_coloring.html"); + file_before = my_open(env, "", "-pbqp_coloring.html"); set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before); #endif + /* print out reverse perfect elimination order */ +#if PRINT_RPEO + { + plist_element_t *elements; + foreach_plist(pbqp_alloc_env.rpeo, elements) { + pbqp_node_t *node = elements->data; + printf(" %d(%ld);", node->index, get_idx_irn(irg, node->index)->node_nr); + } + printf("\n"); + } +#endif /* solve pbqp instance */ #if TIMER ir_timer_reset_and_start(t_ra_pbqp_alloc_solve); #endif - solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo); + if(use_late_decision) { + solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo); + } + else { + solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo); + } #if TIMER ir_timer_stop(t_ra_pbqp_alloc_solve); #endif - num solution = get_solution(pbqp_alloc_env.pbqp_inst); - assert(solution != INF_COSTS && "No PBQP solution found"); + + + solution = get_solution(pbqp_alloc_env.pbqp_inst); + if (solution == INF_COSTS) + panic("No PBQP solution found"); /* assign colors */ - plist_element_t *element; foreach_plist(pbqp_alloc_env.rpeo, element) { - pbqp_node *node = element->data; - idx = node->index; - ir_node *irn = get_idx_irn(irg, idx); - num color = get_node_solution(pbqp_alloc_env.pbqp_inst, idx); - const arch_register_t *reg = arch_register_for_index(cls, color); + pbqp_node_t *node = (pbqp_node_t*)element->data; + ir_node *irn = get_idx_irn(irg, node->index); + num color = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index); + const arch_register_t *reg = arch_register_for_index(cls, color); arch_set_irn_register(irn, reg); } #if TIMER - printf("%-20s: %8.3lf msec\n", "pbqp alloc create", + printf("PBQP alloc create: %10.3lf msec\n", (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0); - printf("%-20s: %8.3lf msec\n", "pbqp alloc solve", + printf("PBQP alloc solve: %10.3lf msec\n", (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0); - printf("%-20s: %8.3lf msec\n", "pbqp alloc create aff", + printf("PBQP alloc create aff: %10.3lf msec\n", (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0); #endif @@ -613,7 +729,7 @@ void be_pbqp_coloring(be_chordal_env_t *env) #if KAPS_DUMP fclose(file_before); #endif - bitset_free(pbqp_alloc_env.ignored_regs); + bitset_free(pbqp_alloc_env.allocatable_regs); free_pbqp(pbqp_alloc_env.pbqp_inst); plist_free(pbqp_alloc_env.rpeo); xfree(pbqp_alloc_env.restr_nodes); @@ -624,13 +740,14 @@ void be_pbqp_coloring(be_chordal_env_t *env) /** * Initializes this module. */ +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring) void be_init_pbqp_coloring(void) { - lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); - lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra"); - lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal"); + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra"); + lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal"); lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring"); - lc_opt_entry_t *pbqp_grp = lc_opt_get_grp(coloring_grp, "pbqp"); + lc_opt_entry_t *pbqp_grp = lc_opt_get_grp(coloring_grp, "pbqp"); static be_ra_chordal_coloring_t coloring = { be_pbqp_coloring @@ -639,7 +756,3 @@ void be_init_pbqp_coloring(void) lc_opt_add_table(pbqp_grp, options); be_register_chordal_coloring("pbqp", &coloring); } - -BE_REGISTER_MODULE_CONSTRUCTOR(be_pbqp_alloc); - -#endif