X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbepbqpcoloring.c;h=99aadb39e61b0ca0d3269d323cb649a982ed8465;hb=0318dc1a48ce72b311592c28affc31fabc95f026;hp=2a578d4fa4fd4778245e39f5bcf6b023bd2b8cf8;hpb=9f994428c17afccf0eddcede6f37e1ae41e5a0b8;p=libfirm diff --git a/ir/be/bepbqpcoloring.c b/ir/be/bepbqpcoloring.c index 2a578d4fa..99aadb39e 100644 --- a/ir/be/bepbqpcoloring.c +++ b/ir/be/bepbqpcoloring.c @@ -25,7 +25,7 @@ * @version $Id: bechordal.c 26750 2009-11-27 09:37:43Z bersch $ */ -/* miscellaneous includes */ +/* miscellaneous includes */ #include "config.h" #ifdef FIRM_KAPS @@ -34,11 +34,15 @@ #include "error.h" #include "irdom.h" +#include "irdump.h" #include "iredges_t.h" #include "irprintf.h" #include "irgwalk.h" #include "time.h" +/* libfirm/ir/adt includes */ +#include "bipartite.h" + /* libfirm/ir/be includes */ #include "bearch.h" #include "beirg.h" @@ -61,38 +65,43 @@ #include "matrix.h" #include "vector.h" #include "vector_t.h" -#include "heuristical.h" +#include "heuristical_co.h" +#include "heuristical_co_ld.h" #include "pbqp_t.h" #include "html_dumper.h" #include "pbqp_node_t.h" #include "pbqp_node.h" +#include "pbqp_edge_t.h" -#define TIMER 0 +#define TIMER 0 +#define PRINT_RPEO 0 +#define USE_BIPARTIT_MATCHING 0 +#define DO_USEFUL_OPT 1 -static bool use_exec_freq = true; +static int use_exec_freq = true; +static int use_late_decision = false; -typedef struct _be_pbqp_alloc_env_t { - pbqp *pbqp_inst; /**< PBQP instance for register allocation */ - be_irg_t *birg; /**< Back-end IRG session. */ - ir_graph *irg; /**< The graph under examination. */ - const arch_register_class_t *cls; /**< Current processed register class */ +typedef struct be_pbqp_alloc_env_t { + pbqp_t *pbqp_inst; /**< PBQP instance for register allocation */ + ir_graph *irg; /**< The graph under examination. */ + const arch_register_class_t *cls; /**< Current processed register class */ be_lv_t *lv; - bitset_t *ignored_regs; - pbqp_matrix *ife_matrix_template; - pbqp_matrix *aff_matrix_template; - plist_t *rpeo; - unsigned *restr_nodes; - unsigned *ife_edge_num; - be_chordal_env_t *env; + bitset_t *allocatable_regs; + pbqp_matrix_t *ife_matrix_template; + pbqp_matrix_t *aff_matrix_template; + plist_t *rpeo; + unsigned *restr_nodes; + unsigned *ife_edge_num; + be_chordal_env_t *env; } be_pbqp_alloc_env_t; -#define is_Reg_Phi(irn) (is_Phi(irn) && mode_is_data(get_irn_mode(irn))) -#define get_Perm_src(irn) (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn))) -#define is_Perm_Proj(irn) (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn))) -#define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix))) -#define get_free_regs(restr_nodes, cls, irn) (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)]) +#define is_Reg_Phi(irn) (is_Phi(irn) && mode_is_data(get_irn_mode(irn))) +#define get_Perm_src(irn) (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn))) +#define is_Perm_Proj(irn) (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn))) +#define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix))) +#define get_free_regs(restr_nodes, cls, irn) (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)]) static inline int is_2addr_code(const arch_register_req_t *req) { @@ -100,21 +109,24 @@ static inline int is_2addr_code(const arch_register_req_t *req) } static const lc_opt_table_entry_t options[] = { - LC_OPT_ENT_BOOL ("exec_freq", "use exec_freq", &use_exec_freq), + LC_OPT_ENT_BOOL("exec_freq", "use exec_freq", &use_exec_freq), + LC_OPT_ENT_BOOL("late_decision", "use late decision for register allocation", &use_late_decision), LC_OPT_LAST }; #if KAPS_DUMP static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix) { - FILE *result; - char buf[1024]; - size_t i, n; - char *tu_name; - - n = strlen(env->birg->main_env->cup_name); + FILE *result; + char buf[1024]; + size_t i; + size_t n; + char *tu_name; + const char *cup_name = be_get_irg_main_env(env->irg)->cup_name; + + n = strlen(cup_name); tu_name = XMALLOCN(char, n + 1); - strcpy(tu_name, env->birg->main_env->cup_name); + strcpy(tu_name, cup_name); for (i = 0; i < n; ++i) if (tu_name[i] == '.') tu_name[i] = '_'; @@ -122,7 +134,7 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix); xfree(tu_name); result = fopen(buf, "wt"); - if(result == NULL) { + if (result == NULL) { panic("Couldn't open '%s' for writing.", buf); } @@ -131,20 +143,22 @@ static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char #endif -static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) { +static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) +{ const arch_register_class_t *cls = pbqp_alloc_env->cls; - pbqp *pbqp_inst = pbqp_alloc_env->pbqp_inst; - bitset_t *ignored_regs = pbqp_alloc_env->ignored_regs; + pbqp_t *pbqp_inst = pbqp_alloc_env->pbqp_inst; + bitset_t *allocatable_regs = pbqp_alloc_env->allocatable_regs; unsigned colors_n = arch_register_class_n_regs(cls); unsigned cntConstrains = 0; /* create costs vector depending on register constrains */ - struct vector *costs_vector = vector_alloc(pbqp_inst, colors_n); + vector_t *costs_vector = vector_alloc(pbqp_inst, colors_n); /* set costs depending on register constrains */ unsigned idx; - for(idx = 0; idx < colors_n; idx++) { - if(bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) { + for (idx = 0; idx < colors_n; idx++) { + if (!bitset_is_set(allocatable_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) { + /* constrained */ vector_set(costs_vector, idx, INF_COSTS); cntConstrains++; } @@ -155,27 +169,29 @@ static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) pbqp_alloc_env->restr_nodes[get_irn_idx(irn)] = cntConstrains; } -static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node) { - pbqp *pbqp = pbqp_alloc_env->pbqp_inst; +static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node) +{ + pbqp_t *pbqp = pbqp_alloc_env->pbqp_inst; const arch_register_class_t *cls = pbqp_alloc_env->cls; - pbqp_matrix *ife_matrix_template = pbqp_alloc_env->ife_matrix_template; - unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; + pbqp_matrix_t *ife_matrix_template = pbqp_alloc_env->ife_matrix_template; + unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; - if(get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { + if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { /* increase ife edge counter */ pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++; pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++; +#if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */ - if(get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { - unsigned src_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs); - unsigned trg_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs); - assert(src_idx != trg_idx && "Interfering nodes could not have the same register!"); + if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { + assert(vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs) != + vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs) && + "Interfering nodes must not have the same register!"); return; } - if(get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { - if(get_free_regs(restr_nodes, cls, src_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1) { unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs); vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS); } @@ -185,31 +201,33 @@ static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_no } return; } - +#endif /* insert interference edge */ insert_edge(pbqp, src_node, trg_node, ife_matrix_template); } } -static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos) { - pbqp *pbqp = pbqp_alloc_env->pbqp_inst; - const arch_register_class_t *cls = pbqp_alloc_env->cls; - unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; - pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls)); - unsigned colors_n = arch_register_class_n_regs(cls); - - if(get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { - if(use_exec_freq) { +static void insert_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos) +{ + pbqp_t *pbqp = pbqp_alloc_env->pbqp_inst; + const arch_register_class_t *cls = pbqp_alloc_env->cls; + unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; + pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls)); + unsigned colors_n = arch_register_class_n_regs(cls); + + if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) { + if (use_exec_freq) { /* get exec_freq for copy_block */ - ir_node *root_bl = get_nodes_block(src_node); - ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl; - unsigned long res = get_block_execfreq_ulong(pbqp_alloc_env->birg->exec_freq, copy_bl); + ir_node *root_bl = get_nodes_block(src_node); + ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl; + ir_exec_freq *exec_freq = be_get_irg_exec_freq(pbqp_alloc_env->irg); + unsigned long res = get_block_execfreq_ulong(exec_freq, copy_bl); /* create afe-matrix */ unsigned row, col; - for(row = 0; row < colors_n; row++) { - for(col = 0; col < colors_n; col++) { - if(row != col) + for (row = 0; row < colors_n; row++) { + for (col = 0; col < colors_n; col++) { + if (row != col) pbqp_matrix_set(afe_matrix, row, col, (num)res); } } @@ -217,13 +235,13 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod else { afe_matrix = pbqp_alloc_env->aff_matrix_template; } - +#if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING /* do useful optimization to speed up pbqp solving */ - if(get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) { return; } - if(get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { - if(get_free_regs(restr_nodes, cls, src_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) { + if (get_free_regs(restr_nodes, cls, src_node) == 1) { unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs); vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx); } @@ -233,31 +251,33 @@ static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_nod } return; } - +#endif /* insert interference edge */ insert_edge(pbqp, src_node, trg_node, afe_matrix); } } -static void create_affinity_edges(ir_node *irn, void *env) { - be_pbqp_alloc_env_t *pbqp_alloc_env = env; - const arch_register_class_t *cls = pbqp_alloc_env->cls; - const arch_register_req_t *req = arch_get_register_req_out(irn); - unsigned pos, max; +static void create_affinity_edges(ir_node *irn, void *env) +{ + be_pbqp_alloc_env_t *pbqp_alloc_env = (be_pbqp_alloc_env_t*)env; + const arch_register_class_t *cls = pbqp_alloc_env->cls; + const arch_register_req_t *req = arch_get_register_req_out(irn); + unsigned pos; + unsigned max; if (is_Reg_Phi(irn)) { /* Phis */ - for (pos=0, max=get_irn_arity(irn); posother_same; - int i; + int i; for (i = 0; 1U << i <= other; ++i) { if (other & (1U << i)) { @@ -279,29 +299,37 @@ static void create_affinity_edges(ir_node *irn, void *env) { continue; /* no edges to itself */ - if(irn == other) { + if (irn == other) { continue; } - inser_afe_edge(pbqp_alloc_env, irn, other, i); + insert_afe_edge(pbqp_alloc_env, irn, other, i); } } } } } -static void create_pbqp_coloring_instance(ir_node *block, void *data) { - be_pbqp_alloc_env_t *pbqp_alloc_env = data; - be_lv_t *lv = pbqp_alloc_env->lv; - const arch_register_class_t *cls = pbqp_alloc_env->cls; - plist_t *rpeo = pbqp_alloc_env->rpeo; - pbqp *pbqp_inst = pbqp_alloc_env->pbqp_inst; - unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; - pqueue_t *queue = new_pqueue(); - pqueue_t *restr_nodes_queue = new_pqueue(); - plist_t *temp_list = plist_new(); +static void create_pbqp_coloring_instance(ir_node *block, void *data) +{ + be_pbqp_alloc_env_t *pbqp_alloc_env = (be_pbqp_alloc_env_t*)data; + be_lv_t *lv = pbqp_alloc_env->lv; + const arch_register_class_t *cls = pbqp_alloc_env->cls; + plist_t *rpeo = pbqp_alloc_env->rpeo; + pbqp_t *pbqp_inst = pbqp_alloc_env->pbqp_inst; + plist_t *temp_list = plist_new(); + plist_element_t *el; ir_node *irn; ir_nodeset_t live_nodes; +#if USE_BIPARTIT_MATCHING + int *assignment = ALLOCAN(int, cls->n_regs); +#else + unsigned *restr_nodes = pbqp_alloc_env->restr_nodes; + pqueue_t *restr_nodes_queue = new_pqueue(); + pqueue_t *queue = new_pqueue(); + plist_t *sorted_list = plist_new(); + ir_node *last_element = NULL; +#endif /* first, determine the pressure */ /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */ @@ -313,8 +341,8 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) { /* create pbqp nodes, interference edges and reverse perfect elimination order */ sched_foreach_reverse(block, irn) { - ir_node *live; - ir_nodeset_iterator_t iter; + ir_node *live; + ir_nodeset_iterator_t iter; if (get_irn_mode(irn) == mode_T) { const ir_edge_t *edge; @@ -324,19 +352,19 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) { continue; /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(proj)) == NULL) { + if (get_node(pbqp_inst, get_irn_idx(proj)) == NULL) { create_pbqp_node(pbqp_alloc_env, proj); } /* create nodes and interference edges */ foreach_ir_nodeset(&live_nodes, live, iter) { /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) { + if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) { create_pbqp_node(pbqp_alloc_env, live); } /* no edges to itself */ - if(proj == live) { + if (proj == live) { continue; } @@ -347,19 +375,19 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) { else { if (arch_irn_consider_in_reg_alloc(cls, irn)) { /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(irn)) == NULL) { + if (get_node(pbqp_inst, get_irn_idx(irn)) == NULL) { create_pbqp_node(pbqp_alloc_env, irn); } /* create nodes and interference edges */ foreach_ir_nodeset(&live_nodes, live, iter) { /* create pbqp source node if it dosn't exist */ - if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) { + if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) { create_pbqp_node(pbqp_alloc_env, live); } /* no edges to itself */ - if(irn == live) { + if (irn == live) { continue; } @@ -374,9 +402,115 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) { be_liveness_transfer(cls, irn, &live_nodes); } +#if USE_BIPARTIT_MATCHING + if (get_irn_mode(irn) == mode_T) { + unsigned clique_size = 0; + unsigned n_alloc = 0; + pbqp_node *clique[cls->n_regs]; + bipartite_t *bp = bipartite_new(cls->n_regs, cls->n_regs); + + /* add all proj after a perm to clique */ + const ir_edge_t *edge; + foreach_out_edge(irn, edge) { + ir_node *proj = get_edge_src_irn(edge); + + /* ignore node if it is not necessary for register allocation */ + if (!arch_irn_consider_in_reg_alloc(cls, proj)) + continue; + + /* insert pbqp node into temp rpeo list of this block */ + plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(proj))); + + if(is_Perm_Proj(proj)) { + /* add proj to clique */ + pbqp_node *clique_member = get_node(pbqp_inst,proj->node_idx); + vector *costs = clique_member->costs; + unsigned idx = 0; + + clique[clique_size] = clique_member; + + for(idx = 0; idx < costs->len; idx++) { + if(costs->entries[idx].data != INF_COSTS) { + bipartite_add(bp, clique_size, idx); + } + } + + /* increase node counter */ + clique_size++; + n_alloc++; + } + } + + if(clique_size > 0) { + plist_element_t *listElement; + foreach_plist(temp_list, listElement) { + pbqp_node *clique_candidate = listElement->data; + unsigned idx = 0; + bool isMember = true; + + /* clique size not bigger then register class size */ + if(clique_size >= cls->n_regs) break; + + for(idx = 0; idx < clique_size; idx++) { + pbqp_node *member = clique[idx]; + + if(member == clique_candidate) { + isMember = false; + break; + } + + if(get_edge(pbqp_inst, member->index, clique_candidate->index) == NULL && get_edge(pbqp_inst, clique_candidate->index, member->index) == NULL) { + isMember = false; + break; + } + } + + /* goto next list element if current node is not a member of the clique */ + if(!isMember) { continue; } + + /* add candidate to clique */ + clique[clique_size] = clique_candidate; + + vector *costs = clique_candidate->costs; + for(idx = 0; idx < costs->len; idx++) { + if(costs->entries[idx].data != INF_COSTS) { + bipartite_add(bp, clique_size, idx); + } + } + + /* increase node counter */ + clique_size++; + } + } + + /* solve bipartite matching */ + bipartite_matching(bp, assignment); + + /* assign colors */ + unsigned nodeIdx = 0; + for(nodeIdx = 0; nodeIdx < clique_size; nodeIdx++) { + vector *costs = clique[nodeIdx]->costs; + int idx; + for(idx = 0; idx < (int)costs->len; idx++) { + if(assignment[nodeIdx] != idx) { + costs->entries[idx].data = INF_COSTS; + } + } + assert(assignment[nodeIdx] >= 0 && "there must have been a register assigned (node not register pressure faithful?)"); + } + + /* free memory */ + bipartite_free(bp); + } + else { + if (arch_irn_consider_in_reg_alloc(cls, irn)) { + plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn))); + } + } +#else /* order nodes for perfect elimination order */ if (get_irn_mode(irn) == mode_T) { - plist_element_t *first = plist_first(temp_list); + bool allHaveIFEdges = true; const ir_edge_t *edge; foreach_out_edge(irn, edge) { @@ -384,44 +518,70 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) { if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - // insert proj node into priority queue (descending by the number of interference edges) - if(get_free_regs(restr_nodes, cls, proj) <= 4/*bitset_is_set(restr_nodes, get_irn_idx(proj))*/) { + /* insert proj node into priority queue (descending by the number of interference edges) */ + if (get_free_regs(restr_nodes, cls, proj) <= 4) { pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]); } else { - pqueue_put(queue,proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]); + pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]); + } + + /* skip last step if there is no last_element */ + if(last_element == NULL) + continue; + + /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */ + if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) { + allHaveIFEdges = false; /* there is no if edge between proj and last_element */ } } - /* first insert all restricted nodes */ - while(!pqueue_empty(restr_nodes_queue)) { - if(first == NULL) { - plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue)))); - first = plist_first(temp_list); - } else { - plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue)))); + if(last_element != NULL && allHaveIFEdges) { + if (get_free_regs(restr_nodes, cls, last_element) <= 4) { + pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]); + } + else { + pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]); } + plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx))); + last_element = NULL; + } + + /* first insert all restricted proj nodes */ + while (!pqueue_empty(restr_nodes_queue)) { + ir_node *node = (ir_node*)pqueue_pop_front(restr_nodes_queue); + plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node))); } /* insert proj nodes descending by their number of interference edges */ - while(!pqueue_empty(queue)) { - if(first == NULL) { - plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue)))); - first = plist_first(temp_list); - } else { - plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue)))); - } + while (!pqueue_empty(queue)) { + ir_node *node = (ir_node*)pqueue_pop_front(queue); + plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node))); + } + + /* invert sorted list */ + foreach_plist(sorted_list, el) { + plist_insert_front(temp_list, el->data); } + + plist_clear(sorted_list); + } else { if (arch_irn_consider_in_reg_alloc(cls, irn)) { + // remember last colorable node + last_element = irn; plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn))); } + else { + // node not colorable, so ignore it + last_element = NULL; + } } +#endif } - /* insert nodes into reverse perfect elimination order */ - plist_element_t *el; + /* add the temp rpeo list of this block to the global reverse perfect elimination order list*/ foreach_plist(temp_list, el) { plist_insert_back(rpeo, el->data); } @@ -429,34 +589,22 @@ static void create_pbqp_coloring_instance(ir_node *block, void *data) { /* free reserved memory */ ir_nodeset_destroy(&live_nodes); plist_free(temp_list); +#if USE_BIPARTIT_MATCHING +#else + plist_free(sorted_list); del_pqueue(queue); del_pqueue(restr_nodes_queue); +#endif } -static void insert_perms(ir_node *block, void *data) { - /* - * Start silent in the start block. - * The silence remains until the first barrier is seen. - * Each other block is begun loud. - */ - be_chordal_env_t *env = data; +static void insert_perms(ir_node *block, void *data) +{ + be_chordal_env_t *env = (be_chordal_env_t*)data; ir_node *irn; - int silent = block == get_irg_start_block(get_irn_irg(block)); - /* - * If the block is the start block search the barrier and - * start handling constraints from there. - */ for (irn = sched_first(block); !sched_is_end(irn);) { - int silent_old = silent; /* store old silent value */ - if (be_is_Barrier(irn)) - silent = !silent; /* toggle silent flag */ - - be_insn_t *insn = chordal_scan_insn(env, irn); - irn = insn->next_insn; - - if (silent_old) - continue; + be_insn_t *insn = chordal_scan_insn(env, irn); + irn = insn->next_insn; if (!insn->has_constraints) continue; @@ -465,23 +613,25 @@ static void insert_perms(ir_node *block, void *data) { } } -void be_pbqp_coloring(be_chordal_env_t *env) { - ir_graph *irg = env->irg; - be_irg_t *birg = env->birg; - const arch_register_class_t *cls = env->cls; - unsigned colors_n = arch_register_class_n_regs(cls); - be_pbqp_alloc_env_t pbqp_alloc_env; - unsigned idx, row, col; - be_lv_t *lv; +static void be_pbqp_coloring(be_chordal_env_t *env) +{ + ir_graph *irg = env->irg; + const arch_register_class_t *cls = env->cls; + be_lv_t *lv = NULL; + plist_element_t *element = NULL; + unsigned colors_n = arch_register_class_n_regs(cls); + be_pbqp_alloc_env_t pbqp_alloc_env; + unsigned col; + unsigned row; #if TIMER - ir_timer_t *t_ra_pbqp_alloc_create = ir_timer_register("be_pbqp_alloc_create", "pbqp alloc create"); - ir_timer_t *t_ra_pbqp_alloc_solve = ir_timer_register("be_pbqp_alloc_solve", "pbqp alloc solve"); - ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_register("be_pbqp_alloc_create_aff", "pbqp alloc create aff"); + ir_timer_t *t_ra_pbqp_alloc_create = ir_timer_new(); + ir_timer_t *t_ra_pbqp_alloc_solve = ir_timer_new(); + ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_new(); printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg))); #endif - lv = be_assure_liveness(birg); + lv = be_assure_liveness(irg); be_liveness_assure_sets(lv); be_liveness_assure_chk(lv); @@ -493,40 +643,39 @@ void be_pbqp_coloring(be_chordal_env_t *env) { if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) { char buf[256]; snprintf(buf, sizeof(buf), "-%s-constr", cls->name); - be_dump(irg, buf, dump_ir_block_graph_sched); + dump_ir_graph(irg, buf); } /* initialize pbqp allocation data structure */ - pbqp_alloc_env.pbqp_inst = alloc_pbqp(get_irg_last_idx(irg)); /* initialize pbqp instance */ - pbqp_alloc_env.birg = birg; - pbqp_alloc_env.cls = cls; - pbqp_alloc_env.irg = irg; - pbqp_alloc_env.lv = lv; - pbqp_alloc_env.ignored_regs = bitset_malloc(colors_n); - pbqp_alloc_env.rpeo = plist_new(); - pbqp_alloc_env.restr_nodes = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); - pbqp_alloc_env.ife_edge_num = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); - pbqp_alloc_env.env = env; - be_put_ignore_regs(birg, cls, pbqp_alloc_env.ignored_regs); /* get ignored registers */ + pbqp_alloc_env.pbqp_inst = alloc_pbqp(get_irg_last_idx(irg)); /* initialize pbqp instance */ + pbqp_alloc_env.cls = cls; + pbqp_alloc_env.irg = irg; + pbqp_alloc_env.lv = lv; + pbqp_alloc_env.allocatable_regs = bitset_malloc(colors_n); + pbqp_alloc_env.rpeo = plist_new(); + pbqp_alloc_env.restr_nodes = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); + pbqp_alloc_env.ife_edge_num = XMALLOCNZ(unsigned, get_irg_last_idx(irg)); + pbqp_alloc_env.env = env; + be_put_allocatable_regs(irg, cls, pbqp_alloc_env.allocatable_regs); /* create costs matrix template for interference edges */ - struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); + pbqp_matrix_t *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); /* set costs */ - for(row = 0, col=0; row < colors_n; row++, col++) + for (row = 0, col = 0; row < colors_n; row++, col++) pbqp_matrix_set(ife_matrix, row, col, INF_COSTS); pbqp_alloc_env.ife_matrix_template = ife_matrix; - if(!use_exec_freq) { + if (!use_exec_freq) { /* create costs matrix template for affinity edges */ - struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); + pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n); /* set costs */ - for(row = 0; row < colors_n; row++) { - for(col = 0; col < colors_n; col++) { - if(row != col) + for (row = 0; row < colors_n; row++) { + for (col = 0; col < colors_n; col++) { + if (row != col) pbqp_matrix_set(afe_matrix, row, col, 2); } } @@ -549,11 +698,10 @@ void be_pbqp_coloring(be_chordal_env_t *env) { #if TIMER ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff); #endif - plist_element_t *el; - foreach_plist(pbqp_alloc_env.rpeo, el) { - pbqp_node *node = el->data; - idx = node->index; - ir_node *irn = get_idx_irn(irg, idx); + foreach_plist(pbqp_alloc_env.rpeo, element) { + pbqp_node_t *node = (pbqp_node_t*)element->data; + ir_node *irn = get_idx_irn(irg, node->index); + create_affinity_edges(irn, &pbqp_alloc_env); } #if TIMER @@ -567,36 +715,54 @@ void be_pbqp_coloring(be_chordal_env_t *env) { set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before); #endif + /* print out reverse perfect eleminiation order */ +#if PRINT_RPEO + plist_element_t *elements; + foreach_plist(pbqp_alloc_env.rpeo, elements) { + pbqp_node *node = elements->data; + printf(" %d(%lu);", node->index, get_idx_irn(irg, node->index)->node_nr); + } + printf("\n"); +#endif /* solve pbqp instance */ #if TIMER ir_timer_reset_and_start(t_ra_pbqp_alloc_solve); #endif - solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo); + if(use_late_decision) { + solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo); + } + else { + solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo); + } #if TIMER ir_timer_stop(t_ra_pbqp_alloc_solve); #endif + + num solution = get_solution(pbqp_alloc_env.pbqp_inst); - assert(solution != INF_COSTS && "No PBQP solution found"); + if (solution == INF_COSTS) + panic("No PBQP solution found"); /* assign colors */ - plist_element_t *element; foreach_plist(pbqp_alloc_env.rpeo, element) { - pbqp_node *node = element->data; - idx = node->index; - ir_node *irn = get_idx_irn(irg, idx); - num color = get_node_solution(pbqp_alloc_env.pbqp_inst, idx); - const arch_register_t *reg = arch_register_for_index(cls, color); + pbqp_node_t *node = (pbqp_node_t*)element->data; + ir_node *irn = get_idx_irn(irg, node->index); + num color = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index); + const arch_register_t *reg = arch_register_for_index(cls, color); arch_set_irn_register(irn, reg); } #if TIMER - printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_create), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0); - printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_solve), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0); - printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_create_aff), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0); + printf("PBQP alloc create: %10.3lf msec\n", + (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0); + printf("PBQP alloc solve: %10.3lf msec\n", + (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0); + printf("PBQP alloc create aff: %10.3lf msec\n", + (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0); #endif @@ -604,7 +770,7 @@ void be_pbqp_coloring(be_chordal_env_t *env) { #if KAPS_DUMP fclose(file_before); #endif - bitset_free(pbqp_alloc_env.ignored_regs); + bitset_free(pbqp_alloc_env.allocatable_regs); free_pbqp(pbqp_alloc_env.pbqp_inst); plist_free(pbqp_alloc_env.rpeo); xfree(pbqp_alloc_env.restr_nodes); @@ -615,12 +781,14 @@ void be_pbqp_coloring(be_chordal_env_t *env) { /** * Initializes this module. */ -void be_init_pbqp_coloring(void) { - lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); - lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra"); - lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal"); +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring); +void be_init_pbqp_coloring(void) +{ + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra"); + lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal"); lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring"); - lc_opt_entry_t *pbqp_grp = lc_opt_get_grp(coloring_grp, "pbqp"); + lc_opt_entry_t *pbqp_grp = lc_opt_get_grp(coloring_grp, "pbqp"); static be_ra_chordal_coloring_t coloring = { be_pbqp_coloring @@ -630,6 +798,4 @@ void be_init_pbqp_coloring(void) { be_register_chordal_coloring("pbqp", &coloring); } -BE_REGISTER_MODULE_CONSTRUCTOR(be_pbqp_alloc); - #endif