X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbeprefalloc.c;h=338b7a4380f042b33abd5d4c2fcbd5860242f206;hb=5474a1c188c9d59eea2c915515980cd9cbab58d8;hp=164262de005e7fd0b7ce83baa384c9cf569ad3b2;hpb=60f23bcfd104eaae39b47dd13e839273104d4a15;p=libfirm diff --git a/ir/be/beprefalloc.c b/ir/be/beprefalloc.c index 164262de0..338b7a438 100644 --- a/ir/be/beprefalloc.c +++ b/ir/be/beprefalloc.c @@ -22,7 +22,6 @@ * @brief Preference Guided Register Assignment * @author Matthias Braun * @date 14.2.2009 - * @version $Id$ * * The idea is to allocate registers in 2 passes: * 1. A first pass to determine "preferred" registers for live-ranges. This @@ -42,6 +41,7 @@ #include #include #include +#include "lpp.h" #include "error.h" #include "execfreq.h" @@ -53,6 +53,8 @@ #include "irnode_t.h" #include "irprintf.h" #include "irdump.h" +#include "irtools.h" +#include "util.h" #include "obst.h" #include "raw_bitset.h" #include "unionfind.h" @@ -270,7 +272,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, static void check_defs(const ir_nodeset_t *live_nodes, float weight, ir_node *node) { - const arch_register_req_t *req = arch_get_register_req_out(node); + const arch_register_req_t *req = arch_get_irn_register_req(node); if (req->type & arch_register_req_type_limited) { const unsigned *limited = req->limited; float penalty = weight * DEF_FACTOR; @@ -351,7 +353,7 @@ static void analyze_block(ir_node *block, void *data) info = get_allocation_info(node); for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); - const arch_register_req_t *req = arch_get_register_req_out(op); + const arch_register_req_t *req = arch_get_irn_register_req(op); if (req->cls != cls) continue; @@ -373,7 +375,7 @@ static void analyze_block(ir_node *block, void *data) if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; - req = arch_get_register_req(node, i); + req = arch_get_irn_register_req_in(node, i); if (!(req->type & arch_register_req_type_limited)) continue; @@ -389,7 +391,7 @@ static void analyze_block(ir_node *block, void *data) static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) { - const arch_register_req_t *req = arch_get_register_req_out(node); + const arch_register_req_t *req = arch_get_irn_register_req(node); /* should be same constraint? */ if (req->type & arch_register_req_type_should_be_same) { @@ -649,7 +651,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, * (so we don't split away the values produced because of * must_be_different constraints) */ original_insn = skip_Proj(info->original_value); - if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) + if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill) return false; from_reg = arch_get_irn_register(to_split); @@ -720,7 +722,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, return false; reg = arch_register_for_index(cls, r); - copy = be_new_Copy(cls, block, to_split); + copy = be_new_Copy(block, to_split); mark_as_copy_of(copy, to_split); /* hacky, but correct here */ if (assignments[arch_register_get_index(from_reg)] == to_split) @@ -758,7 +760,7 @@ static void assign_reg(const ir_node *block, ir_node *node, return; } - req = arch_get_register_req_out(node); + req = arch_get_irn_register_req(node); /* ignore reqs must be preassigned */ assert (! (req->type & arch_register_req_type_ignore)); @@ -881,7 +883,7 @@ static void assign_reg(const ir_node *block, ir_node *node, * registers. */ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, - unsigned *permutation) + unsigned *permutation) { unsigned *n_used = ALLOCANZ(unsigned, n_regs); ir_node *block; @@ -921,7 +923,7 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, /* create a copy */ src = assignments[old_r]; - copy = be_new_Copy(cls, block, src); + copy = be_new_Copy(block, src); sched_add_before(before, copy); reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", @@ -1105,6 +1107,151 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) } } +static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, + unsigned *forbidden_regs, unsigned *live_through_regs) +{ + unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs); + int *lpp_vars = XMALLOCNZ(int, n_regs*n_regs); + int arity = get_irn_arity(node); + int i; + unsigned l; + unsigned r; + + lpp_t *lpp = lpp_new("prefalloc", lpp_minimize); + //lpp_set_time_limit(lpp, 20); + lpp_set_log(lpp, stdout); + + /** mark some edges as forbidden */ + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + const arch_register_req_t *req; + const unsigned *limited; + unsigned current_reg; + + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + req = arch_get_irn_register_req_in(node, i); + if (!(req->type & arch_register_req_type_limited)) + continue; + + limited = req->limited; + reg = arch_get_irn_register(op); + current_reg = arch_register_get_index(reg); + for (r = 0; r < n_regs; ++r) { + if (rbitset_is_set(limited, r)) + continue; + + rbitset_set(forbidden_edges, current_reg*n_regs + r); + } + } + + /* add all combinations, except for not allowed ones */ + for (l = 0; l < n_regs; ++l) { + if (!rbitset_is_set(normal_regs, l)) { + char name[15]; + snprintf(name, sizeof(name), "%u_to_%u", l, l); + lpp_vars[l*n_regs+l] = lpp_add_var(lpp, name, lpp_binary, 1); + continue; + } + + for (r = 0; r < n_regs; ++r) { + if (!rbitset_is_set(normal_regs, r)) + continue; + if (rbitset_is_set(forbidden_edges, l*n_regs + r)) + continue; + /* livethrough values may not use constrained output registers */ + if (rbitset_is_set(live_through_regs, l) + && rbitset_is_set(forbidden_regs, r)) + continue; + + char name[15]; + snprintf(name, sizeof(name), "%u_to_%u", l, r); + + double costs = l==r ? 9 : 8; + lpp_vars[l*n_regs+r] + = lpp_add_var(lpp, name, lpp_binary, costs); + assert(lpp_vars[l*n_regs+r] > 0); + } + } + /* add constraints */ + for (l = 0; l < n_regs; ++l) { + int constraint; + /* only 1 destination per register */ + constraint = -1; + for (r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + if (constraint < 0) { + char name[64]; + snprintf(name, sizeof(name), "%u_to_dest", l); + constraint = lpp_add_cst(lpp, name, lpp_equal, 1); + } + lpp_set_factor_fast(lpp, constraint, var, 1); + } + /* each destination used by at most 1 value */ + constraint = -1; + for (r = 0; r < n_regs; ++r) { + int var = lpp_vars[r*n_regs+l]; + if (var == 0) + continue; + if (constraint < 0) { + char name[64]; + snprintf(name, sizeof(name), "one_to_%u", l); + constraint = lpp_add_cst(lpp, name, lpp_less_equal, 1); + } + lpp_set_factor_fast(lpp, constraint, var, 1); + } + } + + lpp_dump_plain(lpp, fopen("lppdump.txt", "w")); + + /* solve lpp */ + { + ir_graph *irg = get_irn_irg(node); + be_options_t *options = be_get_irg_options(irg); + unsigned *assignment; + lpp_solve(lpp, options->ilp_server, options->ilp_solver); + if (!lpp_is_sol_valid(lpp)) + panic("ilp solution not valid!"); + + assignment = ALLOCAN(unsigned, n_regs); + for (l = 0; l < n_regs; ++l) { + unsigned dest_reg = (unsigned)-1; + for (r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + double val = lpp_get_var_sol(lpp, var); + if (val == 1) { + assert(dest_reg == (unsigned)-1); + dest_reg = r; + } + } + assert(dest_reg != (unsigned)-1); + assignment[dest_reg] = l; + } + + fprintf(stderr, "Assignment: "); + for (l = 0; l < n_regs; ++l) { + fprintf(stderr, "%u ", assignment[l]); + } + fprintf(stderr, "\n"); + fflush(stdout); + permute_values(live_nodes, node, assignment); + } + lpp_free(lpp); +} + +static bool is_aligned(unsigned num, unsigned alignment) +{ + unsigned mask = alignment-1; + assert(is_po2(alignment)); + return (num&mask) == 0; +} + /** * Enforce constraints at a node by live range splits. * @@ -1124,7 +1271,9 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, /* construct a list of register occupied by live-through values */ unsigned *live_through_regs = NULL; - /* see if any use constraints are not met */ + /* see if any use constraints are not met and whether double-width + * values are involved */ + bool double_width = false; bool good = true; for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); @@ -1137,22 +1286,32 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, continue; /* are there any limitations for the i'th operand? */ - req = arch_get_register_req(node, i); + req = arch_get_irn_register_req_in(node, i); + if (req->width > 1) + double_width = true; + reg = arch_get_irn_register(op); + reg_index = arch_register_get_index(reg); + if (req->type & arch_register_req_type_aligned) { + if (!is_aligned(reg_index, req->width)) { + good = false; + continue; + } + } if (!(req->type & arch_register_req_type_limited)) continue; limited = req->limited; - reg = arch_get_irn_register(op); - reg_index = arch_register_get_index(reg); if (!rbitset_is_set(limited, reg_index)) { /* found an assignment outside the limited set */ good = false; - break; + continue; } } /* is any of the live-throughs using a constrained output register? */ be_foreach_definition(node, cls, value, + if (req_->width > 1) + double_width = true; if (! (req_->type & arch_register_req_type_limited)) continue; if (live_through_regs == NULL) { @@ -1172,6 +1331,12 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, rbitset_alloca(live_through_regs, n_regs); } + if (double_width) { + /* only the ILP variant can solve this yet */ + solve_lpp(live_nodes, node, forbidden_regs, live_through_regs); + return; + } + /* at this point we have to construct a bipartite matching problem to see * which values should go to which registers * Note: We're building the matrix in "reverse" - source registers are @@ -1209,7 +1374,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; - req = arch_get_register_req(node, i); + req = arch_get_irn_register_req_in(node, i); if (!(req->type & arch_register_req_type_limited)) continue; @@ -1565,7 +1730,7 @@ static void allocate_coalesce_block(ir_node *block, void *data) int p; node = be_lv_get_irn(lv, block, i); - req = arch_get_register_req_out(node); + req = arch_get_irn_register_req(node); if (req->cls != cls) continue; @@ -1845,8 +2010,8 @@ static void be_pref_alloc_cls(void) { size_t i; - lv = be_assure_liveness(irg); - be_liveness_assure_sets(lv); + be_assure_live_sets(irg); + lv = be_get_irg_liveness(irg); ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); @@ -1882,7 +2047,7 @@ static void spill(void) be_pre_spill_prepare_constr(irg, cls); be_timer_pop(T_RA_CONSTR); - dump(DUMP_RA, irg, "-spillprepare"); + dump(DUMP_RA, irg, "spillprepare"); /* spill */ be_timer_push(T_RA_SPILL); @@ -1893,7 +2058,7 @@ static void spill(void) check_for_memory_operands(irg); be_timer_pop(T_RA_SPILL_APPLY); - dump(DUMP_RA, irg, "-spill"); + dump(DUMP_RA, irg, "spill"); } /** @@ -1944,8 +2109,7 @@ static void be_pref_alloc(ir_graph *new_irg) /* we most probably constructed new Phis so liveness info is invalid * now */ - /* TODO: test liveness_introduce */ - be_liveness_invalidate(lv); + be_invalidate_live_sets(irg); free(normal_regs); stat_ev_ctx_pop("regcls");