X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbeprefalloc.c;h=af2ece4eca9bb30a1452aa0ab66b51b858554b4d;hb=2232b14b4acf810ae96a69d1d2a33cf150b695d9;hp=feefba35af5223eacdad921c6a2f44c89c84133d;hpb=f6d087a414f0541ada64323431ae6444f32fddbf;p=libfirm diff --git a/ir/be/beprefalloc.c b/ir/be/beprefalloc.c index feefba35a..af2ece4ec 100644 --- a/ir/be/beprefalloc.c +++ b/ir/be/beprefalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -42,6 +42,7 @@ #include #include #include +#include "lpp.h" #include "error.h" #include "execfreq.h" @@ -52,6 +53,7 @@ #include "irgwalk.h" #include "irnode_t.h" #include "irprintf.h" +#include "irdump.h" #include "obst.h" #include "raw_bitset.h" #include "unionfind.h" @@ -71,6 +73,7 @@ #include "bespillutil.h" #include "beverify.h" #include "beutil.h" +#include "bestack.h" #define USE_FACTOR 1.0f #define DEF_FACTOR 1.0f @@ -83,17 +86,15 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) static struct obstack obst; -static be_irg_t *birg; static ir_graph *irg; static const arch_register_class_t *cls; -static const arch_register_req_t *default_cls_req; static be_lv_t *lv; static const ir_exec_freq *execfreqs; static unsigned n_regs; static unsigned *normal_regs; static int *congruence_classes; static ir_node **block_order; -static int n_block_order; +static size_t n_block_order; static int create_preferences = true; static int create_congruence_classes = true; static int propagate_phi_registers = true; @@ -114,7 +115,7 @@ static ir_node **assignments; * the information is per firm-node. */ struct allocation_info_t { - unsigned last_uses; /**< bitset indicating last uses (input pos) */ + unsigned last_uses[2]; /**< bitset indicating last uses (input pos) */ ir_node *current_value; /**< copy of the value that should be used */ ir_node *original_value; /**< for copies point to original value */ float prefs[0]; /**< register preferences */ @@ -141,7 +142,7 @@ typedef struct block_info_t block_info_t; */ static allocation_info_t *get_allocation_info(ir_node *node) { - allocation_info_t *info = get_irn_link(node); + allocation_info_t *info = (allocation_info_t*)get_irn_link(node); if (info == NULL) { info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs); info->current_value = node; @@ -162,7 +163,7 @@ static allocation_info_t *try_get_allocation_info(const ir_node *node) */ static block_info_t *get_block_info(ir_node *block) { - block_info_t *info = get_irn_link(block); + block_info_t *info = (block_info_t*)get_irn_link(block); assert(is_Block(block)); if (info == NULL) { @@ -173,23 +174,6 @@ static block_info_t *get_block_info(ir_node *block) return info; } -/** - * Get default register requirement for the current register class - */ -static const arch_register_req_t *get_default_req_current_cls(void) -{ - if (default_cls_req == NULL) { - struct obstack *obst = get_irg_obstack(irg); - arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); - - req->type = arch_register_req_type_normal; - req->cls = cls; - - default_cls_req = req; - } - return default_cls_req; -} - /** * Link the allocation info of a node to a copy. * Afterwards, both nodes uses the same allocation info. @@ -235,7 +219,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, { ir_nodeset_iterator_t iter; unsigned r; - unsigned n_allowed; + size_t n_allowed; allocation_info_t *info = get_allocation_info(node); ir_node *neighbor; @@ -252,7 +236,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, return; penalty *= NEIGHBOR_FACTOR; - n_allowed = rbitset_popcnt(limited, n_regs); + n_allowed = rbitset_popcount(limited, n_regs); if (n_allowed > 1) { /* only create a very weak penalty if multiple regs are allowed */ penalty = (penalty * 0.8f) / n_allowed; @@ -287,21 +271,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, static void check_defs(const ir_nodeset_t *live_nodes, float weight, ir_node *node) { - const arch_register_req_t *req; - - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { - ir_node *proj = get_edge_src_irn(edge); - check_defs(live_nodes, weight, proj); - } - return; - } - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - - req = arch_get_register_req_out(node); + const arch_register_req_t *req = arch_get_irn_register_req(node); if (req->type & arch_register_req_type_limited) { const unsigned *limited = req->limited; float penalty = weight * DEF_FACTOR; @@ -314,7 +284,7 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, int arity = get_irn_arity(insn); int i; - float factor = 1.0f / rbitset_popcnt(&req->other_same, arity); + float factor = 1.0f / rbitset_popcount(&req->other_same, arity); for (i = 0; i < arity; ++i) { ir_node *op; unsigned r; @@ -345,7 +315,7 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, */ static void analyze_block(ir_node *block, void *data) { - float weight = get_block_execfreq(execfreqs, block); + float weight = (float)get_block_execfreq(execfreqs, block); ir_nodeset_t live_nodes; ir_node *node; (void) data; @@ -361,8 +331,12 @@ static void analyze_block(ir_node *block, void *data) if (is_Phi(node)) break; - if (create_preferences) - check_defs(&live_nodes, weight, node); + if (create_preferences) { + ir_node *value; + be_foreach_definition(node, cls, value, + check_defs(&live_nodes, weight, value); + ); + } /* mark last uses */ arity = get_irn_arity(node); @@ -370,20 +344,21 @@ static void analyze_block(ir_node *block, void *data) /* the allocation info node currently only uses 1 unsigned value to mark last used inputs. So we will fail for a node with more than 32 inputs. */ - if (arity >= (int) sizeof(unsigned) * 8) { + if (arity >= (int) sizeof(info->last_uses) * 8) { panic("Node with more than %d inputs not supported yet", - (int) sizeof(unsigned) * 8); + (int) sizeof(info->last_uses) * 8); } info = get_allocation_info(node); for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - if (!arch_irn_consider_in_reg_alloc(cls, op)) + ir_node *op = get_irn_n(node, i); + const arch_register_req_t *req = arch_get_irn_register_req(op); + if (req->cls != cls) continue; /* last usage of a value? */ if (!ir_nodeset_contains(&live_nodes, op)) { - rbitset_set(&info->last_uses, i); + rbitset_set(info->last_uses, i); } } @@ -399,7 +374,7 @@ static void analyze_block(ir_node *block, void *data) if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; - req = arch_get_register_req(node, i); + req = arch_get_irn_register_req_in(node, i); if (!(req->type & arch_register_req_type_limited)) continue; @@ -413,26 +388,13 @@ static void analyze_block(ir_node *block, void *data) ir_nodeset_destroy(&live_nodes); } -static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node) +static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) { - const arch_register_req_t *req; - - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { - ir_node *def = get_edge_src_irn(edge); - congruence_def(live_nodes, def); - } - return; - } - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; + const arch_register_req_t *req = arch_get_irn_register_req(node); /* should be same constraint? */ - req = arch_get_register_req_out(node); if (req->type & arch_register_req_type_should_be_same) { - ir_node *insn = skip_Proj(node); + const ir_node *insn = skip_Proj_const(node); int arity = get_irn_arity(insn); int i; unsigned node_idx = get_irn_idx(node); @@ -485,10 +447,13 @@ static void create_congruence_class(ir_node *block, void *data) /* check should be same constraints */ sched_foreach_reverse(block, node) { + ir_node *value; if (is_Phi(node)) break; - congruence_def(&live_nodes, node); + be_foreach_definition(node, cls, value, + congruence_def(&live_nodes, value); + ); be_liveness_transfer(cls, node, &live_nodes); } @@ -605,8 +570,6 @@ static void combine_congruence_classes(void) - - /** * Assign register reg to the given node. * @@ -673,12 +636,12 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, ir_node *original_insn; ir_node *block; ir_node *copy; - unsigned r; + unsigned r = 0; unsigned from_r; unsigned i; allocation_info_t *info = get_allocation_info(to_split); reg_pref_t *prefs; - float delta; + float delta = 0; float split_threshold; (void) pref; @@ -687,13 +650,13 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, * (so we don't split away the values produced because of * must_be_different constraints) */ original_insn = skip_Proj(info->original_value); - if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) + if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill) return false; from_reg = arch_get_irn_register(to_split); from_r = arch_register_get_index(from_reg); block = get_nodes_block(before); - split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA; + split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA; if (pref_delta < split_threshold*0.5) return false; @@ -737,7 +700,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; apref_delta += pref_delta - split_threshold; - /* our source register isn't a usefull destination for recursive + /* our source register isn't a useful destination for recursive splits */ old_source_state = rbitset_is_set(forbidden_regs, from_r); rbitset_set(forbidden_regs, from_r); @@ -758,7 +721,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, return false; reg = arch_register_for_index(cls, r); - copy = be_new_Copy(cls, block, to_split); + copy = be_new_Copy(block, to_split); mark_as_copy_of(copy, to_split); /* hacky, but correct here */ if (assignments[arch_register_get_index(from_reg)] == to_split) @@ -778,33 +741,33 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, static void assign_reg(const ir_node *block, ir_node *node, unsigned *forbidden_regs) { - const arch_register_t *reg; + const arch_register_t *final_reg; allocation_info_t *info; const arch_register_req_t *req; reg_pref_t *reg_prefs; ir_node *in_node; - unsigned i; - const unsigned *allowed_regs; unsigned r; + const unsigned *allowed_regs; + unsigned final_reg_index = 0; assert(!is_Phi(node)); - assert(arch_irn_consider_in_reg_alloc(cls, node)); - /* preassigned register? */ - reg = arch_get_irn_register(node); - if (reg != NULL) { - DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name)); - use_reg(node, reg); + final_reg = arch_get_irn_register(node); + if (final_reg != NULL) { + DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, final_reg->name)); + use_reg(node, final_reg); return; } - /* give should_be_same boni */ - info = get_allocation_info(node); - req = arch_get_register_req_out(node); + req = arch_get_irn_register_req(node); + /* ignore reqs must be preassigned */ + assert (! (req->type & arch_register_req_type_ignore)); + /* give should_be_same boni */ + info = get_allocation_info(node); in_node = skip_Proj(node); if (req->type & arch_register_req_type_should_be_same) { - float weight = get_block_execfreq(execfreqs, block); + float weight = (float)get_block_execfreq(execfreqs, block); int arity = get_irn_arity(in_node); int i; @@ -812,37 +775,36 @@ static void assign_reg(const ir_node *block, ir_node *node, for (i = 0; i < arity; ++i) { ir_node *in; const arch_register_t *reg; - unsigned r; + unsigned reg_index; if (!rbitset_is_set(&req->other_same, i)) continue; in = get_irn_n(in_node, i); reg = arch_get_irn_register(in); assert(reg != NULL); - r = arch_register_get_index(reg); + reg_index = arch_register_get_index(reg); /* if the value didn't die here then we should not propagate the * should_be_same info */ - if (assignments[r] == in) + if (assignments[reg_index] == in) continue; - info->prefs[r] += weight * AFF_SHOULD_BE_SAME; + info->prefs[reg_index] += weight * AFF_SHOULD_BE_SAME; } } /* create list of register candidates and sort by their preference */ DB((dbg, LEVEL_2, "Candidates for %+F:", node)); - reg_prefs = alloca(n_regs * sizeof(reg_prefs[0])); + reg_prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(reg_prefs, info); - for (i = 0; i < n_regs; ++i) { - unsigned num = reg_prefs[i].num; + for (r = 0; r < n_regs; ++r) { + unsigned num = reg_prefs[r].num; const arch_register_t *reg; if (!rbitset_is_set(normal_regs, num)) continue; - reg = arch_register_for_index(cls, num); - DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref)); + DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[r].pref)); } DB((dbg, LEVEL_2, "\n")); @@ -851,31 +813,38 @@ static void assign_reg(const ir_node *block, ir_node *node, allowed_regs = req->limited; } - for (i = 0; i < n_regs; ++i) { + for (r = 0; r < n_regs; ++r) { float pref, delta; ir_node *before; bool res; - r = reg_prefs[i].num; - if (!rbitset_is_set(allowed_regs, r)) + final_reg_index = reg_prefs[r].num; + if (!rbitset_is_set(allowed_regs, final_reg_index)) continue; - if (assignments[r] == NULL) + /* alignment constraint? */ + if (req->width > 1 && (req->type & arch_register_req_type_aligned) + && (final_reg_index % req->width) != 0) + continue; + + if (assignments[final_reg_index] == NULL) break; - pref = reg_prefs[i].pref; - delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0; + pref = reg_prefs[r].pref; + delta = r+1 < n_regs ? pref - reg_prefs[r+1].pref : 0; before = skip_Proj(node); - res = try_optimistic_split(assignments[r], before, + res = try_optimistic_split(assignments[final_reg_index], before, pref, delta, forbidden_regs, 0); if (res) break; } - if (i >= n_regs) { + if (r >= n_regs) { + /* the common reason to hit this panic is when 1 of your nodes is not + * register pressure faithful */ panic("No register left for %+F\n", node); } - reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); + final_reg = arch_register_for_index(cls, final_reg_index); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, final_reg->name)); + use_reg(node, final_reg); } /** @@ -895,7 +864,7 @@ static void assign_reg(const ir_node *block, ir_node *node, * First we count how many destinations a single value has. At the same time * we can be sure that each destination register has at most 1 source register * (it can have 0 which means we don't care what value is in it). - * We ignore all fullfilled permuations (like 7->7) + * We ignore all fulfilled permuations (like 7->7) * In a first pass we create as much copy instructions as possible as they * are generally cheaper than exchanges. We do this by counting into how many * destinations a register has to be copied (in the example it's 2 for register @@ -903,8 +872,8 @@ static void assign_reg(const ir_node *block, ir_node *node, * We can then create a copy into every destination register when the usecount * of that register is 0 (= noone else needs the value in the register). * - * After this step we should have cycles left. We implement a cyclic permutation - * of n registers with n-1 transpositions. + * After this step we should only have cycles left. We implement a cyclic + * permutation of n registers with n-1 transpositions. * * @param live_nodes the set of live nodes, updated due to live range split * @param before the node before we add the permutation @@ -913,7 +882,7 @@ static void assign_reg(const ir_node *block, ir_node *node, * registers. */ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, - unsigned *permutation) + unsigned *permutation) { unsigned *n_used = ALLOCANZ(unsigned, n_regs); ir_node *block; @@ -953,7 +922,7 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, /* create a copy */ src = assignments[old_r]; - copy = be_new_Copy(cls, block, src); + copy = be_new_Copy(block, src); sched_add_before(before, copy); reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", @@ -1021,12 +990,12 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", perm, in[0], in[1], before)); - proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0); + proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0); mark_as_copy_of(proj0, in[0]); reg = arch_register_for_index(cls, old_r); use_reg(proj0, reg); - proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1); + proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1); mark_as_copy_of(proj1, in[1]); reg = arch_register_for_index(cls, r2); use_reg(proj1, reg); @@ -1061,10 +1030,10 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, */ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) { - allocation_info_t *info = get_allocation_info(node); - const unsigned *last_uses = &info->last_uses; - int arity = get_irn_arity(node); - int i; + allocation_info_t *info = get_allocation_info(node); + const unsigned *last_uses = info->last_uses; + int arity = get_irn_arity(node); + int i; for (i = 0; i < arity; ++i) { ir_node *op; @@ -1128,7 +1097,7 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) ir_node *op; const arch_register_t *reg; - if (!rbitset_is_set(&info->last_uses, i)) + if (!rbitset_is_set(info->last_uses, i)) continue; op = get_irn_n(node, i); @@ -1137,6 +1106,144 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) } } +static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, + unsigned *forbidden_regs, unsigned *live_through_regs) +{ + unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs); + int *lpp_vars = XMALLOCNZ(int, n_regs*n_regs); + int arity = get_irn_arity(node); + int i; + unsigned l; + unsigned r; + + lpp_t *lpp = lpp_new("prefalloc", lpp_minimize); + //lpp_set_time_limit(lpp, 20); + lpp_set_log(lpp, stdout); + + /** mark some edges as forbidden */ + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + const arch_register_req_t *req; + const unsigned *limited; + unsigned current_reg; + + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + req = arch_get_irn_register_req_in(node, i); + if (!(req->type & arch_register_req_type_limited)) + continue; + + limited = req->limited; + reg = arch_get_irn_register(op); + current_reg = arch_register_get_index(reg); + for (r = 0; r < n_regs; ++r) { + if (rbitset_is_set(limited, r)) + continue; + + rbitset_set(forbidden_edges, current_reg*n_regs + r); + } + } + + /* add all combinations, except for not allowed ones */ + for (l = 0; l < n_regs; ++l) { + if (!rbitset_is_set(normal_regs, l)) { + char name[15]; + snprintf(name, sizeof(name), "%u_to_%u", l, l); + lpp_vars[l*n_regs+l] = lpp_add_var(lpp, name, lpp_binary, 1); + continue; + } + + for (r = 0; r < n_regs; ++r) { + if (!rbitset_is_set(normal_regs, r)) + continue; + if (rbitset_is_set(forbidden_edges, l*n_regs + r)) + continue; + /* livethrough values may not use constrained output registers */ + if (rbitset_is_set(live_through_regs, l) + && rbitset_is_set(forbidden_regs, r)) + continue; + + char name[15]; + snprintf(name, sizeof(name), "%u_to_%u", l, r); + + double costs = l==r ? 9 : 8; + lpp_vars[l*n_regs+r] + = lpp_add_var(lpp, name, lpp_binary, costs); + assert(lpp_vars[l*n_regs+r] > 0); + } + } + /* add constraints */ + for (l = 0; l < n_regs; ++l) { + int constraint; + /* only 1 destination per register */ + constraint = -1; + for (r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + if (constraint < 0) { + char name[64]; + snprintf(name, sizeof(name), "%u_to_dest", l); + constraint = lpp_add_cst(lpp, name, lpp_equal, 1); + } + lpp_set_factor_fast(lpp, constraint, var, 1); + } + /* each destination used by at most 1 value */ + constraint = -1; + for (r = 0; r < n_regs; ++r) { + int var = lpp_vars[r*n_regs+l]; + if (var == 0) + continue; + if (constraint < 0) { + char name[64]; + snprintf(name, sizeof(name), "one_to_%u", l); + constraint = lpp_add_cst(lpp, name, lpp_less_equal, 1); + } + lpp_set_factor_fast(lpp, constraint, var, 1); + } + } + + lpp_dump_plain(lpp, fopen("lppdump.txt", "w")); + + /* solve lpp */ + { + ir_graph *irg = get_irn_irg(node); + be_options_t *options = be_get_irg_options(irg); + unsigned *assignment; + lpp_solve(lpp, options->ilp_server, options->ilp_solver); + if (!lpp_is_sol_valid(lpp)) + panic("ilp solution not valid!"); + + assignment = ALLOCAN(unsigned, n_regs); + for (l = 0; l < n_regs; ++l) { + unsigned dest_reg = (unsigned)-1; + for (r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + double val = lpp_get_var_sol(lpp, var); + if (val == 1) { + assert(dest_reg == (unsigned)-1); + dest_reg = r; + } + } + assert(dest_reg != (unsigned)-1); + assignment[dest_reg] = l; + } + + fprintf(stderr, "Assignment: "); + for (l = 0; l < n_regs; ++l) { + fprintf(stderr, "%u ", assignment[l]); + } + fprintf(stderr, "\n"); + fflush(stdout); + permute_values(live_nodes, node, assignment); + } + lpp_free(lpp); +} + /** * Enforce constraints at a node by live range splits. * @@ -1151,31 +1258,35 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, hungarian_problem_t *bp; unsigned l, r; unsigned *assignment; + ir_node *value; /* construct a list of register occupied by live-through values */ unsigned *live_through_regs = NULL; /* see if any use constraints are not met */ + bool double_width = false; bool good = true; for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); const arch_register_t *reg; const arch_register_req_t *req; const unsigned *limited; - unsigned r; + unsigned reg_index; if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; /* are there any limitations for the i'th operand? */ - req = arch_get_register_req(node, i); + req = arch_get_irn_register_req_in(node, i); + if (req->width > 1) + double_width = true; if (!(req->type & arch_register_req_type_limited)) continue; - limited = req->limited; - reg = arch_get_irn_register(op); - r = arch_register_get_index(reg); - if (!rbitset_is_set(limited, r)) { + limited = req->limited; + reg = arch_get_irn_register(op); + reg_index = arch_register_get_index(reg); + if (!rbitset_is_set(limited, reg_index)) { /* found an assignment outside the limited set */ good = false; break; @@ -1183,43 +1294,19 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, } /* is any of the live-throughs using a constrained output register? */ - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - - foreach_out_edge(node, edge) { - ir_node *proj = get_edge_src_irn(edge); - const arch_register_req_t *req; - - if (!arch_irn_consider_in_reg_alloc(cls, proj)) - continue; - - req = arch_get_register_req_out(proj); - if (!(req->type & arch_register_req_type_limited)) - continue; - - if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); - determine_live_through_regs(live_through_regs, node); - } - - rbitset_or(forbidden_regs, req->limited, n_regs); - if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { - good = false; - } - } - } else { - if (arch_irn_consider_in_reg_alloc(cls, node)) { - const arch_register_req_t *req = arch_get_register_req_out(node); - if (req->type & arch_register_req_type_limited) { - rbitset_alloca(live_through_regs, n_regs); - determine_live_through_regs(live_through_regs, node); - if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { - good = false; - rbitset_or(forbidden_regs, req->limited, n_regs); - } - } + be_foreach_definition(node, cls, value, + if (req_->width > 1) + double_width = true; + if (! (req_->type & arch_register_req_type_limited)) + continue; + if (live_through_regs == NULL) { + rbitset_alloca(live_through_regs, n_regs); + determine_live_through_regs(live_through_regs, node); } - } + rbitset_or(forbidden_regs, req_->limited, n_regs); + if (rbitsets_have_common(req_->limited, live_through_regs, n_regs)) + good = false; + ); if (good) return; @@ -1229,6 +1316,12 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, rbitset_alloca(live_through_regs, n_regs); } + if (double_width) { + /* only the ILP variant can solve this yet */ + solve_lpp(live_nodes, node, forbidden_regs, live_through_regs); + return; + } + /* at this point we have to construct a bipartite matching problem to see * which values should go to which registers * Note: We're building the matrix in "reverse" - source registers are @@ -1266,7 +1359,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; - req = arch_get_register_req(node, i); + req = arch_get_irn_register_req_in(node, i); if (!(req->type & arch_register_req_type_limited)) continue; @@ -1276,7 +1369,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, for (r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; - hungarian_remv(bp, r, current_reg); + hungarian_remove(bp, r, current_reg); } } @@ -1284,7 +1377,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, (int*) assignment, NULL, 0); + res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); #if 0 @@ -1300,7 +1393,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, permute_values(live_nodes, node, assignment); } -/** test wether a node @p n is a copy of the value of node @p of */ +/** test whether a node @p n is a copy of the value of node @p of */ static bool is_copy_of(ir_node *value, ir_node *test_value) { allocation_info_t *test_info; @@ -1322,9 +1415,9 @@ static bool is_copy_of(ir_node *value, ir_node *test_value) static int find_value_in_block_info(block_info_t *info, ir_node *value) { unsigned r; - ir_node **assignments = info->assignments; + ir_node **end_assignments = info->assignments; for (r = 0; r < n_regs; ++r) { - ir_node *a_value = assignments[r]; + ir_node *a_value = end_assignments[r]; if (a_value == NULL) continue; @@ -1345,7 +1438,7 @@ static void add_phi_permutations(ir_node *block, int p) unsigned *permutation; ir_node **old_assignments; bool need_permutation; - ir_node *node; + ir_node *phi; ir_node *pred = get_Block_cfgpred_block(block, p); block_info_t *pred_info = get_block_info(pred); @@ -1361,29 +1454,36 @@ static void add_phi_permutations(ir_node *block, int p) /* check phi nodes */ need_permutation = false; - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { + phi = sched_first(block); + for ( ; is_Phi(phi); phi = sched_next(phi)) { const arch_register_t *reg; + const arch_register_t *op_reg; int regn; int a; ir_node *op; - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - op = get_Phi_pred(node, p); - if (!arch_irn_consider_in_reg_alloc(cls, op)) + if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - a = find_value_in_block_info(pred_info, op); + op = get_Phi_pred(phi, p); + a = find_value_in_block_info(pred_info, op); assert(a >= 0); - reg = arch_get_irn_register(node); + reg = arch_get_irn_register(phi); regn = arch_register_get_index(reg); - if (regn != a) { - permutation[regn] = a; - need_permutation = true; - } + /* same register? nothing to do */ + if (regn == a) + continue; + + op = pred_info->assignments[a]; + op_reg = arch_get_irn_register(op); + /* virtual or joker registers are ok too */ + if ((op_reg->type & arch_register_type_joker) + || (op_reg->type & arch_register_type_virtual)) + continue; + + permutation[regn] = a; + need_permutation = true; } if (need_permutation) { @@ -1391,30 +1491,27 @@ static void add_phi_permutations(ir_node *block, int p) old_assignments = assignments; assignments = pred_info->assignments; permute_values(NULL, be_get_end_of_block_insertion_point(pred), - permutation); + permutation); assignments = old_assignments; } /* change phi nodes to use the copied values */ - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { + phi = sched_first(block); + for ( ; is_Phi(phi); phi = sched_next(phi)) { int a; ir_node *op; - if (!arch_irn_consider_in_reg_alloc(cls, node)) + if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - op = get_Phi_pred(node, p); - /* no need to do anything for Unknown inputs */ - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; + op = get_Phi_pred(phi, p); /* we have permuted all values into the correct registers so we can simply query which value occupies the phis register in the predecessor */ - a = arch_register_get_index(arch_get_irn_register(node)); + a = arch_register_get_index(arch_get_irn_register(phi)); op = pred_info->assignments[a]; - set_Phi_pred(node, p, op); + set_Phi_pred(phi, p, op); } } @@ -1448,7 +1545,7 @@ static void adapt_phi_prefs(ir_node *phi) continue; /* give bonus for already assigned register */ - weight = get_block_execfreq(execfreqs, pred_block); + weight = (float)get_block_execfreq(execfreqs, pred_block); r = arch_register_get_index(reg); info->prefs[r] += weight * AFF_PHI; } @@ -1470,7 +1567,7 @@ static void propagate_phi_register(ir_node *phi, unsigned assigned_r) ir_node *pred_block = get_Block_cfgpred_block(block, i); unsigned r; float weight - = get_block_execfreq(execfreqs, pred_block) * AFF_PHI; + = (float)get_block_execfreq(execfreqs, pred_block) * AFF_PHI; if (info->prefs[assigned_r] >= weight) continue; @@ -1493,7 +1590,7 @@ static void assign_phi_registers(ir_node *block) int n_phis = 0; int n; int res; - int *assignment; + unsigned *assignment; ir_node *node; hungarian_problem_t *bp; @@ -1536,7 +1633,7 @@ static void assign_phi_registers(ir_node *block) costs = costs < 0 ? -logf(-costs+1) : logf(costs+1); costs *= 100; costs += 10000; - hungarian_add(bp, n, r, costs); + hungarian_add(bp, n, r, (int)costs); DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, info->prefs[r])); } @@ -1547,7 +1644,7 @@ static void assign_phi_registers(ir_node *block) //hungarian_print_cost_matrix(bp, 7); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - assignment = ALLOCAN(int, n_regs); + assignment = ALLOCAN(unsigned, n_regs); res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); @@ -1562,7 +1659,7 @@ static void assign_phi_registers(ir_node *block) if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; - r = assignment[n++]; + r = assignment[n++]; assert(rbitset_is_set(normal_regs, r)); reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); @@ -1580,14 +1677,14 @@ static void assign_phi_registers(ir_node *block) */ static void allocate_coalesce_block(ir_node *block, void *data) { - int i; - ir_nodeset_t live_nodes; - ir_node *node; - int n_preds; - block_info_t *block_info; - block_info_t **pred_block_infos; - ir_node **phi_ins; - unsigned *forbidden_regs; /**< collects registers which must + int i; + ir_nodeset_t live_nodes; + ir_node *node; + int n_preds; + block_info_t *block_info; + block_info_t **pred_block_infos; + ir_node **phi_ins; + unsigned *forbidden_regs; /**< collects registers which must not be used for optimistic splits */ (void) data; @@ -1600,8 +1697,8 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_nodeset_init(&live_nodes); /* gather regalloc infos of predecessor blocks */ - n_preds = get_Block_n_cfgpreds(block); - pred_block_infos = ALLOCAN(block_info_t*, n_preds); + n_preds = get_Block_n_cfgpreds(block); + pred_block_infos = ALLOCAN(block_info_t*, n_preds); for (i = 0; i < n_preds; ++i) { ir_node *pred = get_Block_cfgpred_block(block, i); block_info_t *pred_info = get_block_info(pred); @@ -1612,13 +1709,25 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* collect live-in nodes and preassigned values */ be_lv_foreach(lv, block, be_lv_state_in, i) { - const arch_register_t *reg; - int p; - bool need_phi = false; + bool need_phi = false; + const arch_register_req_t *req; + const arch_register_t *reg; + int p; node = be_lv_get_irn(lv, block, i); - if (!arch_irn_consider_in_reg_alloc(cls, node)) + req = arch_get_irn_register_req(node); + if (req->cls != cls) + continue; + + if (req->type & arch_register_req_type_ignore) { + allocation_info_t *info = get_allocation_info(node); + info->current_value = node; + + reg = arch_get_irn_register(node); + assert(reg != NULL); /* ignore values must be preassigned */ + use_reg(node, reg); continue; + } /* check all predecessors for this value, if it is not everywhere the same or unknown then we have to construct a phi @@ -1644,19 +1753,15 @@ static void allocate_coalesce_block(ir_node *block, void *data) } if (need_phi) { - ir_mode *mode = get_irn_mode(node); - const arch_register_req_t *req = get_default_req_current_cls(); - ir_node *phi; - - phi = new_r_Phi(block, n_preds, phi_ins, mode); - be_set_phi_reg_req(phi, req); + ir_mode *mode = get_irn_mode(node); + ir_node *phi = be_new_Phi(block, n_preds, phi_ins, mode, cls); DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); #ifdef DEBUG_libfirm { - int i; - for (i = 0; i < n_preds; ++i) { - DB((dbg, LEVEL_3, " %+F", phi_ins[i])); + int pi; + for (pi = 0; pi < n_preds; ++pi) { + DB((dbg, LEVEL_3, " %+F", phi_ins[pi])); } DB((dbg, LEVEL_3, "\n")); } @@ -1703,8 +1808,8 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* assign instructions in the block */ sched_foreach(block, node) { - int i; int arity; + ir_node *value; /* phis are already assigned */ if (is_Phi(node)) @@ -1734,18 +1839,9 @@ static void allocate_coalesce_block(ir_node *block, void *data) free_last_uses(&live_nodes, node); /* assign output registers */ - /* TODO: 2 phases: first: pre-assigned ones, 2nd real regs */ - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { - ir_node *proj = get_edge_src_irn(edge); - if (!arch_irn_consider_in_reg_alloc(cls, proj)) - continue; - assign_reg(block, proj, forbidden_regs); - } - } else if (arch_irn_consider_in_reg_alloc(cls, node)) { - assign_reg(block, node, forbidden_regs); - } + be_foreach_definition_(node, cls, value, + assign_reg(block, value, forbidden_regs); + ); } ir_nodeset_destroy(&live_nodes); @@ -1784,43 +1880,43 @@ struct block_costs_t { static int cmp_block_costs(const void *d1, const void *d2) { - const ir_node * const *block1 = d1; - const ir_node * const *block2 = d2; - const block_costs_t *info1 = get_irn_link(*block1); - const block_costs_t *info2 = get_irn_link(*block2); + const ir_node * const *block1 = (const ir_node**)d1; + const ir_node * const *block2 = (const ir_node**)d2; + const block_costs_t *info1 = (const block_costs_t*)get_irn_link(*block1); + const block_costs_t *info2 = (const block_costs_t*)get_irn_link(*block2); return QSORT_CMP(info2->costs, info1->costs); } static void determine_block_order(void) { - int i; + size_t p; ir_node **blocklist = be_get_cfgpostorder(irg); - int n_blocks = ARR_LEN(blocklist); + size_t n_blocks = ARR_LEN(blocklist); int dfs_num = 0; pdeq *worklist = new_pdeq(); ir_node **order = XMALLOCN(ir_node*, n_blocks); - int order_p = 0; + size_t order_p = 0; /* clear block links... */ - for (i = 0; i < n_blocks; ++i) { - ir_node *block = blocklist[i]; + for (p = 0; p < n_blocks; ++p) { + ir_node *block = blocklist[p]; set_irn_link(block, NULL); } /* walk blocks in reverse postorder, the costs for each block are the * sum of the costs of its predecessors (excluding the costs on backedges * which we can't determine) */ - for (i = n_blocks-1; i >= 0; --i) { + for (p = n_blocks; p > 0;) { block_costs_t *cost_info; - ir_node *block = blocklist[i]; + ir_node *block = blocklist[--p]; - float execfreq = get_block_execfreq(execfreqs, block); + float execfreq = (float)get_block_execfreq(execfreqs, block); float costs = execfreq; int n_cfgpreds = get_Block_n_cfgpreds(block); - int p; - for (p = 0; p < n_cfgpreds; ++p) { - ir_node *pred_block = get_Block_cfgpred_block(block, p); - block_costs_t *pred_costs = get_irn_link(pred_block); + int p2; + for (p2 = 0; p2 < n_cfgpreds; ++p2) { + ir_node *pred_block = get_Block_cfgpred_block(block, p2); + block_costs_t *pred_costs = (block_costs_t*)get_irn_link(pred_block); /* we don't have any info for backedges */ if (pred_costs == NULL) continue; @@ -1839,15 +1935,15 @@ static void determine_block_order(void) ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); inc_irg_block_visited(irg); - for (i = 0; i < n_blocks; ++i) { - ir_node *block = blocklist[i]; + for (p = 0; p < n_blocks; ++p) { + ir_node *block = blocklist[p]; if (Block_block_visited(block)) continue; /* continually add predecessors with highest costs to worklist * (without using backedges) */ do { - block_costs_t *info = get_irn_link(block); + block_costs_t *info = (block_costs_t*)get_irn_link(block); ir_node *best_pred = NULL; float best_costs = -1; int n_cfgpred = get_Block_n_cfgpreds(block); @@ -1857,7 +1953,7 @@ static void determine_block_order(void) mark_Block_block_visited(block); for (i = 0; i < n_cfgpred; ++i) { ir_node *pred_block = get_Block_cfgpred_block(block, i); - block_costs_t *pred_info = get_irn_link(pred_block); + block_costs_t *pred_info = (block_costs_t*)get_irn_link(pred_block); /* ignore backedges */ if (pred_info->dfs_num > info->dfs_num) @@ -1869,11 +1965,11 @@ static void determine_block_order(void) } } block = best_pred; - } while(block != NULL && !Block_block_visited(block)); + } while (block != NULL && !Block_block_visited(block)); /* now put all nodes in the worklist in our final order */ while (!pdeq_empty(worklist)) { - ir_node *pblock = pdeq_getr(worklist); + ir_node *pblock = (ir_node*)pdeq_getr(worklist); assert(order_p < n_blocks); order[order_p++] = pblock; } @@ -1897,9 +1993,9 @@ static void determine_block_order(void) */ static void be_pref_alloc_cls(void) { - int i; + size_t i; - lv = be_assure_liveness(birg); + lv = be_assure_liveness(irg); be_liveness_assure_sets(lv); ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); @@ -1920,11 +2016,10 @@ static void be_pref_alloc_cls(void) ir_free_resources(irg, IR_RESOURCE_IRN_LINK); } -static void dump(int mask, ir_graph *irg, const char *suffix, - void (*dumper)(ir_graph *, const char *)) +static void dump(int mask, ir_graph *irg, const char *suffix) { - if(birg->main_env->options->dump_flags & mask) - be_dump(irg, suffix, dumper); + if (be_get_irg_options(irg)->dump_flags & mask) + dump_ir_graph(irg, suffix); } /** @@ -1934,44 +2029,42 @@ static void spill(void) { /* make sure all nodes show their real register pressure */ be_timer_push(T_RA_CONSTR); - be_pre_spill_prepare_constr(birg, cls); + be_pre_spill_prepare_constr(irg, cls); be_timer_pop(T_RA_CONSTR); - dump(DUMP_RA, irg, "-spillprepare", dump_ir_block_graph_sched); + dump(DUMP_RA, irg, "spillprepare"); /* spill */ be_timer_push(T_RA_SPILL); - be_do_spill(birg, cls); + be_do_spill(irg, cls); be_timer_pop(T_RA_SPILL); be_timer_push(T_RA_SPILL_APPLY); check_for_memory_operands(irg); be_timer_pop(T_RA_SPILL_APPLY); - dump(DUMP_RA, irg, "-spill", dump_ir_block_graph_sched); + dump(DUMP_RA, irg, "spill"); } /** * The pref register allocator for a whole procedure. */ -static void be_pref_alloc(be_irg_t *new_birg) +static void be_pref_alloc(ir_graph *new_irg) { - const arch_env_t *arch_env = new_birg->main_env->arch_env; - int n_cls = arch_env_get_n_reg_class(arch_env); + const arch_env_t *arch_env = be_get_irg_arch_env(new_irg); + int n_cls = arch_env->n_register_classes; int c; obstack_init(&obst); - birg = new_birg; - irg = be_get_birg_irg(birg); - execfreqs = birg->exec_freq; + irg = new_irg; + execfreqs = be_get_irg_exec_freq(irg); /* determine a good coloring order */ determine_block_order(); for (c = 0; c < n_cls; ++c) { - cls = arch_env_get_reg_class(arch_env, c); - default_cls_req = NULL; + cls = &arch_env->register_classes[c]; if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra) continue; @@ -1979,18 +2072,18 @@ static void be_pref_alloc(be_irg_t *new_birg) n_regs = arch_register_class_n_regs(cls); normal_regs = rbitset_malloc(n_regs); - be_abi_set_non_ignore_regs(birg->abi, cls, normal_regs); + be_set_allocatable_regs(irg, cls, normal_regs); spill(); /* verify schedule and register pressure */ be_timer_push(T_VERIFY); - if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { - be_verify_schedule(birg); - be_verify_register_pressure(birg, cls, irg); - } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { - assert(be_verify_schedule(birg) && "Schedule verification failed"); - assert(be_verify_register_pressure(birg, cls, irg) + if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) { + be_verify_schedule(irg); + be_verify_register_pressure(irg, cls); + } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) { + assert(be_verify_schedule(irg) && "Schedule verification failed"); + assert(be_verify_register_pressure(irg, cls) && "Register pressure verification failed"); } be_timer_pop(T_VERIFY); @@ -2009,24 +2102,22 @@ static void be_pref_alloc(be_irg_t *new_birg) } be_timer_push(T_RA_SPILL_APPLY); - be_abi_fix_stack_nodes(birg->abi); + be_abi_fix_stack_nodes(irg); be_timer_pop(T_RA_SPILL_APPLY); be_timer_push(T_VERIFY); - if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { - be_verify_register_allocation(birg); - } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { - assert(be_verify_register_allocation(birg) - && "Register allocation invalid"); + if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) { + be_verify_register_allocation(irg); + } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) { + assert(be_verify_register_allocation(irg) + && "Register allocation invalid"); } be_timer_pop(T_VERIFY); obstack_free(&obst, NULL); } -/** - * Initializes this module. - */ +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc) void be_init_pref_alloc(void) { static be_ra_t be_ra_pref = { @@ -2039,5 +2130,3 @@ void be_init_pref_alloc(void) be_register_allocator("pref", &be_ra_pref); FIRM_DBG_REGISTER(dbg, "firm.be.prefalloc"); } - -BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc);