X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbenewalloc.c;h=2c709a75b49a09ca1ad7c6486b781f6bcbd14a7e;hb=c6af4586a08198d22de066573b3d0375c4e76a8a;hp=d90b47fee745e89e7f9b7aeffd93c8d0255084a9;hpb=c7404adf986f7a1c4956adc1fb497070013dd0aa;p=libfirm

diff --git a/ir/be/benewalloc.c b/ir/be/benewalloc.c
index d90b47fee..2c709a75b 100644
--- a/ir/be/benewalloc.c
+++ b/ir/be/benewalloc.c
@@ -39,11 +39,7 @@
  *  add copies and split live-ranges.
  *
  * TODO:
- *  - make use of free registers in the permutate_values code
- *  - We have to pessimistically construct Phi_0s when not all predecessors
- *    of a block are known.
- *  - Phi color assignment should give bonus points towards registers already
- *    assigned at predecessors.
+ *  - make use of free registers in the permute_values code
  *  - think about a smarter sequence of visiting the blocks. Sorted by
  *    execfreq might be good, or looptree from inner to outermost loops going
  *    over blocks in a reverse postorder
@@ -53,6 +49,7 @@
 
 #include <float.h>
 #include <stdbool.h>
+#include <math.h>
 
 #include "error.h"
 #include "execfreq.h"
@@ -62,8 +59,12 @@
 #include "irgraph_t.h"
 #include "irgwalk.h"
 #include "irnode_t.h"
+#include "irprintf.h"
 #include "obst.h"
 #include "raw_bitset.h"
+#include "unionfind.h"
+#include "pdeq.h"
+#include "hungarian.h"
 
 #include "beabi.h"
 #include "bechordal_t.h"
@@ -77,14 +78,15 @@
 #include "bespill.h"
 #include "bespillutil.h"
 #include "beverify.h"
+#include "beutil.h"
 
-#include "hungarian.h"
-
-#define USE_FACTOR         1.0f
-#define DEF_FACTOR         1.0f
-#define NEIGHBOR_FACTOR    0.2f
-#define AFF_SHOULD_BE_SAME 1.0f
-#define AFF_PHI            1.0f
+#define USE_FACTOR                     1.0f
+#define DEF_FACTOR                     1.0f
+#define NEIGHBOR_FACTOR                0.2f
+#define AFF_SHOULD_BE_SAME             0.5f
+#define AFF_PHI                        1.0f
+#define SPLIT_DELTA                    1.0f
+#define MAX_OPTIMISTIC_SPLIT_RECURSION 2
 
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
@@ -97,15 +99,13 @@
 static be_lv_t                    *lv;
 static const ir_exec_freq         *execfreqs;
 static unsigned                    n_regs;
 static unsigned                   *normal_regs;
+static int                        *congruence_classes;
+static ir_node                   **block_order;
+static int                         n_block_order;
 
-/** info about the current assignment for a register */
-struct assignment_t {
-	ir_node *value;            /**< currently assigned value */
-};
-typedef struct assignment_t assignment_t;
-
-/** currently active assignments (while processing a basic block) */
-static assignment_t *assignments;
+/** currently active assignments (while processing a basic block)
+ * maps registers to values (their current copies) */
+static ir_node **assignments;
 
 /**
  * allocation information: last_uses, register preferences
@@ -128,8 +128,8 @@ typedef struct reg_pref_t reg_pref_t;
 
 /** per basic-block information */
 struct block_info_t {
-	bool         processed;      /**< indicate wether block is processed */
-	assignment_t assignments[0]; /**< register assignments at end of block */
+	bool     processed;       /**< indicate whether block is processed */
+	ir_node *assignments[0];  /**< register assignments at end of block */
};
typedef struct block_info_t block_info_t;
 
@@ -139,16 +139,12 @@ typedef struct block_info_t block_info_t;
  */
 static allocation_info_t *get_allocation_info(ir_node *node)
 {
-	allocation_info_t *info;
-	if (!irn_visited_else_mark(node)) {
-		size_t size = sizeof(info[0]) + n_regs * sizeof(info->prefs[0]);
-		info = obstack_alloc(&obst, size);
-		memset(info, 0, size);
+	allocation_info_t *info = get_irn_link(node);
+	if (info == NULL) {
+		info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
 		info->current_value  = node;
 		info->original_value = node;
 		set_irn_link(node, info);
-	} else {
-		info = get_irn_link(node);
-	}
 
 	return info;
 }
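The per-value allocation_info_t above keeps one preference weight per
register in a trailing array, allocated in a single block via OALLOCFZ on
the obstack. A standalone C sketch of the same pattern, with plain calloc
and hypothetical names (new_alloc_info and pick_register are illustrations,
not libfirm API):

#include <stdio.h>
#include <stdlib.h>

/* sketch only: libfirm allocates this with OALLOCFZ on an obstack */
typedef struct alloc_info_t {
	void  *current_value;
	void  *original_value;
	float  prefs[];           /* one preference weight per register */
} alloc_info_t;

static alloc_info_t *new_alloc_info(unsigned n_regs)
{
	/* header plus n_regs trailing floats, zero-initialized */
	return calloc(1, sizeof(alloc_info_t) + n_regs * sizeof(float));
}

/* pick the free register with the highest accumulated preference */
static int pick_register(const alloc_info_t *info, void **assignments,
                         unsigned n_regs)
{
	int   best   = -1;
	float best_w = 0.0f;
	for (unsigned r = 0; r < n_regs; ++r) {
		if (assignments[r] != NULL) /* register currently occupied */
			continue;
		if (best < 0 || info->prefs[r] > best_w) {
			best   = (int)r;
			best_w = info->prefs[r];
		}
	}
	return best;
}

int main(void)
{
	enum { N_REGS = 4 };
	int dummy;
	void *assignments[N_REGS] = { NULL, &dummy, NULL, NULL };
	alloc_info_t *info = new_alloc_info(N_REGS);
	info->prefs[1] = 2.0f; /* occupied, must be skipped */
	info->prefs[2] = 1.5f;
	printf("chosen register: %d\n", pick_register(info, assignments, N_REGS));
	free(info);
	return 0;
}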
@@ -159,16 +155,12 @@ static allocation_info_t *get_allocation_info(ir_node *node)
  */
 static block_info_t *get_block_info(ir_node *block)
 {
-	block_info_t *info;
+	block_info_t *info = get_irn_link(block);
 
 	assert(is_Block(block));
-	if (!irn_visited_else_mark(block)) {
-		size_t size = sizeof(info[0]) + n_regs * sizeof(info->assignments[0]);
-		info = obstack_alloc(&obst, size);
-		memset(info, 0, size);
+	if (info == NULL) {
+		info = OALLOCFZ(&obst, block_info_t, assignments, n_regs);
 		set_irn_link(block, info);
-	} else {
-		info = get_irn_link(block);
-	}
 
 	return info;
 }
@@ -181,8 +173,7 @@ static const arch_register_req_t *get_default_req_current_cls(void)
 {
 	if (default_cls_req == NULL) {
 		struct obstack *obst = get_irg_obstack(irg);
-		arch_register_req_t *req = obstack_alloc(obst, sizeof(*req));
-		memset(req, 0, sizeof(*req));
+		arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t);
 
 		req->type = arch_register_req_type_normal;
 		req->cls  = cls;
@@ -217,9 +208,10 @@ static void mark_as_copy_of(ir_node *copy, ir_node *value)
 	/* the copy should not be linked to something else yet */
 	assert(copy_info->original_value == copy);
+	copy_info->original_value = original;
+
 	/* copy over allocation preferences */
 	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
-	copy_info->original_value = original;
 }
 
 /**
@@ -236,6 +228,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
 {
 	ir_nodeset_iterator_t iter;
 	unsigned              r;
+	unsigned              n_allowed;
 	allocation_info_t    *info = get_allocation_info(node);
 	ir_node              *neighbor;
 
@@ -251,8 +244,12 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
 	if (live_nodes == NULL)
 		return;
 
-	/* TODO: reduce penalty if there are multiple allowed registers... */
-	penalty *= NEIGHBOR_FACTOR;
+	penalty   *= NEIGHBOR_FACTOR;
+	n_allowed  = rbitset_popcnt(limited, n_regs);
+	if (n_allowed > 1) {
+		/* only create a very weak penalty if multiple regs are allowed */
+		penalty = (penalty * 0.8) / n_allowed;
+	}
 
 	foreach_ir_nodeset(live_nodes, neighbor, iter) {
 		allocation_info_t *neighbor_info;
@@ -319,7 +316,14 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight,
 			if (!rbitset_is_set(&req->other_same, i))
 				continue;
 
-			op = get_irn_n(insn, i);
+			op = get_irn_n(insn, i);
+
+			/* if the value at the should_be_same input doesn't die at the
+			 * node, then it is no use to propagate the constraints (since a
+			 * copy will emerge anyway) */
+			if (ir_nodeset_contains(live_nodes, op))
+				continue;
+
 			op_info = get_allocation_info(op);
 			for (r = 0; r < n_regs; ++r) {
 				op_info->prefs[r] += info->prefs[r] * factor;
@@ -350,7 +354,6 @@ static void analyze_block(ir_node *block, void *data)
 		if (is_Phi(node))
 			break;
 
-		/* TODO give/take penalties for should_be_same/different) */
 		check_defs(&live_nodes, weight, node);
 
 		/* mark last uses */
@@ -391,8 +394,6 @@ static void analyze_block(ir_node *block, void *data)
 			if (!(req->type & arch_register_req_type_limited))
 				continue;
 
-			/* TODO: give penalties to neighbors for precolored nodes! 
*/ - limited = req->limited; give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited, op); @@ -402,6 +403,212 @@ static void analyze_block(ir_node *block, void *data) ir_nodeset_destroy(&live_nodes); } +static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node) +{ + const arch_register_req_t *req; + + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + foreach_out_edge(node, edge) { + ir_node *def = get_edge_src_irn(edge); + congruence_def(live_nodes, def); + } + return; + } + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + /* should be same constraint? */ + req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_should_be_same) { + ir_node *insn = skip_Proj(node); + int arity = get_irn_arity(insn); + int i; + unsigned node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + for (i = 0; i < arity; ++i) { + ir_node *live; + ir_node *op; + int op_idx; + ir_nodeset_iterator_t iter; + bool interferes = false; + + if (!rbitset_is_set(&req->other_same, i)) + continue; + + op = get_irn_n(insn, i); + op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + /* one should_be_same is enough... */ + break; + } + } +} + +static void create_congurence_class(ir_node *block, void *data) +{ + ir_nodeset_t live_nodes; + ir_node *node; + + (void) data; + ir_nodeset_init(&live_nodes); + be_liveness_end_of_block(lv, cls, block, &live_nodes); + + /* check should be same constraints */ + sched_foreach_reverse(block, node) { + if (is_Phi(node)) + break; + + congruence_def(&live_nodes, node); + be_liveness_transfer(cls, node, &live_nodes); + } + + /* check phi congruence classes */ + sched_foreach_reverse_from(node, node) { + int i; + int arity; + int node_idx; + assert(is_Phi(node)); + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + bool interferes = false; + ir_nodeset_iterator_t iter; + ir_node *live; + ir_node *phi; + ir_node *op = get_Phi_pred(node, i); + int op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(&live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + /* any other phi has the same input? 
*/ + sched_foreach(block, phi) { + ir_node *oop; + int oop_idx; + if (!is_Phi(phi)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, phi)) + continue; + oop = get_Phi_pred(phi, i); + if (oop == op) + continue; + oop_idx = get_irn_idx(oop); + oop_idx = uf_find(congruence_classes, oop_idx); + if (oop_idx == op_idx) { + interferes = true; + break; + } + } + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + } + } +} + +static void merge_congruence_prefs(ir_node *node, void *data) +{ + allocation_info_t *info; + allocation_info_t *head_info; + unsigned node_idx = get_irn_idx(node); + unsigned node_set = uf_find(congruence_classes, node_idx); + unsigned r; + + (void) data; + + /* head of congruence class or not in any class */ + if (node_set == node_idx) + return; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + head_info = get_allocation_info(get_idx_irn(irg, node_set)); + info = get_allocation_info(node); + + for (r = 0; r < n_regs; ++r) { + head_info->prefs[r] += info->prefs[r]; + } +} + +static void set_congruence_prefs(ir_node *node, void *data) +{ + allocation_info_t *info; + allocation_info_t *head_info; + unsigned node_idx = get_irn_idx(node); + unsigned node_set = uf_find(congruence_classes, node_idx); + + (void) data; + + /* head of congruence class or not in any class */ + if (node_set == node_idx) + return; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + head_info = get_allocation_info(get_idx_irn(irg, node_set)); + info = get_allocation_info(node); + + memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); +} + +static void combine_congruence_classes(void) +{ + size_t n = get_irg_last_idx(irg); + congruence_classes = XMALLOCN(int, n); + uf_init(congruence_classes, n); + + /* create congruence classes */ + irg_block_walk_graph(irg, create_congurence_class, NULL, NULL); + /* merge preferences */ + irg_walk_graph(irg, merge_congruence_prefs, NULL, NULL); + irg_walk_graph(irg, set_congruence_prefs, NULL, NULL); +} + + + + + /** * Assign register reg to the given node. * @@ -411,10 +618,26 @@ static void analyze_block(ir_node *block, void *data) static void use_reg(ir_node *node, const arch_register_t *reg) { unsigned r = arch_register_get_index(reg); - assignments[r].value = node; + assignments[r] = node; arch_set_irn_register(node, reg); } +static void free_reg_of_value(ir_node *node) +{ + const arch_register_t *reg; + unsigned r; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + reg = arch_get_irn_register(node); + r = arch_register_get_index(reg); + /* assignment->value may be NULL if a value is used at 2 inputs + so it gets freed twice. */ + assert(assignments[r] == node || assignments[r] == NULL); + assignments[r] = NULL; +} + /** * Compare two register preferences in decreasing order. 
 */
@@ -443,10 +666,119 @@ static void fill_sort_candidates(reg_pref_t *regprefs,
 	qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
 }
 
+static bool try_optimistic_split(ir_node *to_split, ir_node *before,
+                                 float pref, float pref_delta,
+                                 unsigned *forbidden_regs, int recursion)
+{
+	const arch_register_t *from_reg;
+	const arch_register_t *reg;
+	ir_node               *original_insn;
+	ir_node               *block;
+	ir_node               *copy;
+	unsigned               r;
+	unsigned               from_r;
+	unsigned               i;
+	allocation_info_t     *info = get_allocation_info(to_split);
+	reg_pref_t            *prefs;
+	float                  delta;
+	float                  split_threshold;
+
+	(void) pref;
+
+	/* stupid hack: don't optimistically split dont_spill nodes...
+	 * (so we don't split away the values produced because of
+	 *  must_be_different constraints) */
+	original_insn = skip_Proj(info->original_value);
+	if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
+		return false;
+
+	from_reg        = arch_get_irn_register(to_split);
+	from_r          = arch_register_get_index(from_reg);
+	block           = get_nodes_block(before);
+	split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA;
+
+	if (pref_delta < split_threshold*0.5)
+		return false;
+
+	/* find the best free position where we could move to */
+	prefs = ALLOCAN(reg_pref_t, n_regs);
+	fill_sort_candidates(prefs, info);
+	for (i = 0; i < n_regs; ++i) {
+		float apref;
+		float apref_delta;
+		bool  res;
+		bool  old_source_state;
+
+		/* we need a normal register which is not an output register
+		 * and is different from the current register of to_split */
+		r = prefs[i].num;
+		if (!rbitset_is_set(normal_regs, r))
+			continue;
+		if (rbitset_is_set(forbidden_regs, r))
+			continue;
+		if (r == from_r)
+			continue;
+
+		/* is the split worth it? */
+		delta = pref_delta + prefs[i].pref;
+		if (delta < split_threshold) {
+			DB((dbg, LEVEL_3, "Not doing optimistic split of %+F (depth %d), win %f too low\n",
+			    to_split, recursion, delta));
+			return false;
+		}
+
+		/* if the register is free then we can do the split */
+		if (assignments[r] == NULL)
+			break;
+
+		/* otherwise we might try recursively calling optimistic_split */
+		if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION)
+			continue;
+
+		apref        = prefs[i].pref;
+		apref_delta  = i+1 < n_regs ? apref - prefs[i+1].pref : 0;
+		apref_delta += pref_delta - split_threshold;
+
+		/* our source register isn't a useful destination for recursive
+		   splits */
+		old_source_state = rbitset_is_set(forbidden_regs, from_r);
+		rbitset_set(forbidden_regs, from_r);
+		/* try recursive split */
+		res = try_optimistic_split(assignments[r], before, apref,
+		                           apref_delta, forbidden_regs, recursion+1);
+		/* restore the source register's forbidden state */
+		if (old_source_state) {
+			rbitset_set(forbidden_regs, from_r);
+		} else {
+			rbitset_clear(forbidden_regs, from_r);
+		}
+
+		if (res)
+			break;
+	}
+	if (i >= n_regs)
+		return false;
+
+	reg  = arch_register_for_index(cls, r);
+	copy = be_new_Copy(cls, block, to_split);
+	mark_as_copy_of(copy, to_split);
+	/* hacky, but correct here */
+	if (assignments[arch_register_get_index(from_reg)] == to_split)
+		free_reg_of_value(to_split);
+	use_reg(copy, reg);
+	sched_add_before(before, copy);
+
+	DB((dbg, LEVEL_3,
+	    "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n",
+	    copy, to_split, from_reg->name, reg->name, before, delta, recursion));
+	return true;
+}
+
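try_optimistic_split above inserts a copy only when the expected preference
gain outweighs the cost of the extra copy, with the threshold scaled by the
block's execution frequency via SPLIT_DELTA. A self-contained illustration
of just this cost test (the numbers and the helper split_is_worth_it are
hypothetical; the real code additionally sorts candidates and recurses):

#include <stdbool.h>
#include <stdio.h>

#define SPLIT_DELTA 1.0f

/* does moving a value to a better register pay for the inserted copy? */
static bool split_is_worth_it(float pref_delta, float candidate_pref,
                              float block_execfreq)
{
	float split_threshold = block_execfreq * SPLIT_DELTA;
	/* early exit as in try_optimistic_split: not even half the threshold */
	if (pref_delta < split_threshold * 0.5f)
		return false;
	/* the full test: delta = pref_delta + preference of the candidate */
	return pref_delta + candidate_pref >= split_threshold;
}

int main(void)
{
	/* hot block (execfreq 10): a win of 4 + 3 cannot pay for the copy */
	printf("hot:  %d\n", split_is_worth_it(4.0f, 3.0f, 10.0f));
	/* cold block (execfreq 2): the same win easily pays for the copy */
	printf("cold: %d\n", split_is_worth_it(4.0f, 3.0f, 2.0f));
	return 0;
}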
 /**
  * Determine and assign a register for node @p node
  */
-static void assign_reg(const ir_node *block, ir_node *node)
+static void assign_reg(const ir_node *block, ir_node *node,
+                       unsigned *forbidden_regs)
 {
 	const arch_register_t     *reg;
 	allocation_info_t         *info;
@@ -455,7 +787,9 @@ static void assign_reg(const ir_node *block, ir_node *node)
 	ir_node                   *in_node;
 	unsigned                   i;
 	const unsigned            *allowed_regs;
+	unsigned                   r;
 
+	assert(!is_Phi(node));
 	assert(arch_irn_consider_in_reg_alloc(cls, node));
 
 	/* preassigned register? */
@@ -488,16 +822,28 @@ static void assign_reg(const ir_node *block, ir_node *node)
 			reg = arch_get_irn_register(in);
 			assert(reg != NULL);
 			r = arch_register_get_index(reg);
+
+			/* if the value didn't die here then we should not propagate the
+			 * should_be_same info */
+			if (assignments[r] == in)
+				continue;
+
 			info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
 		}
 	}
 
+	/* create list of register candidates and sort by their preference */
 	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
 	reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
 	fill_sort_candidates(reg_prefs, info);
 	for (i = 0; i < n_regs; ++i) {
-		unsigned               num = reg_prefs[i].num;
-		const arch_register_t *reg = arch_register_for_index(cls, num);
+		unsigned num = reg_prefs[i].num;
+		const arch_register_t *reg;
+
+		if (!rbitset_is_set(normal_regs, num))
+			continue;
+
+		reg = arch_register_for_index(cls, num);
 		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
 	}
 	DB((dbg, LEVEL_2, "\n"));
@@ -508,37 +854,26 @@ static void assign_reg(const ir_node *block, ir_node *node)
 	}
 
 	for (i = 0; i < n_regs; ++i) {
-		unsigned r = reg_prefs[i].num;
-		/* already used?
-		   TODO: It might be better to copy the value occupying the register
-		   around here instead of trying the next one, find out when... */
-		if (assignments[r].value != NULL)
-			continue;
+		r = reg_prefs[i].num;
 		if (!rbitset_is_set(allowed_regs, r))
 			continue;
-		reg = arch_register_for_index(cls, r);
-		DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
-		use_reg(node, reg);
-		break;
+		if (assignments[r] == NULL)
+			break;
+		float pref  = reg_prefs[i].pref;
+		float delta = i+1 < n_regs ? 
pref - reg_prefs[i+1].pref : 0; + ir_node *before = skip_Proj(node); + bool res = try_optimistic_split(assignments[r], before, + pref, delta, forbidden_regs, 0); + if (res) + break; + } + if (i >= n_regs) { + panic("No register left for %+F\n", node); } -} - -static void free_reg_of_value(ir_node *node) -{ - assignment_t *assignment; - const arch_register_t *reg; - unsigned r; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - reg = arch_get_irn_register(node); - r = arch_register_get_index(reg); - assignment = &assignments[r]; - /* assignment->value may be NULL if a value is used at 2 inputs - so it gets freed twice. */ - assert(assignment->value == node || assignment->value == NULL); - assignment->value = NULL; + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); } /** @@ -575,7 +910,7 @@ static void free_reg_of_value(ir_node *node) * registers, the values in the array are the source * registers. */ -static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, +static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, unsigned *permutation) { unsigned *n_used = ALLOCANZ(unsigned, n_regs); @@ -587,7 +922,7 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, unsigned old_reg = permutation[r]; ir_node *value; - value = assignments[old_reg].value; + value = assignments[old_reg]; if (value == NULL) { /* nothing to do here, reg is not live. Mark it as fixpoint * so we ignore it in the next steps */ @@ -615,7 +950,7 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, } /* create a copy */ - src = assignments[old_r].value; + src = assignments[old_r]; copy = be_new_Copy(cls, block, src); sched_add_before(before, copy); reg = arch_register_for_index(cls, r); @@ -677,8 +1012,8 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, /* exchange old_r and r2; after that old_r is a fixed point */ r2 = permutation[old_r]; - in[0] = assignments[r2].value; - in[1] = assignments[old_r].value; + in[0] = assignments[r2]; + in[1] = assignments[old_r]; perm = be_new_Perm(cls, block, 2, in); sched_add_before(before, perm); DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", @@ -724,10 +1059,11 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, */ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) { - allocation_info_t *info = get_allocation_info(node); - const unsigned *last_uses = &info->last_uses; - int arity = get_irn_arity(node); - int i; + allocation_info_t *info = get_allocation_info(node); + const unsigned *last_uses = &info->last_uses; + int arity = get_irn_arity(node); + int i; + for (i = 0; i < arity; ++i) { ir_node *op; @@ -777,8 +1113,7 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) /* mark all used registers as potentially live-through */ for (r = 0; r < n_regs; ++r) { - const assignment_t *assignment = &assignments[r]; - if (assignment->value == NULL) + if (assignments[r] == NULL) continue; if (!rbitset_is_set(normal_regs, r)) continue; @@ -807,17 +1142,17 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) * @param live_nodes the set of live nodes, might be changed * @param node the current node */ -static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node) +static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, + unsigned *forbidden_regs) { int arity = get_irn_arity(node); - int i, dummy, res; + 
int i, res;
 	hungarian_problem_t *bp;
 	unsigned l, r;
 	unsigned *assignment;
 
 	/* construct a list of registers occupied by live-through values */
 	unsigned *live_through_regs = NULL;
-	unsigned *output_regs       = NULL;
 
 	/* see if any use constraints are not met */
 	bool good = true;
@@ -864,11 +1199,9 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
 		if (live_through_regs == NULL) {
 			rbitset_alloca(live_through_regs, n_regs);
 			determine_live_through_regs(live_through_regs, node);
-
-			rbitset_alloca(output_regs, n_regs);
 		}
 
-		rbitset_or(output_regs, req->limited, n_regs);
+		rbitset_or(forbidden_regs, req->limited, n_regs);
 		if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
 			good = false;
 		}
@@ -881,9 +1214,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
 			determine_live_through_regs(live_through_regs, node);
 			if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
 				good = false;
-
-				rbitset_alloca(output_regs, n_regs);
-				rbitset_or(output_regs, req->limited, n_regs);
+				rbitset_or(forbidden_regs, req->limited, n_regs);
 			}
 		}
 	}
@@ -893,18 +1224,15 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
 		return;
 
 	/* create these arrays if we haven't yet */
-	if (output_regs == NULL) {
-		if (live_through_regs == NULL) {
-			rbitset_alloca(live_through_regs, n_regs);
-		}
-		rbitset_alloca(output_regs, n_regs);
+	if (live_through_regs == NULL) {
+		rbitset_alloca(live_through_regs, n_regs);
 	}
 
 	/* at this point we have to construct a bipartite matching problem to see
 	 * which values should go to which registers
 	 * Note: We're building the matrix in "reverse" - source registers are
-	 *       right, destinations at l because this will produce the solution
-	 *       in the format required for permutate_values.
+	 *       right, destinations left because this will produce the solution
+	 *       in the format required for permute_values.
 	 */
 	bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT);
 
@@ -920,7 +1248,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
 			continue;
 		/* live-through values may not use constrained output registers */
 		if (rbitset_is_set(live_through_regs, l)
-				&& rbitset_is_set(output_regs, r))
+				&& rbitset_is_set(forbidden_regs, r))
 			continue;
 
 		hungarian_add(bp, r, l, l == r ? 9 : 8);
 	}
 
@@ -951,11 +1279,11 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
 		}
 	}
 
-	//hungarian_print_costmatrix(bp, 1);
+	//hungarian_print_cost_matrix(bp, 1);
 	hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
 
 	assignment = ALLOCAN(unsigned, n_regs);
-	res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
+	res = hungarian_solve(bp, (int*) assignment, NULL, 0);
 	assert(res == 0);
 
 #if 0
@@ -968,7 +1296,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
 
 	hungarian_free(bp);
 
-	permutate_values(live_nodes, node, assignment);
+	permute_values(live_nodes, node, assignment);
 }
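enforce_constraints encodes the register shuffle around a constrained
instruction as a bipartite matching: the edge from source register l to
destination r gets weight 9 when the value may stay put (l == r), weight 8
when it may move, and no edge when the pair is forbidden, so a maximum
perfect matching prefers the identity and directly yields the permutation
array that permute_values expects. A toy brute-force matcher demonstrating
the encoding (three registers, the value in r1 constrained to r0; libfirm
itself uses the hungarian_* solver, this sketch is only an illustration):

#include <stdio.h>

enum { N = 3 };

/* weight[r][l]: benefit of moving the value in source register l to
 * destination register r; 0 marks a forbidden pair */
static int best_score;
static int best_perm[N];

static void search(const int weight[N][N], int perm[N], unsigned used,
                   int r, int score)
{
	if (r == N) {
		if (score > best_score) {
			best_score = score;
			for (int i = 0; i < N; ++i)
				best_perm[i] = perm[i];
		}
		return;
	}
	for (int l = 0; l < N; ++l) {
		if (((used >> l) & 1u) || weight[r][l] == 0)
			continue;
		perm[r] = l;
		search(weight, perm, used | (1u << l), r + 1, score + weight[r][l]);
	}
}

int main(void)
{
	/* the value in r1 may only go to r0; r0 and r2 are unconstrained */
	const int weight[N][N] = {
		{ 9, 8, 8 }, /* dest r0: src r0 stays (9), r1/r2 may move in (8) */
		{ 8, 0, 8 }, /* dest r1: src r1 forbidden here */
		{ 8, 0, 9 }, /* dest r2: src r1 forbidden here */
	};
	int perm[N];
	best_score = -1;
	search(weight, perm, 0u, 0, 0);
	/* best_perm[r] = source register whose value ends up in r,
	 * the same format permute_values consumes */
	for (int r = 0; r < N; ++r)
		printf("dest r%d <- src r%d\n", r, best_perm[r]);
	return 0;
}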
 
 /** test whether a node @p n is a copy of the value of node @p of */
 static bool is_copy_of(ir_node *value, ir_node *test_value)
 {
@@ -992,11 +1320,10 @@ static bool is_copy_of(ir_node *value, ir_node *test_value)
  */
 static int find_value_in_block_info(block_info_t *info, ir_node *value)
 {
-	unsigned      r;
-	assignment_t *assignments = info->assignments;
+	unsigned   r;
+	ir_node  **assignments = info->assignments;
 	for (r = 0; r < n_regs; ++r) {
-		const assignment_t *assignment = &assignments[r];
-		ir_node            *a_value    = assignment->value;
+		ir_node *a_value = assignments[r];
 
 		if (a_value == NULL)
 			continue;
@@ -1013,12 +1340,12 @@ static int find_value_in_block_info(block_info_t *info, ir_node *value)
  */
 static void add_phi_permutations(ir_node *block, int p)
 {
-	unsigned      r;
-	unsigned     *permutation;
-	assignment_t *old_assignments;
-	bool          need_permutation;
-	ir_node      *node;
-	ir_node      *pred = get_Block_cfgpred_block(block, p);
+	unsigned   r;
+	unsigned  *permutation;
+	ir_node  **old_assignments;
+	bool       need_permutation;
+	ir_node   *node;
+	ir_node   *pred = get_Block_cfgpred_block(block, p);
 
 	block_info_t *pred_info = get_block_info(pred);
 
@@ -1047,7 +1374,7 @@ static void add_phi_permutations(ir_node *block, int p)
 		if (!arch_irn_consider_in_reg_alloc(cls, op))
 			continue;
 
-		a  = find_value_in_block_info(pred_info, op);
+		a = find_value_in_block_info(pred_info, op);
 		assert(a >= 0);
 
 		reg = arch_get_irn_register(node);
@@ -1059,10 +1386,10 @@ static void add_phi_permutations(ir_node *block, int p)
 	}
 
 	if (need_permutation) {
-		/* permutate values at end of predecessor */
+		/* permute values at end of predecessor */
 		old_assignments = assignments;
 		assignments = pred_info->assignments;
-		permutate_values(NULL, be_get_end_of_block_insertion_point(pred),
+		permute_values(NULL, be_get_end_of_block_insertion_point(pred),
 		                 permutation);
 		assignments = old_assignments;
 	}
@@ -1076,16 +1403,25 @@ static void add_phi_permutations(ir_node *block, int p)
 		if (!arch_irn_consider_in_reg_alloc(cls, node))
 			continue;
 
-		/* we have permutated all values into the correct registers so we can
+		op = get_Phi_pred(node, p);
+		/* no need to do anything for Unknown inputs */
+		if (!arch_irn_consider_in_reg_alloc(cls, op))
+			continue;
+
+		/* we have permuted all values into the correct registers so we can
 		   simply query which value occupies the phi's register in the
 		   predecessor */
 		a  = arch_register_get_index(arch_get_irn_register(node));
-		op = pred_info->assignments[a].value;
+		op = pred_info->assignments[a];
 		set_Phi_pred(node, p, op);
 	}
 }
 
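adapt_phi_prefs below rewards, for every phi input that already carries a
register, that register with the predecessor block's execution frequency,
so a phi gravitates toward the register of its hottest incoming edge. A
compact standalone sketch of that weighting (add_phi_bonus and the input
arrays are hypothetical, mirroring only the AFF_PHI scaling):

#include <stdio.h>

enum { N_REGS = 4 };

/* each already-processed predecessor votes for its register,
 * weighted by that block's execution frequency */
static void add_phi_bonus(float prefs[N_REGS], const int pred_reg[],
                          const float pred_execfreq[], int n_preds)
{
	const float AFF_PHI = 1.0f;
	for (int i = 0; i < n_preds; ++i) {
		if (pred_reg[i] < 0) /* predecessor not processed yet: no vote */
			continue;
		prefs[pred_reg[i]] += pred_execfreq[i] * AFF_PHI;
	}
}

int main(void)
{
	float prefs[N_REGS] = { 0.0f };
	/* two cold predecessors assigned r1, one hot predecessor assigned r2 */
	const int   pred_reg[]      = { 1, 1, 2 };
	const float pred_execfreq[] = { 1.0f, 1.0f, 5.0f };
	add_phi_bonus(prefs, pred_reg, pred_execfreq, 3);
	for (int r = 0; r < N_REGS; ++r)
		printf("r%d: %.1f\n", r, prefs[r]); /* r2 wins despite fewer votes */
	return 0;
}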
-static void handle_phi_prefs(ir_node *phi)
+/**
+ * Set preferences for a phi's register based on the registers used on the
+ * phi inputs.
+ */
+static void adapt_phi_prefs(ir_node *phi)
 {
 	int i;
 	int arity = get_irn_arity(phi);
@@ -1095,21 +1431,148 @@ static void handle_phi_prefs(ir_node *phi)
 	for (i = 0; i < arity; ++i) {
 		ir_node               *op  = get_irn_n(phi, i);
 		const arch_register_t *reg = arch_get_irn_register(op);
-		ir_node               *pred;
+		ir_node               *pred_block;
+		block_info_t          *pred_block_info;
 		float                  weight;
 		unsigned               r;
 
 		if (reg == NULL)
 			continue;
+		/* we only give the bonus if the predecessor already has registers
+		 * assigned, otherwise we only see a dummy value
+		 * and any conclusions about its register are useless */
+		pred_block = get_Block_cfgpred_block(block, i);
+		pred_block_info = get_block_info(pred_block);
+		if (!pred_block_info->processed)
+			continue;
 
 		/* give bonus for already assigned register */
-		pred   = get_Block_cfgpred_block(block, i);
-		weight = get_block_execfreq(execfreqs, pred);
+		weight = get_block_execfreq(execfreqs, pred_block);
 		r      = arch_register_get_index(reg);
 		info->prefs[r] += weight * AFF_PHI;
 	}
 }
 
+/**
+ * After a phi has been assigned a register, propagate that register as a
+ * preference to the phi inputs.
+ */
+static void propagate_phi_register(ir_node *phi, unsigned assigned_r)
+{
+	int      i;
+	ir_node *block = get_nodes_block(phi);
+	int      arity = get_irn_arity(phi);
+
+	for (i = 0; i < arity; ++i) {
+		ir_node           *op         = get_Phi_pred(phi, i);
+		allocation_info_t *info       = get_allocation_info(op);
+		ir_node           *pred_block = get_Block_cfgpred_block(block, i);
+		unsigned           r;
+		float              weight
+			= get_block_execfreq(execfreqs, pred_block) * AFF_PHI;
+
+		if (info->prefs[assigned_r] >= weight)
+			continue;
+
+		/* promote the preferred register */
+		for (r = 0; r < n_regs; ++r) {
+			if (r == assigned_r) {
+				info->prefs[r] = AFF_PHI * weight;
+			} else {
+				info->prefs[r] -= AFF_PHI * weight;
+			}
+		}
+
+		if (is_Phi(op))
+			propagate_phi_register(op, assigned_r);
+	}
+}
+
+static void assign_phi_registers(ir_node *block)
+{
+	int                  n_phis = 0;
+	int                  n;
+	int                  res;
+	int                 *assignment;
+	ir_node             *node;
+	hungarian_problem_t *bp;
+
+	/* count phi nodes */
+	sched_foreach(block, node) {
+		if (!is_Phi(node))
+			break;
+		if (!arch_irn_consider_in_reg_alloc(cls, node))
+			continue;
+		++n_phis;
+	}
+
+	if (n_phis == 0)
+		return;
+
+	/* build a bipartite matching problem for all phi nodes */
+	bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT);
+	n  = 0;
+	sched_foreach(block, node) {
+		unsigned r;
+
+		allocation_info_t *info;
+		if (!is_Phi(node))
+			break;
+		if (!arch_irn_consider_in_reg_alloc(cls, node))
+			continue;
+
+		/* give a bonus for predecessor colorings */
+		adapt_phi_prefs(node);
+		/* add stuff to bipartite problem */
+		info = get_allocation_info(node);
+		DB((dbg, LEVEL_3, "Prefs for %+F: ", node));
+		for (r = 0; r < n_regs; ++r) {
+			float costs;
+
+			if (!rbitset_is_set(normal_regs, r))
+				continue;
+
+			costs = info->prefs[r];
+			costs = costs < 0 ? 
-logf(-costs+1) : logf(costs+1); + costs *= 100; + costs += 10000; + hungarian_add(bp, n, r, costs); + DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, + info->prefs[r])); + } + DB((dbg, LEVEL_3, "\n")); + ++n; + } + + //hungarian_print_cost_matrix(bp, 7); + hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); + + assignment = ALLOCAN(int, n_regs); + res = hungarian_solve(bp, assignment, NULL, 0); + assert(res == 0); + + /* apply results */ + n = 0; + sched_foreach(block, node) { + unsigned r; + const arch_register_t *reg; + + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + r = assignment[n++]; + assert(rbitset_is_set(normal_regs, r)); + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); + + /* adapt preferences for phi inputs */ + propagate_phi_register(node, r); + } +} + /** * Walker: assign registers to all nodes of a block that * need registers from the currently considered register class. @@ -1119,11 +1582,13 @@ static void allocate_coalesce_block(ir_node *block, void *data) int i; ir_nodeset_t live_nodes; ir_nodeset_iterator_t iter; - ir_node *node, *start; + ir_node *node; int n_preds; block_info_t *block_info; block_info_t **pred_block_infos; ir_node **phi_ins; + unsigned *forbidden_regs; /**< collects registers which must + not be used for optimistic splits */ (void) data; DB((dbg, LEVEL_2, "* Block %+F\n", block)); @@ -1170,7 +1635,7 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* must live out of predecessor */ assert(a >= 0); - phi_ins[p] = pred_info->assignments[a].value; + phi_ins[p] = pred_info->assignments[a]; /* different value from last time? then we need a phi */ if (p > 0 && phi_ins[p-1] != phi_ins[p]) { need_phi = true; @@ -1182,12 +1647,18 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_mode *mode = get_irn_mode(node); const arch_register_req_t *req = get_default_req_current_cls(); ir_node *phi; + int i; phi = new_r_Phi(block, n_preds, phi_ins, mode); be_set_phi_reg_req(phi, req); - DB((dbg, LEVEL_3, "Create Phi %+F (for %+F)\n", phi, node)); - + DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); +#ifdef DEBUG_libfirm + for (i = 0; i < n_preds; ++i) { + DB((dbg, LEVEL_3, " %+F", phi_ins[i])); + } + DB((dbg, LEVEL_3, "\n")); +#endif mark_as_copy_of(phi, node); sched_add_after(block, phi); @@ -1205,11 +1676,6 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* if the node already has a register assigned use it */ reg = arch_get_irn_register(node); if (reg != NULL) { - /* TODO: consult pred-block infos here. The value could be copied - away in some/all predecessor blocks. We need to construct - phi-nodes in this case. - We even need to construct some Phi_0 like constructs in cases - where the predecessor allocation is not determined yet. */ use_reg(node, reg); } @@ -1217,46 +1683,48 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_nodeset_insert(&live_nodes, node); } - /* handle phis... 
*/ - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - const arch_register_t *reg; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; + rbitset_alloca(forbidden_regs, n_regs); - /* fill in regs already assigned */ - reg = arch_get_irn_register(node); - if (reg != NULL) { - use_reg(node, reg); - } else { - /* TODO: give boni for registers already assigned at the - predecessors */ - handle_phi_prefs(node); - assign_reg(block, node); - } - } - start = node; + /* handle phis... */ + assign_phi_registers(block); - /* assign regs for live-in values */ + /* all live-ins must have a register */ +#ifdef DEBUG_libfirm foreach_ir_nodeset(&live_nodes, node, iter) { const arch_register_t *reg = arch_get_irn_register(node); - if (reg != NULL) - continue; - - assign_reg(block, node); + assert(reg != NULL); } +#endif /* assign instructions in the block */ - for (node = start; !sched_is_end(node); node = sched_next(node)) { + sched_foreach(block, node) { + int i; + int arity; + + /* phis are already assigned */ + if (is_Phi(node)) + continue; rewire_inputs(node); /* enforce use constraints */ - enforce_constraints(&live_nodes, node); + rbitset_clear_all(forbidden_regs, n_regs); + enforce_constraints(&live_nodes, node, forbidden_regs); rewire_inputs(node); + /* we may not use registers used for inputs for optimistic splits */ + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + reg = arch_get_irn_register(op); + rbitset_set(forbidden_regs, arch_register_get_index(reg)); + } + /* free registers of values last used at this instruction */ free_last_uses(&live_nodes, node); @@ -1268,10 +1736,10 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_node *proj = get_edge_src_irn(edge); if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - assign_reg(block, proj); + assign_reg(block, proj, forbidden_regs); } } else if (arch_irn_consider_in_reg_alloc(cls, node)) { - assign_reg(block, node); + assign_reg(block, node, forbidden_regs); } } @@ -1280,7 +1748,7 @@ static void allocate_coalesce_block(ir_node *block, void *data) block_info->processed = true; - /* permutate values at end of predecessor blocks in case of phi-nodes */ + /* permute values at end of predecessor blocks in case of phi-nodes */ if (n_preds > 1) { int p; for (p = 0; p < n_preds; ++p) { @@ -1303,26 +1771,147 @@ static void allocate_coalesce_block(ir_node *block, void *data) } } +typedef struct block_costs_t block_costs_t; +struct block_costs_t { + float costs; /**< costs of the block */ + int dfs_num; /**< depth first search number (to detect backedges) */ +}; + +static int cmp_block_costs(const void *d1, const void *d2) +{ + const ir_node * const *block1 = d1; + const ir_node * const *block2 = d2; + const block_costs_t *info1 = get_irn_link(*block1); + const block_costs_t *info2 = get_irn_link(*block2); + return QSORT_CMP(info2->costs, info1->costs); +} + +static void determine_block_order(void) +{ + int i; + ir_node **blocklist = be_get_cfgpostorder(irg); + int n_blocks = ARR_LEN(blocklist); + int dfs_num = 0; + pdeq *worklist = new_pdeq(); + ir_node **order = XMALLOCN(ir_node*, n_blocks); + int order_p = 0; + + /* clear block links... 
*/
+	for (i = 0; i < n_blocks; ++i) {
+		ir_node *block = blocklist[i];
+		set_irn_link(block, NULL);
+	}
+
+	/* walk blocks in reverse postorder, the costs for each block are the
+	 * sum of the costs of its predecessors (excluding the costs on backedges
+	 * which we can't determine) */
+	for (i = n_blocks-1; i >= 0; --i) {
+		block_costs_t *cost_info;
+		ir_node *block = blocklist[i];
+
+		float execfreq   = get_block_execfreq(execfreqs, block);
+		float costs      = execfreq;
+		int   n_cfgpreds = get_Block_n_cfgpreds(block);
+		int   p;
+		for (p = 0; p < n_cfgpreds; ++p) {
+			ir_node       *pred_block = get_Block_cfgpred_block(block, p);
+			block_costs_t *pred_costs = get_irn_link(pred_block);
+			/* we don't have any info for backedges */
+			if (pred_costs == NULL)
+				continue;
+			costs += pred_costs->costs;
+		}
+
+		cost_info          = OALLOCZ(&obst, block_costs_t);
+		cost_info->costs   = costs;
+		cost_info->dfs_num = dfs_num++;
+		set_irn_link(block, cost_info);
+	}
+
+	/* sort array by block costs */
+	qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs);
+
+	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+	inc_irg_block_visited(irg);
+
+	for (i = 0; i < n_blocks; ++i) {
+		ir_node *block = blocklist[i];
+		if (Block_block_visited(block))
+			continue;
+
+		/* continually add predecessors with highest costs to worklist
+		 * (without using backedges) */
+		do {
+			block_costs_t *info       = get_irn_link(block);
+			ir_node       *best_pred  = NULL;
+			float          best_costs = -1;
+			int            n_cfgpred  = get_Block_n_cfgpreds(block);
+			int            i;
+
+			pdeq_putr(worklist, block);
+			mark_Block_block_visited(block);
+			for (i = 0; i < n_cfgpred; ++i) {
+				ir_node       *pred_block = get_Block_cfgpred_block(block, i);
+				block_costs_t *pred_info  = get_irn_link(pred_block);
+
+				/* ignore backedges */
+				if (pred_info->dfs_num > info->dfs_num)
+					continue;
+
+				if (pred_info->costs > best_costs) {
+					best_costs = pred_info->costs;
+					best_pred  = pred_block;
+				}
+			}
+			block = best_pred;
+		} while (block != NULL && !Block_block_visited(block));
+
+		/* now put all blocks in the worklist in our final order */
+		while (!pdeq_empty(worklist)) {
+			ir_node *pblock = pdeq_getr(worklist);
+			assert(order_p < n_blocks);
+			order[order_p++] = pblock;
+		}
+	}
+	assert(order_p == n_blocks);
+	del_pdeq(worklist);
+
+	ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+
+	DEL_ARR_F(blocklist);
+
+	obstack_free(&obst, NULL);
+	obstack_init(&obst);
+
+	block_order   = order;
+	n_block_order = n_blocks;
+}
+
 /**
  * Run the register allocator for the current register class. 
*/ static void be_straight_alloc_cls(void) { + int i; + lv = be_assure_liveness(birg); be_liveness_assure_sets(lv); be_liveness_assure_chk(lv); - ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_IRN_VISITED); - inc_irg_visited(irg); + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name)); + be_clear_links(irg); irg_block_walk_graph(irg, NULL, analyze_block, NULL); - /* we need some dominance pre-order walk to ensure we see all - * definitions/create copies before we encounter their users */ - dom_tree_walk_irg(irg, allocate_coalesce_block, NULL, NULL); + combine_congruence_classes(); + + for (i = 0; i < n_block_order; ++i) { + ir_node *block = block_order[i]; + allocate_coalesce_block(block, NULL); + } - ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_IRN_VISITED); + ir_free_resources(irg, IR_RESOURCE_IRN_LINK); } static void dump(int mask, ir_graph *irg, const char *suffix, @@ -1371,8 +1960,8 @@ static void be_straight_alloc(be_irg_t *new_birg) irg = be_get_birg_irg(birg); execfreqs = birg->exec_freq; - /* TODO: extract some of the stuff from bechordal allocator, like - * statistics, time measurements, etc. and use them here too */ + /* determine a good coloring order */ + determine_block_order(); for (c = 0; c < n_cls; ++c) { cls = arch_env_get_reg_class(arch_env, c);