X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbenewalloc.c;h=2c709a75b49a09ca1ad7c6486b781f6bcbd14a7e;hb=c6af4586a08198d22de066573b3d0375c4e76a8a;hp=e29fffca5753c76dfdb2a5935a545d324cee7f75;hpb=d2c1b0191844c3c23731158a153838d570dcd65a;p=libfirm diff --git a/ir/be/benewalloc.c b/ir/be/benewalloc.c index e29fffca5..2c709a75b 100644 --- a/ir/be/benewalloc.c +++ b/ir/be/benewalloc.c @@ -39,49 +39,54 @@ * add copies and split live-ranges. * * TODO: - * - make use of free registers in the permutate_values code - * - output constraints are not ensured. The algorithm fails to copy values - * away, so the registers for constrained outputs are free. - * - must_be_different constraint is not respected - * - We have to pessimistically construct Phi_0s when not all predecessors - * of a block are known. - * - Phi color assignment should give bonus points towards registers already - * assigned at predecessors. + * - make use of free registers in the permute_values code * - think about a smarter sequence of visiting the blocks. Sorted by * execfreq might be good, or looptree from inner to outermost loops going * over blocks in a reverse postorder + * - propagate preferences through Phis */ #include "config.h" #include +#include +#include -#include "obst.h" -#include "irnode_t.h" -#include "irgraph_t.h" -#include "iredges_t.h" +#include "error.h" +#include "execfreq.h" #include "ircons.h" +#include "irdom.h" +#include "iredges_t.h" +#include "irgraph_t.h" #include "irgwalk.h" -#include "execfreq.h" +#include "irnode_t.h" +#include "irprintf.h" +#include "obst.h" +#include "raw_bitset.h" +#include "unionfind.h" +#include "pdeq.h" +#include "hungarian.h" +#include "beabi.h" +#include "bechordal_t.h" #include "be.h" -#include "bera.h" +#include "beirg.h" #include "belive_t.h" #include "bemodule.h" -#include "bechordal_t.h" -#include "besched.h" -#include "beirg.h" #include "benode_t.h" +#include "bera.h" +#include "besched.h" #include "bespill.h" -#include "bespilloptions.h" +#include "bespillutil.h" #include "beverify.h" +#include "beutil.h" -#include "bipartite.h" -#include "hungarian.h" - -#define USE_FACTOR 1.0f -#define DEF_FACTOR 1.0f -#define NEIGHBOR_FACTOR 0.2f -#define SHOULD_BE_SAME 1.0f +#define USE_FACTOR 1.0f +#define DEF_FACTOR 1.0f +#define NEIGHBOR_FACTOR 0.2f +#define AFF_SHOULD_BE_SAME 0.5f +#define AFF_PHI 1.0f +#define SPLIT_DELTA 1.0f +#define MAX_OPTIMISTIC_SPLIT_RECURSION 2 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) @@ -89,28 +94,28 @@ static struct obstack obst; static be_irg_t *birg; static ir_graph *irg; static const arch_register_class_t *cls; +static const arch_register_req_t *default_cls_req; static be_lv_t *lv; static const ir_exec_freq *execfreqs; static unsigned n_regs; -static bitset_t *ignore_regs; - -/** info about the current assignment for a register */ -struct assignment_t { - ir_node *value; /**< currently assigned value */ -}; -typedef struct assignment_t assignment_t; +static unsigned *normal_regs; +static int *congruence_classes; +static ir_node **block_order; +static int n_block_order; -/** currently active assignments (while processing a basic block) */ -static assignment_t *assignments; +/** currently active assignments (while processing a basic block) + * maps registers to values(their current copies) */ +static ir_node **assignments; /** * allocation information: last_uses, register preferences * the information is per firm-node. */ struct allocation_info_t { - unsigned last_uses; /**< bitset indicating last uses (input pos) */ - assignment_t *current_assignment; - float prefs[0]; /**< register preferences */ + unsigned last_uses; /**< bitset indicating last uses (input pos) */ + ir_node *current_value; /**< copy of the value that should be used */ + ir_node *original_value; /**< for copies point to original value */ + float prefs[0]; /**< register preferences */ }; typedef struct allocation_info_t allocation_info_t; @@ -123,8 +128,8 @@ typedef struct reg_pref_t reg_pref_t; /** per basic-block information */ struct block_info_t { - int processed; /**< indicate wether block is processed */ - assignment_t assignments[0]; /**< register assignments at end of block */ + bool processed; /**< indicate wether block is processed */ + ir_node *assignments[0]; /**< register assignments at end of block */ }; typedef struct block_info_t block_info_t; @@ -134,15 +139,12 @@ typedef struct block_info_t block_info_t; */ static allocation_info_t *get_allocation_info(ir_node *node) { - allocation_info_t *info; - if (!irn_visited(node)) { - size_t size = sizeof(info[0]) + n_regs * sizeof(info->prefs[0]); - info = obstack_alloc(&obst, size); - memset(info, 0, size); + allocation_info_t *info = get_irn_link(node); + if (info == NULL) { + info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs); + info->current_value = node; + info->original_value = node; set_irn_link(node, info); - mark_irn_visited(node); - } else { - info = get_irn_link(node); } return info; @@ -153,22 +155,34 @@ static allocation_info_t *get_allocation_info(ir_node *node) */ static block_info_t *get_block_info(ir_node *block) { - block_info_t *info; + block_info_t *info = get_irn_link(block); assert(is_Block(block)); - if (!irn_visited(block)) { - size_t size = sizeof(info[0]) + n_regs * sizeof(info->assignments[0]); - info = obstack_alloc(&obst, size); - memset(info, 0, size); + if (info == NULL) { + info = OALLOCFZ(&obst, block_info_t, assignments, n_regs); set_irn_link(block, info); - mark_irn_visited(block); - } else { - info = get_irn_link(block); } return info; } +/** + * Get default register requirement for the current register class + */ +static const arch_register_req_t *get_default_req_current_cls(void) +{ + if (default_cls_req == NULL) { + struct obstack *obst = get_irg_obstack(irg); + arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); + + req->type = arch_register_req_type_normal; + req->cls = cls; + + default_cls_req = req; + } + return default_cls_req; +} + /** * Link the allocation info of a node to a copy. * Afterwards, both nodes uses the same allocation info. @@ -177,28 +191,44 @@ static block_info_t *get_block_info(ir_node *block) * @param copy the node that gets the allocation info assigned * @param value the original node */ -static void link_to(ir_node *copy, ir_node *value) +static void mark_as_copy_of(ir_node *copy, ir_node *value) { - allocation_info_t *info = get_allocation_info(value); - assert(!irn_visited(copy)); - set_irn_link(copy, info); - mark_irn_visited(copy); + ir_node *original; + allocation_info_t *info = get_allocation_info(value); + allocation_info_t *copy_info = get_allocation_info(copy); + + /* find original value */ + original = info->original_value; + if (original != value) { + info = get_allocation_info(original); + } + + assert(info->original_value == original); + info->current_value = copy; + + /* the copy should not be linked to something else yet */ + assert(copy_info->original_value == copy); + copy_info->original_value = original; + + /* copy over allocation preferences */ + memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0])); } /** * Calculate the penalties for every register on a node and its live neighbors. * - * @param live_nodes the set of live nodes at the current position, may be NULL - * @param penalty the penalty to subtract from - * @param limited a raw bitset containing the limited set for the node - * @param node the node + * @param live_nodes the set of live nodes at the current position, may be NULL + * @param penalty the penalty to subtract from + * @param limited a raw bitset containing the limited set for the node + * @param node the node */ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, float penalty, const unsigned* limited, - ir_node *node) + ir_node *node) { ir_nodeset_iterator_t iter; unsigned r; + unsigned n_allowed; allocation_info_t *info = get_allocation_info(node); ir_node *neighbor; @@ -214,8 +244,12 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, if (live_nodes == NULL) return; - /* TODO: reduce penalty if there are multiple allowed registers... */ - penalty *= NEIGHBOR_FACTOR; + penalty *= NEIGHBOR_FACTOR; + n_allowed = rbitset_popcnt(limited, n_regs); + if (n_allowed > 1) { + /* only create a very weak penalty if multiple regs are allowed */ + penalty = (penalty * 0.8) / n_allowed; + } foreach_ir_nodeset(live_nodes, neighbor, iter) { allocation_info_t *neighbor_info; @@ -224,7 +258,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, if (neighbor == node) continue; - neighbor_info = get_allocation_info(neighbor); + neighbor_info = get_allocation_info(neighbor); for (r = 0; r < n_regs; ++r) { if (!rbitset_is_set(limited, r)) continue; @@ -276,17 +310,22 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, float factor = 1.0f / rbitset_popcnt(&req->other_same, arity); for (i = 0; i < arity; ++i) { ir_node *op; - unsigned r; + unsigned r; allocation_info_t *op_info; if (!rbitset_is_set(&req->other_same, i)) continue; - op = get_irn_n(insn, i); + op = get_irn_n(insn, i); + + /* if we the value at the should_be_same input doesn't die at the + * node, then it is no use to propagate the constraints (since a + * copy will emerge anyway) */ + if (ir_nodeset_contains(live_nodes, op)) + continue; + op_info = get_allocation_info(op); for (r = 0; r < n_regs; ++r) { - if (bitset_is_set(ignore_regs, r)) - continue; op_info->prefs[r] += info->prefs[r] * factor; } } @@ -309,21 +348,24 @@ static void analyze_block(ir_node *block, void *data) sched_foreach_reverse(block, node) { allocation_info_t *info; - int i, arity; + int i; + int arity; - if (is_Phi(node)) { - /* TODO: handle constrained phi-nodes */ + if (is_Phi(node)) break; - } - /* TODO give/take penalties for should_be_same/different) */ check_defs(&live_nodes, weight, node); /* mark last uses */ arity = get_irn_arity(node); - /* I was lazy, and only allocated 1 unsigned - => maximum of 32 uses per node (rewrite if necessary) */ - assert(arity <= (int) sizeof(unsigned) * 8); + + /* the allocation info node currently only uses 1 unsigned value + to mark last used inputs. So we will fail for a node with more than + 32 inputs. */ + if (arity >= (int) sizeof(unsigned) * 8) { + panic("Node with more than %d inputs not supported yet", + (int) sizeof(unsigned) * 8); + } info = get_allocation_info(node); for (i = 0; i < arity; ++i) { @@ -349,11 +391,9 @@ static void analyze_block(ir_node *block, void *data) continue; req = arch_get_register_req(node, i); - if ((req->type & arch_register_req_type_limited) == 0) + if (!(req->type & arch_register_req_type_limited)) continue; - /* TODO: give penalties to neighbors for precolored nodes! */ - limited = req->limited; give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited, op); @@ -363,6 +403,212 @@ static void analyze_block(ir_node *block, void *data) ir_nodeset_destroy(&live_nodes); } +static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node) +{ + const arch_register_req_t *req; + + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + foreach_out_edge(node, edge) { + ir_node *def = get_edge_src_irn(edge); + congruence_def(live_nodes, def); + } + return; + } + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + /* should be same constraint? */ + req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_should_be_same) { + ir_node *insn = skip_Proj(node); + int arity = get_irn_arity(insn); + int i; + unsigned node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + for (i = 0; i < arity; ++i) { + ir_node *live; + ir_node *op; + int op_idx; + ir_nodeset_iterator_t iter; + bool interferes = false; + + if (!rbitset_is_set(&req->other_same, i)) + continue; + + op = get_irn_n(insn, i); + op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + /* one should_be_same is enough... */ + break; + } + } +} + +static void create_congurence_class(ir_node *block, void *data) +{ + ir_nodeset_t live_nodes; + ir_node *node; + + (void) data; + ir_nodeset_init(&live_nodes); + be_liveness_end_of_block(lv, cls, block, &live_nodes); + + /* check should be same constraints */ + sched_foreach_reverse(block, node) { + if (is_Phi(node)) + break; + + congruence_def(&live_nodes, node); + be_liveness_transfer(cls, node, &live_nodes); + } + + /* check phi congruence classes */ + sched_foreach_reverse_from(node, node) { + int i; + int arity; + int node_idx; + assert(is_Phi(node)); + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + bool interferes = false; + ir_nodeset_iterator_t iter; + ir_node *live; + ir_node *phi; + ir_node *op = get_Phi_pred(node, i); + int op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(&live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + /* any other phi has the same input? */ + sched_foreach(block, phi) { + ir_node *oop; + int oop_idx; + if (!is_Phi(phi)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, phi)) + continue; + oop = get_Phi_pred(phi, i); + if (oop == op) + continue; + oop_idx = get_irn_idx(oop); + oop_idx = uf_find(congruence_classes, oop_idx); + if (oop_idx == op_idx) { + interferes = true; + break; + } + } + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + } + } +} + +static void merge_congruence_prefs(ir_node *node, void *data) +{ + allocation_info_t *info; + allocation_info_t *head_info; + unsigned node_idx = get_irn_idx(node); + unsigned node_set = uf_find(congruence_classes, node_idx); + unsigned r; + + (void) data; + + /* head of congruence class or not in any class */ + if (node_set == node_idx) + return; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + head_info = get_allocation_info(get_idx_irn(irg, node_set)); + info = get_allocation_info(node); + + for (r = 0; r < n_regs; ++r) { + head_info->prefs[r] += info->prefs[r]; + } +} + +static void set_congruence_prefs(ir_node *node, void *data) +{ + allocation_info_t *info; + allocation_info_t *head_info; + unsigned node_idx = get_irn_idx(node); + unsigned node_set = uf_find(congruence_classes, node_idx); + + (void) data; + + /* head of congruence class or not in any class */ + if (node_set == node_idx) + return; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + head_info = get_allocation_info(get_idx_irn(irg, node_set)); + info = get_allocation_info(node); + + memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); +} + +static void combine_congruence_classes(void) +{ + size_t n = get_irg_last_idx(irg); + congruence_classes = XMALLOCN(int, n); + uf_init(congruence_classes, n); + + /* create congruence classes */ + irg_block_walk_graph(irg, create_congurence_class, NULL, NULL); + /* merge preferences */ + irg_walk_graph(irg, merge_congruence_prefs, NULL, NULL); + irg_walk_graph(irg, set_congruence_prefs, NULL, NULL); +} + + + + + /** * Assign register reg to the given node. * @@ -371,17 +617,25 @@ static void analyze_block(ir_node *block, void *data) */ static void use_reg(ir_node *node, const arch_register_t *reg) { - unsigned r = arch_register_get_index(reg); - assignment_t *assignment = &assignments[r]; - allocation_info_t *info; + unsigned r = arch_register_get_index(reg); + assignments[r] = node; + arch_set_irn_register(node, reg); +} - assert(assignment->value == NULL); - assignment->value = node; +static void free_reg_of_value(ir_node *node) +{ + const arch_register_t *reg; + unsigned r; - info = get_allocation_info(node); - info->current_assignment = assignment; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; - arch_set_irn_register(node, reg); + reg = arch_get_irn_register(node); + r = arch_register_get_index(reg); + /* assignment->value may be NULL if a value is used at 2 inputs + so it gets freed twice. */ + assert(assignments[r] == node || assignments[r] == NULL); + assignments[r] = NULL; } /** @@ -405,9 +659,6 @@ static void fill_sort_candidates(reg_pref_t *regprefs, for (r = 0; r < n_regs; ++r) { float pref = info->prefs[r]; - if (bitset_is_set(ignore_regs, r)) { - pref = -10000; - } regprefs[r].num = r; regprefs[r].pref = pref; } @@ -415,18 +666,130 @@ static void fill_sort_candidates(reg_pref_t *regprefs, qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref); } +static bool try_optimistic_split(ir_node *to_split, ir_node *before, + float pref, float pref_delta, + unsigned *forbidden_regs, int recursion) +{ + const arch_register_t *from_reg; + const arch_register_t *reg; + ir_node *original_insn; + ir_node *block; + ir_node *copy; + unsigned r; + unsigned from_r; + unsigned i; + allocation_info_t *info = get_allocation_info(to_split); + reg_pref_t *prefs; + float delta; + float split_threshold; + + (void) pref; + + /* stupid hack: don't optimisticallt split don't spill nodes... + * (so we don't split away the values produced because of + * must_be_different constraints) */ + original_insn = skip_Proj(info->original_value); + if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) + return false; + + from_reg = arch_get_irn_register(to_split); + from_r = arch_register_get_index(from_reg); + block = get_nodes_block(before); + split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA; + + if (pref_delta < split_threshold*0.5) + return false; + + /* find the best free position where we could move to */ + prefs = ALLOCAN(reg_pref_t, n_regs); + fill_sort_candidates(prefs, info); + for (i = 0; i < n_regs; ++i) { + float apref; + float apref_delta; + bool res; + bool old_source_state; + + /* we need a normal register which is not an output register + an different from the current register of to_split */ + r = prefs[i].num; + if (!rbitset_is_set(normal_regs, r)) + continue; + if (rbitset_is_set(forbidden_regs, r)) + continue; + if (r == from_r) + continue; + + /* is the split worth it? */ + delta = pref_delta + prefs[i].pref; + if (delta < split_threshold) { + DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n", + to_split, recursion, delta)); + return false; + } + + /* if the register is free then we can do the split */ + if (assignments[r] == NULL) + break; + + /* otherwise we might try recursively calling optimistic_split */ + if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) + continue; + + apref = prefs[i].pref; + apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; + apref_delta += pref_delta - split_threshold; + + /* our source register isn't a usefull destination for recursive + splits */ + old_source_state = rbitset_is_set(forbidden_regs, from_r); + rbitset_set(forbidden_regs, from_r); + /* try recursive split */ + res = try_optimistic_split(assignments[r], before, apref, + apref_delta, forbidden_regs, recursion+1); + /* restore our destination */ + if (old_source_state) { + rbitset_set(forbidden_regs, from_r); + } else { + rbitset_clear(forbidden_regs, from_r); + } + + if (res) + break; + } + if (i >= n_regs) + return false; + + reg = arch_register_for_index(cls, r); + copy = be_new_Copy(cls, block, to_split); + mark_as_copy_of(copy, to_split); + /* hacky, but correct here */ + if (assignments[arch_register_get_index(from_reg)] == to_split) + free_reg_of_value(to_split); + use_reg(copy, reg); + sched_add_before(before, copy); + + DB((dbg, LEVEL_3, + "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n", + copy, to_split, from_reg->name, reg->name, before, delta, recursion)); + return true; +} + /** * Determine and assign a register for node @p node */ -static void assign_reg(const ir_node *block, ir_node *node) +static void assign_reg(const ir_node *block, ir_node *node, + unsigned *forbidden_regs) { const arch_register_t *reg; allocation_info_t *info; const arch_register_req_t *req; reg_pref_t *reg_prefs; ir_node *in_node; - unsigned i; + unsigned i; + const unsigned *allowed_regs; + unsigned r; + assert(!is_Phi(node)); assert(arch_irn_consider_in_reg_alloc(cls, node)); /* preassigned register? */ @@ -459,78 +822,58 @@ static void assign_reg(const ir_node *block, ir_node *node) reg = arch_get_irn_register(in); assert(reg != NULL); r = arch_register_get_index(reg); - if (bitset_is_set(ignore_regs, r)) + + /* if the value didn't die here then we should not propagate the + * should_be_same info */ + if (assignments[r] == in) continue; - info->prefs[r] += weight * SHOULD_BE_SAME; + + info->prefs[r] += weight * AFF_SHOULD_BE_SAME; } } - /* TODO: handle must_be_different */ - - /* */ + /* create list of register candidates and sort by their preference */ DB((dbg, LEVEL_2, "Candidates for %+F:", node)); reg_prefs = alloca(n_regs * sizeof(reg_prefs[0])); fill_sort_candidates(reg_prefs, info); for (i = 0; i < n_regs; ++i) { - unsigned num = reg_prefs[i].num; - const arch_register_t *reg = arch_register_for_index(cls, num); + unsigned num = reg_prefs[i].num; + const arch_register_t *reg; + + if (!rbitset_is_set(normal_regs, num)) + continue; + + reg = arch_register_for_index(cls, num); DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref)); } DB((dbg, LEVEL_2, "\n")); + allowed_regs = normal_regs; + if (req->type & arch_register_req_type_limited) { + allowed_regs = req->limited; + } + for (i = 0; i < n_regs; ++i) { - unsigned r = reg_prefs[i].num; - /* ignores should be last and we should have a non-ignore left */ - assert(!bitset_is_set(ignore_regs, r)); - /* already used? - TODO: It might be better to copy the value occupying the register around here, find out when... */ - if (assignments[r].value != NULL) + r = reg_prefs[i].num; + if (!rbitset_is_set(allowed_regs, r)) continue; - reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); - break; + if (assignments[r] == NULL) + break; + float pref = reg_prefs[i].pref; + float delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0; + ir_node *before = skip_Proj(node); + bool res = try_optimistic_split(assignments[r], before, + pref, delta, forbidden_regs, 0); + if (res) + break; + } + if (i >= n_regs) { + panic("No register left for %+F\n", node); } -} - -static void free_reg_of_value(ir_node *node) -{ - allocation_info_t *info; - assignment_t *assignment; - unsigned r; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - - info = get_allocation_info(node); - assignment = info->current_assignment; - - assert(assignment != NULL); - - r = assignment - assignments; - DB((dbg, LEVEL_2, "Value %+F ended, freeing %s\n", - node, arch_register_for_index(cls, r)->name)); - assignment->value = NULL; - info->current_assignment = NULL; -} - -/** - * Return the index of the currently assigned register of a node. - */ -static unsigned get_current_reg(ir_node *node) -{ - allocation_info_t *info = get_allocation_info(node); - assignment_t *assignment = info->current_assignment; - return assignment - assignments; -} -/** - * Return the currently assigned assignment of a node. - */ -static assignment_t *get_current_assignment(ir_node *node) -{ - allocation_info_t *info = get_allocation_info(node); - return info->current_assignment; + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); } /** @@ -567,26 +910,19 @@ static assignment_t *get_current_assignment(ir_node *node) * registers, the values in the array are the source * registers. */ -static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, +static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, unsigned *permutation) { - ir_node *block; - ir_node **ins = ALLOCANZ(ir_node*, n_regs); unsigned *n_used = ALLOCANZ(unsigned, n_regs); + ir_node *block; unsigned r; - /* create a list of permutations. Leave out fix points. */ + /* determine how often each source register needs to be read */ for (r = 0; r < n_regs; ++r) { - unsigned old_reg = permutation[r]; - assignment_t *assignment; - ir_node *value; - - /* no need to do anything for a fixpoint */ - if (old_reg == r) - continue; + unsigned old_reg = permutation[r]; + ir_node *value; - assignment = &assignments[old_reg]; - value = assignment->value; + value = assignments[old_reg]; if (value == NULL) { /* nothing to do here, reg is not live. Mark it as fixpoint * so we ignore it in the next steps */ @@ -594,14 +930,7 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, continue; } - ins[old_reg] = value; ++n_used[old_reg]; - - /* free occupation infos, we'll add the values back later */ - if (live_nodes != NULL) { - free_reg_of_value(value); - ir_nodeset_remove(live_nodes, value); - } } block = get_nodes_block(before); @@ -621,21 +950,33 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, } /* create a copy */ - src = ins[old_r]; + src = assignments[old_r]; copy = be_new_Copy(cls, block, src); + sched_add_before(before, copy); reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Copy %+F (from %+F) -> %s\n", copy, src, reg->name)); - link_to(copy, src); + DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", + copy, src, before, reg->name)); + mark_as_copy_of(copy, src); use_reg(copy, reg); - sched_add_before(before, copy); + + if (live_nodes != NULL) { + ir_nodeset_insert(live_nodes, copy); + } /* old register has 1 user less, permutation is resolved */ assert(arch_register_get_index(arch_get_irn_register(src)) == old_r); + permutation[r] = r; + assert(n_used[old_r] > 0); --n_used[old_r]; - permutation[r] = r; + if (n_used[old_r] == 0) { + if (live_nodes != NULL) { + ir_nodeset_remove(live_nodes, src); + } + free_reg_of_value(src); + } - /* advance or jump back (this copy could have enabled another copy) */ + /* advance or jump back (if this copy enabled another copy) */ if (old_r < r && n_used[old_r] == 0) { r = old_r; } else { @@ -648,8 +989,7 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, * TODO: if we have free registers left, then we should really use copy * instructions for any cycle longer than 2 registers... * (this is probably architecture dependent, there might be archs where - * copies are preferable even for 2 cycles) - */ + * copies are preferable even for 2-cycles) */ /* create perms with the rest */ for (r = 0; r < n_regs; /* empty */) { @@ -672,29 +1012,34 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, /* exchange old_r and r2; after that old_r is a fixed point */ r2 = permutation[old_r]; - in[0] = ins[r2]; - in[1] = ins[old_r]; + in[0] = assignments[r2]; + in[1] = assignments[old_r]; perm = be_new_Perm(cls, block, 2, in); + sched_add_before(before, perm); + DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", + perm, in[0], in[1], before)); proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0); - link_to(proj0, in[0]); + mark_as_copy_of(proj0, in[0]); reg = arch_register_for_index(cls, old_r); use_reg(proj0, reg); proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1); + mark_as_copy_of(proj1, in[1]); + reg = arch_register_for_index(cls, r2); + use_reg(proj1, reg); /* 1 value is now in the correct register */ permutation[old_r] = old_r; /* the source of r changed to r2 */ permutation[r] = r2; - ins[r2] = in[1]; - reg = arch_register_for_index(cls, r2); - if (r == r2) { - /* if we have reached a fixpoint update data structures */ - link_to(proj1, in[1]); - use_reg(proj1, reg); - } else { - arch_set_irn_register(proj1, reg); + + /* if we have reached a fixpoint update data structures */ + if (live_nodes != NULL) { + ir_nodeset_remove(live_nodes, in[0]); + ir_nodeset_remove(live_nodes, in[1]); + ir_nodeset_remove(live_nodes, proj0); + ir_nodeset_insert(live_nodes, proj1); } } @@ -714,14 +1059,16 @@ static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before, */ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) { - allocation_info_t *info = get_allocation_info(node); - int arity = get_irn_arity(node); - int i; + allocation_info_t *info = get_allocation_info(node); + const unsigned *last_uses = &info->last_uses; + int arity = get_irn_arity(node); + int i; + for (i = 0; i < arity; ++i) { ir_node *op; /* check if one operand is the last use */ - if (!rbitset_is_set(&info->last_uses, i)) + if (!rbitset_is_set(last_uses, i)) continue; op = get_irn_n(node, i); @@ -730,24 +1077,88 @@ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) } } +/** + * change inputs of a node to the current value (copies/perms) + */ +static void rewire_inputs(ir_node *node) +{ + int i; + int arity = get_irn_arity(node); + + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + allocation_info_t *info; + + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + info = get_allocation_info(op); + info = get_allocation_info(info->original_value); + if (info->current_value != op) { + set_irn_n(node, i, info->current_value); + } + } +} + +/** + * Create a bitset of registers occupied with value living through an + * instruction + */ +static void determine_live_through_regs(unsigned *bitset, ir_node *node) +{ + const allocation_info_t *info = get_allocation_info(node); + unsigned r; + int i; + int arity; + + /* mark all used registers as potentially live-through */ + for (r = 0; r < n_regs; ++r) { + if (assignments[r] == NULL) + continue; + if (!rbitset_is_set(normal_regs, r)) + continue; + + rbitset_set(bitset, r); + } + + /* remove registers of value dying at the instruction */ + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *op; + const arch_register_t *reg; + + if (!rbitset_is_set(&info->last_uses, i)) + continue; + + op = get_irn_n(node, i); + reg = arch_get_irn_register(op); + rbitset_clear(bitset, arch_register_get_index(reg)); + } +} + /** * Enforce constraints at a node by live range splits. * - * @param live_nodes the set of live nodes, might be changed - * @param node the current node + * @param live_nodes the set of live nodes, might be changed + * @param node the current node */ -static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node) +static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, + unsigned *forbidden_regs) { int arity = get_irn_arity(node); - int i, dummy, res; + int i, res; hungarian_problem_t *bp; - unsigned l, r, p; + unsigned l, r; unsigned *assignment; + /* construct a list of register occupied by live-through values */ + unsigned *live_through_regs = NULL; + /* see if any use constraints are not met */ bool good = true; for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; const arch_register_req_t *req; const unsigned *limited; unsigned r; @@ -757,108 +1168,166 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node) /* are there any limitations for the i'th operand? */ req = arch_get_register_req(node, i); - if ((req->type & arch_register_req_type_limited) == 0) + if (!(req->type & arch_register_req_type_limited)) continue; limited = req->limited; - r = get_current_reg(op); + reg = arch_get_irn_register(op); + r = arch_register_get_index(reg); if (!rbitset_is_set(limited, r)) { - /* found an assignement outside the limited set */ + /* found an assignment outside the limited set */ good = false; break; } } + /* is any of the live-throughs using a constrained output register? */ + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + + foreach_out_edge(node, edge) { + ir_node *proj = get_edge_src_irn(edge); + const arch_register_req_t *req; + + if (!arch_irn_consider_in_reg_alloc(cls, proj)) + continue; + + req = arch_get_register_req_out(proj); + if (!(req->type & arch_register_req_type_limited)) + continue; + + if (live_through_regs == NULL) { + rbitset_alloca(live_through_regs, n_regs); + determine_live_through_regs(live_through_regs, node); + } + + rbitset_or(forbidden_regs, req->limited, n_regs); + if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { + good = false; + } + } + } else { + if (arch_irn_consider_in_reg_alloc(cls, node)) { + const arch_register_req_t *req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_limited) { + rbitset_alloca(live_through_regs, n_regs); + determine_live_through_regs(live_through_regs, node); + if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { + good = false; + rbitset_or(forbidden_regs, req->limited, n_regs); + } + } + } + } + if (good) return; - /* swap values around */ + /* create these arrays if we haven't yet */ + if (live_through_regs == NULL) { + rbitset_alloca(live_through_regs, n_regs); + } + + /* at this point we have to construct a bipartite matching problem to see + * which values should go to which registers + * Note: We're building the matrix in "reverse" - source registers are + * right, destinations left because this will produce the solution + * in the format required for permute_values. + */ bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); /* add all combinations, then remove not allowed ones */ for (l = 0; l < n_regs; ++l) { - if (bitset_is_set(ignore_regs, l)) { - hungarian_add(bp, l, l, 90); + if (!rbitset_is_set(normal_regs, l)) { + hungarian_add(bp, l, l, 1); continue; } for (r = 0; r < n_regs; ++r) { - if (bitset_is_set(ignore_regs, r)) + if (!rbitset_is_set(normal_regs, r)) + continue; + /* livethrough values may not use constrainted output registers */ + if (rbitset_is_set(live_through_regs, l) + && rbitset_is_set(forbidden_regs, r)) continue; - hungarian_add(bp, l, r, l == r ? 90 : 89); + hungarian_add(bp, r, l, l == r ? 9 : 8); } } for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; const arch_register_req_t *req; const unsigned *limited; - unsigned current_reg; + unsigned current_reg; if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; req = arch_get_register_req(node, i); - if ((req->type & arch_register_req_type_limited) == 0) + if (!(req->type & arch_register_req_type_limited)) continue; limited = req->limited; - current_reg = get_current_reg(op); + reg = arch_get_irn_register(op); + current_reg = arch_register_get_index(reg); for (r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; - hungarian_remv(bp, current_reg, r); + hungarian_remv(bp, r, current_reg); } } - hungarian_print_costmatrix(bp, 1); + //hungarian_print_cost_matrix(bp, 1); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, (int*) assignment, &dummy, 0); + res = hungarian_solve(bp, (int*) assignment, NULL, 0); assert(res == 0); - printf("Swap result:"); - for (p = 0; p < n_regs; ++p) { - printf(" %d", assignment[p]); +#if 0 + fprintf(stderr, "Swap result:"); + for (i = 0; i < (int) n_regs; ++i) { + fprintf(stderr, " %d", assignment[i]); } - printf("\n"); + fprintf(stderr, "\n"); +#endif hungarian_free(bp); - permutate_values(live_nodes, node, assignment); + permute_values(live_nodes, node, assignment); } /** test wether a node @p n is a copy of the value of node @p of */ -static int is_copy_of(ir_node *n, ir_node *of) +static bool is_copy_of(ir_node *value, ir_node *test_value) { - allocation_info_t *of_info; - - if (n == NULL) - return 0; - - if (n == of) - return 1; + allocation_info_t *test_info; + allocation_info_t *info; - of_info = get_allocation_info(of); - if (!irn_visited(n)) - return 0; + if (value == test_value) + return true; - return of_info == get_irn_link(n); + info = get_allocation_info(value); + test_info = get_allocation_info(test_value); + return test_info->original_value == info->original_value; } -/** find a value in the end-assignment of a basic block +/** + * find a value in the end-assignment of a basic block * @returns the index into the assignment array if found * -1 if not found */ static int find_value_in_block_info(block_info_t *info, ir_node *value) { - unsigned r; - assignment_t *assignments = info->assignments; + unsigned r; + ir_node **assignments = info->assignments; for (r = 0; r < n_regs; ++r) { - const assignment_t *assignment = &assignments[r]; - if (is_copy_of(assignment->value, value)) + ir_node *a_value = assignments[r]; + + if (a_value == NULL) + continue; + if (is_copy_of(a_value, value)) return (int) r; } @@ -871,12 +1340,12 @@ static int find_value_in_block_info(block_info_t *info, ir_node *value) */ static void add_phi_permutations(ir_node *block, int p) { - unsigned r; - unsigned *permutation; - assignment_t *old_assignments; - int need_permutation; - ir_node *node; - ir_node *pred = get_Block_cfgpred_block(block, p); + unsigned r; + unsigned *permutation; + ir_node **old_assignments; + bool need_permutation; + ir_node *node; + ir_node *pred = get_Block_cfgpred_block(block, p); block_info_t *pred_info = get_block_info(pred); @@ -890,7 +1359,7 @@ static void add_phi_permutations(ir_node *block, int p) } /* check phi nodes */ - need_permutation = 0; + need_permutation = false; node = sched_first(block); for ( ; is_Phi(node); node = sched_next(node)) { const arch_register_t *reg; @@ -902,166 +1371,358 @@ static void add_phi_permutations(ir_node *block, int p) continue; op = get_Phi_pred(node, p); + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + a = find_value_in_block_info(pred_info, op); assert(a >= 0); - reg = arch_get_irn_register(node); + reg = arch_get_irn_register(node); regn = arch_register_get_index(reg); if (regn != a) { permutation[regn] = a; - need_permutation = 1; + need_permutation = true; } } - old_assignments = assignments; - assignments = pred_info->assignments; - permutate_values(NULL, be_get_end_of_block_insertion_point(pred), - permutation); - assignments = old_assignments; + if (need_permutation) { + /* permute values at end of predecessor */ + old_assignments = assignments; + assignments = pred_info->assignments; + permute_values(NULL, be_get_end_of_block_insertion_point(pred), + permutation); + assignments = old_assignments; + } + /* change phi nodes to use the copied values */ node = sched_first(block); for ( ; is_Phi(node); node = sched_next(node)) { - int a; - ir_node *op; + int a; + ir_node *op; if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; op = get_Phi_pred(node, p); - /* TODO: optimize */ - a = find_value_in_block_info(pred_info, op); - assert(a >= 0); + /* no need to do anything for Unknown inputs */ + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; - op = pred_info->assignments[a].value; + /* we have permuted all values into the correct registers so we can + simply query which value occupies the phis register in the + predecessor */ + a = arch_register_get_index(arch_get_irn_register(node)); + op = pred_info->assignments[a]; set_Phi_pred(node, p, op); } } +/** + * Set preferences for a phis register based on the registers used on the + * phi inputs. + */ +static void adapt_phi_prefs(ir_node *phi) +{ + int i; + int arity = get_irn_arity(phi); + ir_node *block = get_nodes_block(phi); + allocation_info_t *info = get_allocation_info(phi); + + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(phi, i); + const arch_register_t *reg = arch_get_irn_register(op); + ir_node *pred_block; + block_info_t *pred_block_info; + float weight; + unsigned r; + + if (reg == NULL) + continue; + /* we only give the bonus if the predecessor already has registers + * assigned, otherwise we only see a dummy value + * and any conclusions about its register are useless */ + pred_block = get_Block_cfgpred_block(block, i); + pred_block_info = get_block_info(pred_block); + if (!pred_block_info->processed) + continue; + + /* give bonus for already assigned register */ + weight = get_block_execfreq(execfreqs, pred_block); + r = arch_register_get_index(reg); + info->prefs[r] += weight * AFF_PHI; + } +} + +/** + * After a phi has been assigned a register propagate preference inputs + * to the phi inputs. + */ +static void propagate_phi_register(ir_node *phi, unsigned assigned_r) +{ + int i; + ir_node *block = get_nodes_block(phi); + int arity = get_irn_arity(phi); + + for (i = 0; i < arity; ++i) { + ir_node *op = get_Phi_pred(phi, i); + allocation_info_t *info = get_allocation_info(op); + ir_node *pred_block = get_Block_cfgpred_block(block, i); + unsigned r; + float weight + = get_block_execfreq(execfreqs, pred_block) * AFF_PHI; + + if (info->prefs[assigned_r] >= weight) + continue; + + /* promote the prefered register */ + for (r = 0; r < n_regs; ++r) { + if (r == assigned_r) { + info->prefs[r] = AFF_PHI * weight; + } else { + info->prefs[r] -= AFF_PHI * weight; + } + } + + if (is_Phi(op)) + propagate_phi_register(op, assigned_r); + } +} + +static void assign_phi_registers(ir_node *block) +{ + int n_phis = 0; + int n; + int res; + int *assignment; + ir_node *node; + hungarian_problem_t *bp; + + /* count phi nodes */ + sched_foreach(block, node) { + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + ++n_phis; + } + + if (n_phis == 0) + return; + + /* build a bipartite matching problem for all phi nodes */ + bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); + n = 0; + sched_foreach(block, node) { + unsigned r; + + allocation_info_t *info; + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + /* give boni for predecessor colorings */ + adapt_phi_prefs(node); + /* add stuff to bipartite problem */ + info = get_allocation_info(node); + DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); + for (r = 0; r < n_regs; ++r) { + float costs; + + if (!rbitset_is_set(normal_regs, r)) + continue; + + costs = info->prefs[r]; + costs = costs < 0 ? -logf(-costs+1) : logf(costs+1); + costs *= 100; + costs += 10000; + hungarian_add(bp, n, r, costs); + DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, + info->prefs[r])); + } + DB((dbg, LEVEL_3, "\n")); + ++n; + } + + //hungarian_print_cost_matrix(bp, 7); + hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); + + assignment = ALLOCAN(int, n_regs); + res = hungarian_solve(bp, assignment, NULL, 0); + assert(res == 0); + + /* apply results */ + n = 0; + sched_foreach(block, node) { + unsigned r; + const arch_register_t *reg; + + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + r = assignment[n++]; + assert(rbitset_is_set(normal_regs, r)); + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); + + /* adapt preferences for phi inputs */ + propagate_phi_register(node, r); + } +} + /** * Walker: assign registers to all nodes of a block that - * needs registers from the currently considered register class. + * need registers from the currently considered register class. */ static void allocate_coalesce_block(ir_node *block, void *data) { int i; - unsigned r; ir_nodeset_t live_nodes; ir_nodeset_iterator_t iter; - ir_node *node, *start; + ir_node *node; int n_preds; block_info_t *block_info; block_info_t **pred_block_infos; + ir_node **phi_ins; + unsigned *forbidden_regs; /**< collects registers which must + not be used for optimistic splits */ (void) data; - DB((dbg, LEVEL_2, "Allocating in block %+F\n", - block)); + DB((dbg, LEVEL_2, "* Block %+F\n", block)); /* clear assignments */ block_info = get_block_info(block); assignments = block_info->assignments; - for (r = 0; r < n_regs; ++r) { - assignment_t *assignment = &assignments[r]; - ir_node *value = assignment->value; - allocation_info_t *info; - - if (value == NULL) - continue; - - info = get_allocation_info(value); - info->current_assignment = assignment; - } - ir_nodeset_init(&live_nodes); /* gather regalloc infos of predecessor blocks */ - n_preds = get_Block_n_cfgpreds(block); - pred_block_infos = ALLOCAN(block_info_t*, n_preds); + n_preds = get_Block_n_cfgpreds(block); + pred_block_infos = ALLOCAN(block_info_t*, n_preds); for (i = 0; i < n_preds; ++i) { - ir_node *pred = get_Block_cfgpred_block(block, i); - pred_block_infos[i] = get_block_info(pred); + ir_node *pred = get_Block_cfgpred_block(block, i); + block_info_t *pred_info = get_block_info(pred); + pred_block_infos[i] = pred_info; } + phi_ins = ALLOCAN(ir_node*, n_preds); + /* collect live-in nodes and preassigned values */ be_lv_foreach(lv, block, be_lv_state_in, i) { const arch_register_t *reg; + int p; + bool need_phi = false; node = be_lv_get_irn(lv, block, i); if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; - /* remember that this node is live at the beginning of the block */ - ir_nodeset_insert(&live_nodes, node); - - /* if the node already has a register assigned use it */ - reg = arch_get_irn_register(node); - if (reg != NULL) { - /* TODO: consult pred-block infos here. The value could be copied - away in some/all predecessor blocks. We need to construct - phi-nodes in this case. - We even need to construct some Phi_0 like constructs in cases - where the predecessor allocation is not determined yet. */ - use_reg(node, reg); + /* check all predecessors for this value, if it is not everywhere the + same or unknown then we have to construct a phi + (we collect the potential phi inputs here) */ + for (p = 0; p < n_preds; ++p) { + block_info_t *pred_info = pred_block_infos[p]; + + if (!pred_info->processed) { + /* use node for now, it will get fixed later */ + phi_ins[p] = node; + need_phi = true; + } else { + int a = find_value_in_block_info(pred_info, node); + + /* must live out of predecessor */ + assert(a >= 0); + phi_ins[p] = pred_info->assignments[a]; + /* different value from last time? then we need a phi */ + if (p > 0 && phi_ins[p-1] != phi_ins[p]) { + need_phi = true; + } + } } - } - /* handle phis... */ - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - const arch_register_t *reg; + if (need_phi) { + ir_mode *mode = get_irn_mode(node); + const arch_register_req_t *req = get_default_req_current_cls(); + ir_node *phi; + int i; - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; + phi = new_r_Phi(block, n_preds, phi_ins, mode); + be_set_phi_reg_req(phi, req); + + DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); +#ifdef DEBUG_libfirm + for (i = 0; i < n_preds; ++i) { + DB((dbg, LEVEL_3, " %+F", phi_ins[i])); + } + DB((dbg, LEVEL_3, "\n")); +#endif + mark_as_copy_of(phi, node); + sched_add_after(block, phi); + + node = phi; + } else { + allocation_info_t *info = get_allocation_info(node); + info->current_value = phi_ins[0]; - /* fill in regs already assigned */ + /* Grab 1 of the inputs we constructed (might not be the same as + * "node" as we could see the same copy of the value in all + * predecessors */ + node = phi_ins[0]; + } + + /* if the node already has a register assigned use it */ reg = arch_get_irn_register(node); if (reg != NULL) { use_reg(node, reg); - } else { - /* TODO: give boni for registers already assigned at the - predecessors */ - assign_reg(block, node); } + + /* remember that this node is live at the beginning of the block */ + ir_nodeset_insert(&live_nodes, node); } - start = node; - /* assign regs for live-in values */ - foreach_ir_nodeset(&live_nodes, node, iter) { - const arch_register_t *reg = arch_get_irn_register(node); - if (reg != NULL) - continue; + rbitset_alloca(forbidden_regs, n_regs); - assign_reg(block, node); - } + /* handle phis... */ + assign_phi_registers(block); - /* permutate values at end of predecessor blocks in case of phi-nodes */ - if (n_preds > 1) { - int p; - for (p = 0; p < n_preds; ++p) { - add_phi_permutations(block, p); - } + /* all live-ins must have a register */ +#ifdef DEBUG_libfirm + foreach_ir_nodeset(&live_nodes, node, iter) { + const arch_register_t *reg = arch_get_irn_register(node); + assert(reg != NULL); } +#endif /* assign instructions in the block */ - for (node = start; !sched_is_end(node); node = sched_next(node)) { - int arity = get_irn_arity(node); + sched_foreach(block, node) { int i; + int arity; + + /* phis are already assigned */ + if (is_Phi(node)) + continue; + + rewire_inputs(node); /* enforce use constraints */ - enforce_constraints(&live_nodes, node); + rbitset_clear_all(forbidden_regs, n_regs); + enforce_constraints(&live_nodes, node, forbidden_regs); - /* exchange values to copied values where needed */ - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - assignment_t *assignment; + rewire_inputs(node); + /* we may not use registers used for inputs for optimistic splits */ + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; - assignment = get_current_assignment(op); - assert(assignment != NULL); - if (op != assignment->value) { - set_irn_n(node, i, assignment->value); - } + + reg = arch_get_irn_register(op); + rbitset_set(forbidden_regs, arch_register_get_index(reg)); } /* free registers of values last used at this instruction */ @@ -1075,17 +1736,25 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_node *proj = get_edge_src_irn(edge); if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - assign_reg(block, proj); + assign_reg(block, proj, forbidden_regs); } } else if (arch_irn_consider_in_reg_alloc(cls, node)) { - assign_reg(block, node); + assign_reg(block, node, forbidden_regs); } } ir_nodeset_destroy(&live_nodes); assignments = NULL; - block_info->processed = 1; + block_info->processed = true; + + /* permute values at end of predecessor blocks in case of phi-nodes */ + if (n_preds > 1) { + int p; + for (p = 0; p < n_preds; ++p) { + add_phi_permutations(block, p); + } + } /* if we have exactly 1 successor then we might be able to produce phi copies now */ @@ -1102,27 +1771,154 @@ static void allocate_coalesce_block(ir_node *block, void *data) } } +typedef struct block_costs_t block_costs_t; +struct block_costs_t { + float costs; /**< costs of the block */ + int dfs_num; /**< depth first search number (to detect backedges) */ +}; + +static int cmp_block_costs(const void *d1, const void *d2) +{ + const ir_node * const *block1 = d1; + const ir_node * const *block2 = d2; + const block_costs_t *info1 = get_irn_link(*block1); + const block_costs_t *info2 = get_irn_link(*block2); + return QSORT_CMP(info2->costs, info1->costs); +} + +static void determine_block_order(void) +{ + int i; + ir_node **blocklist = be_get_cfgpostorder(irg); + int n_blocks = ARR_LEN(blocklist); + int dfs_num = 0; + pdeq *worklist = new_pdeq(); + ir_node **order = XMALLOCN(ir_node*, n_blocks); + int order_p = 0; + + /* clear block links... */ + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + set_irn_link(block, NULL); + } + + /* walk blocks in reverse postorder, the costs for each block are the + * sum of the costs of its predecessors (excluding the costs on backedges + * which we can't determine) */ + for (i = n_blocks-1; i >= 0; --i) { + block_costs_t *cost_info; + ir_node *block = blocklist[i]; + + float execfreq = get_block_execfreq(execfreqs, block); + float costs = execfreq; + int n_cfgpreds = get_Block_n_cfgpreds(block); + int p; + for (p = 0; p < n_cfgpreds; ++p) { + ir_node *pred_block = get_Block_cfgpred_block(block, p); + block_costs_t *pred_costs = get_irn_link(pred_block); + /* we don't have any info for backedges */ + if (pred_costs == NULL) + continue; + costs += pred_costs->costs; + } + + cost_info = OALLOCZ(&obst, block_costs_t); + cost_info->costs = costs; + cost_info->dfs_num = dfs_num++; + set_irn_link(block, cost_info); + } + + /* sort array by block costs */ + qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs); + + ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); + inc_irg_block_visited(irg); + + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + if (Block_block_visited(block)) + continue; + + /* continually add predecessors with highest costs to worklist + * (without using backedges) */ + do { + block_costs_t *info = get_irn_link(block); + ir_node *best_pred = NULL; + float best_costs = -1; + int n_cfgpred = get_Block_n_cfgpreds(block); + int i; + + pdeq_putr(worklist, block); + mark_Block_block_visited(block); + for (i = 0; i < n_cfgpred; ++i) { + ir_node *pred_block = get_Block_cfgpred_block(block, i); + block_costs_t *pred_info = get_irn_link(pred_block); + + /* ignore backedges */ + if (pred_info->dfs_num > info->dfs_num) + continue; + + if (info->costs > best_costs) { + best_costs = info->costs; + best_pred = pred_block; + } + } + block = best_pred; + } while(block != NULL && !Block_block_visited(block)); + + /* now put all nodes in the worklist in our final order */ + while (!pdeq_empty(worklist)) { + ir_node *pblock = pdeq_getr(worklist); + assert(order_p < n_blocks); + order[order_p++] = pblock; + } + } + assert(order_p == n_blocks); + del_pdeq(worklist); + + ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED); + + DEL_ARR_F(blocklist); + + obstack_free(&obst, NULL); + obstack_init(&obst); + + block_order = order; + n_block_order = n_blocks; +} + /** * Run the register allocator for the current register class. */ static void be_straight_alloc_cls(void) { - n_regs = arch_register_class_n_regs(cls); - lv = be_assure_liveness(birg); + int i; + + lv = be_assure_liveness(birg); be_liveness_assure_sets(lv); be_liveness_assure_chk(lv); - assignments = NULL; - - ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_IRN_VISITED); - inc_irg_visited(irg); + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name)); + be_clear_links(irg); irg_block_walk_graph(irg, NULL, analyze_block, NULL); - irg_block_walk_graph(irg, NULL, allocate_coalesce_block, NULL); + combine_congruence_classes(); + + for (i = 0; i < n_block_order; ++i) { + ir_node *block = block_order[i]; + allocate_coalesce_block(block, NULL); + } - ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_IRN_VISITED); + ir_free_resources(irg, IR_RESOURCE_IRN_LINK); +} + +static void dump(int mask, ir_graph *irg, const char *suffix, + void (*dumper)(ir_graph *, const char *)) +{ + if(birg->main_env->options->dump_flags & mask) + be_dump(irg, suffix, dumper); } /** @@ -1135,6 +1931,8 @@ static void spill(void) be_pre_spill_prepare_constr(birg, cls); BE_TIMER_POP(t_ra_constr); + dump(DUMP_RA, irg, "-spillprepare", dump_ir_block_graph_sched); + /* spill */ BE_TIMER_PUSH(t_ra_spill); be_do_spill(birg, cls); @@ -1143,6 +1941,8 @@ static void spill(void) BE_TIMER_PUSH(t_ra_spill_apply); check_for_memory_operands(irg); BE_TIMER_POP(t_ra_spill_apply); + + dump(DUMP_RA, irg, "-spill", dump_ir_block_graph_sched); } /** @@ -1160,28 +1960,29 @@ static void be_straight_alloc(be_irg_t *new_birg) irg = be_get_birg_irg(birg); execfreqs = birg->exec_freq; - /* TODO: extract some of the stuff from bechordal allocator, like - * statistics, time measurements, etc. and use them here too */ + /* determine a good coloring order */ + determine_block_order(); for (c = 0; c < n_cls; ++c) { - cls = arch_env_get_reg_class(arch_env, c); + cls = arch_env_get_reg_class(arch_env, c); + default_cls_req = NULL; if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra) continue; - stat_ev_ctx_push_str("bestraight_cls", cls->name); + stat_ev_ctx_push_str("regcls", cls->name); - n_regs = cls->n_regs; - ignore_regs = bitset_malloc(n_regs); - be_put_ignore_regs(birg, cls, ignore_regs); + n_regs = arch_register_class_n_regs(cls); + normal_regs = rbitset_malloc(n_regs); + be_abi_set_non_ignore_regs(birg->abi, cls, normal_regs); spill(); /* verify schedule and register pressure */ BE_TIMER_PUSH(t_verify); - if (birg->main_env->options->vrfy_option == BE_CH_VRFY_WARN) { + if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { be_verify_schedule(birg); be_verify_register_pressure(birg, cls, irg); - } else if (birg->main_env->options->vrfy_option == BE_CH_VRFY_ASSERT) { + } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { assert(be_verify_schedule(birg) && "Schedule verification failed"); assert(be_verify_register_pressure(birg, cls, irg) && "Register pressure verification failed"); @@ -1192,15 +1993,23 @@ static void be_straight_alloc(be_irg_t *new_birg) be_straight_alloc_cls(); BE_TIMER_POP(t_ra_color); - bitset_free(ignore_regs); + /* we most probably constructed new Phis so liveness info is invalid + * now */ + /* TODO: test liveness_introduce */ + be_liveness_invalidate(lv); + free(normal_regs); - stat_ev_ctx_pop("bestraight_cls"); + stat_ev_ctx_pop("regcls"); } + BE_TIMER_PUSH(t_ra_spill_apply); + be_abi_fix_stack_nodes(birg->abi); + BE_TIMER_POP(t_ra_spill_apply); + BE_TIMER_PUSH(t_verify); - if (birg->main_env->options->vrfy_option == BE_CH_VRFY_WARN) { + if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { be_verify_register_allocation(birg); - } else if(birg->main_env->options->vrfy_option == BE_CH_VRFY_ASSERT) { + } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { assert(be_verify_register_allocation(birg) && "Register allocation invalid"); }