X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbeprefalloc.c;h=d0080e76367ea9b100933fceb8a38b6aaf5e5bfd;hb=b2709c45d975072f86e99e29083eda4b43a0210b;hp=85b7f0c5a975329518c661715d8244619a3b61a6;hpb=f8cc15664f571aa7ef89d6f6bc8d5bd2b8ca7d53;p=libfirm diff --git a/ir/be/beprefalloc.c b/ir/be/beprefalloc.c index 85b7f0c5a..d0080e763 100644 --- a/ir/be/beprefalloc.c +++ b/ir/be/beprefalloc.c @@ -60,7 +60,7 @@ #include "unionfind.h" #include "pdeq.h" #include "hungarian.h" - +#include "statev.h" #include "beabi.h" #include "bechordal_t.h" #include "be.h" @@ -90,22 +90,11 @@ static struct obstack obst; static ir_graph *irg; static const arch_register_class_t *cls; static be_lv_t *lv; -static const ir_exec_freq *execfreqs; static unsigned n_regs; static unsigned *normal_regs; static int *congruence_classes; static ir_node **block_order; static size_t n_block_order; -static int create_preferences = true; -static int create_congruence_classes = true; -static int propagate_phi_registers = true; - -static const lc_opt_table_entry_t options[] = { - LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences), - LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes), - LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers), - LC_OPT_LAST -}; /** currently active assignments (while processing a basic block) * maps registers to values(their current copies) */ @@ -119,7 +108,7 @@ struct allocation_info_t { unsigned last_uses[2]; /**< bitset indicating last uses (input pos) */ ir_node *current_value; /**< copy of the value that should be used */ ir_node *original_value; /**< for copies point to original value */ - float prefs[0]; /**< register preferences */ + float prefs[]; /**< register preferences */ }; typedef struct allocation_info_t allocation_info_t; @@ -133,7 +122,7 @@ typedef struct reg_pref_t reg_pref_t; /** per basic-block information */ struct block_info_t { bool processed; /**< indicate whether block is processed */ - ir_node *assignments[0]; /**< register assignments at end of block */ + ir_node *assignments[]; /**< register assignments at end of block */ }; typedef struct block_info_t block_info_t; @@ -185,12 +174,11 @@ static block_info_t *get_block_info(ir_node *block) */ static void mark_as_copy_of(ir_node *copy, ir_node *value) { - ir_node *original; allocation_info_t *info = get_allocation_info(value); allocation_info_t *copy_info = get_allocation_info(copy); /* find original value */ - original = info->original_value; + ir_node *original = info->original_value; if (original != value) { info = get_allocation_info(original); } @@ -218,14 +206,10 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, float penalty, const unsigned* limited, ir_node *node) { - ir_nodeset_iterator_t iter; - unsigned r; - size_t n_allowed; - allocation_info_t *info = get_allocation_info(node); - ir_node *neighbor; + allocation_info_t *info = get_allocation_info(node); /* give penalty for all forbidden regs */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; @@ -237,7 +221,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, return; penalty *= NEIGHBOR_FACTOR; - n_allowed = rbitset_popcount(limited, n_regs); + size_t n_allowed = rbitset_popcount(limited, n_regs); if (n_allowed > 1) { /* only create a very weak penalty if multiple regs are allowed */ penalty = (penalty * 0.8f) / n_allowed; @@ -251,7 +235,7 @@ static 
void give_penalties_for_limits(const ir_nodeset_t *live_nodes, continue; neighbor_info = get_allocation_info(neighbor); - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(limited, r)) continue; @@ -269,32 +253,25 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, * @param weight the weight * @param node the current node */ -static void check_defs(const ir_nodeset_t *live_nodes, float weight, - ir_node *node) +static void check_defs(ir_nodeset_t const *const live_nodes, float const weight, ir_node *const node, arch_register_req_t const *const req) { - const arch_register_req_t *req = arch_get_irn_register_req(node); - if (req->type & arch_register_req_type_limited) { + if (arch_register_req_is(req, limited)) { const unsigned *limited = req->limited; float penalty = weight * DEF_FACTOR; give_penalties_for_limits(live_nodes, penalty, limited, node); } - if (req->type & arch_register_req_type_should_be_same) { + if (arch_register_req_is(req, should_be_same)) { ir_node *insn = skip_Proj(node); allocation_info_t *info = get_allocation_info(node); int arity = get_irn_arity(insn); - int i; float factor = 1.0f / rbitset_popcount(&req->other_same, arity); - for (i = 0; i < arity; ++i) { - ir_node *op; - unsigned r; - allocation_info_t *op_info; - + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(&req->other_same, i)) continue; - op = get_irn_n(insn, i); + ir_node *op = get_irn_n(insn, i); /* if we the value at the should_be_same input doesn't die at the * node, then it is no use to propagate the constraints (since a @@ -302,8 +279,8 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, if (ir_nodeset_contains(live_nodes, op)) continue; - op_info = get_allocation_info(op); - for (r = 0; r < n_regs; ++r) { + allocation_info_t *op_info = get_allocation_info(op); + for (unsigned r = 0; r < n_regs; ++r) { op_info->prefs[r] += info->prefs[r] * factor; } } @@ -316,42 +293,34 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, */ static void analyze_block(ir_node *block, void *data) { - float weight = (float)get_block_execfreq(execfreqs, block); - ir_nodeset_t live_nodes; - ir_node *node; + float weight = (float)get_block_execfreq(block); + ir_nodeset_t live_nodes; (void) data; ir_nodeset_init(&live_nodes); be_liveness_end_of_block(lv, cls, block, &live_nodes); sched_foreach_reverse(block, node) { - allocation_info_t *info; - int i; - int arity; - if (is_Phi(node)) break; - if (create_preferences) { - ir_node *value; - be_foreach_definition(node, cls, value, - check_defs(&live_nodes, weight, value); - ); - } + be_foreach_definition(node, cls, value, req, + check_defs(&live_nodes, weight, value, req); + ); /* mark last uses */ - arity = get_irn_arity(node); + int arity = get_irn_arity(node); /* the allocation info node currently only uses 1 unsigned value to mark last used inputs. So we will fail for a node with more than 32 inputs. 
*/ + allocation_info_t *info = get_allocation_info(node); if (arity >= (int) sizeof(info->last_uses) * 8) { panic("Node with more than %d inputs not supported yet", (int) sizeof(info->last_uses) * 8); } - info = get_allocation_info(node); - for (i = 0; i < arity; ++i) { + for (int i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); const arch_register_req_t *req = arch_get_irn_register_req(op); if (req->cls != cls) @@ -365,57 +334,37 @@ static void analyze_block(ir_node *block, void *data) be_liveness_transfer(cls, node, &live_nodes); - if (create_preferences) { - /* update weights based on usage constraints */ - for (i = 0; i < arity; ++i) { - const arch_register_req_t *req; - const unsigned *limited; - ir_node *op = get_irn_n(node, i); - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - req = arch_get_irn_register_req_in(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; + /* update weights based on usage constraints */ + be_foreach_use(node, cls, req, op, op_req, + if (!arch_register_req_is(req, limited)) + continue; - limited = req->limited; - give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, - limited, op); - } - } + give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, req->limited, op); + ); } ir_nodeset_destroy(&live_nodes); } -static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) +static void congruence_def(ir_nodeset_t *const live_nodes, ir_node const *const node, arch_register_req_t const *const req) { - const arch_register_req_t *req = arch_get_irn_register_req(node); - /* should be same constraint? */ - if (req->type & arch_register_req_type_should_be_same) { - const ir_node *insn = skip_Proj_const(node); - int arity = get_irn_arity(insn); - int i; - unsigned node_idx = get_irn_idx(node); - node_idx = uf_find(congruence_classes, node_idx); - - for (i = 0; i < arity; ++i) { - ir_node *live; - ir_node *op; - int op_idx; - ir_nodeset_iterator_t iter; - bool interferes = false; + if (arch_register_req_is(req, should_be_same)) { + const ir_node *insn = skip_Proj_const(node); + int arity = get_irn_arity(insn); + unsigned node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(&req->other_same, i)) continue; - op = get_irn_n(insn, i); - op_idx = get_irn_idx(op); + ir_node *op = get_irn_n(insn, i); + int op_idx = get_irn_idx(op); op_idx = uf_find(congruence_classes, op_idx); /* do we interfere with the value */ + bool interferes = false; foreach_ir_nodeset(live_nodes, live, iter) { int lv_idx = get_irn_idx(live); lv_idx = uf_find(congruence_classes, lv_idx); @@ -428,7 +377,7 @@ static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) if (interferes) continue; - node_idx = uf_union(congruence_classes, node_idx, op_idx); + uf_union(congruence_classes, node_idx, op_idx); DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", node, op)); /* one should_be_same is enough... 
*/ @@ -439,53 +388,48 @@ static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) static void create_congruence_class(ir_node *block, void *data) { - ir_nodeset_t live_nodes; - ir_node *node; + ir_nodeset_t live_nodes; (void) data; ir_nodeset_init(&live_nodes); be_liveness_end_of_block(lv, cls, block, &live_nodes); /* check should be same constraints */ + ir_node *last_phi = NULL; sched_foreach_reverse(block, node) { - ir_node *value; - if (is_Phi(node)) + if (is_Phi(node)) { + last_phi = node; break; + } - be_foreach_definition(node, cls, value, - congruence_def(&live_nodes, value); + be_foreach_definition(node, cls, value, req, + congruence_def(&live_nodes, value, req); ); be_liveness_transfer(cls, node, &live_nodes); } + if (!last_phi) { + ir_nodeset_destroy(&live_nodes); + return; + } /* check phi congruence classes */ - sched_foreach_reverse_from(node, node) { - int i; - int arity; - int node_idx; - assert(is_Phi(node)); + sched_foreach_reverse_from(last_phi, phi) { + assert(is_Phi(phi)); - if (!arch_irn_consider_in_reg_alloc(cls, node)) + if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - node_idx = get_irn_idx(node); + int node_idx = get_irn_idx(phi); node_idx = uf_find(congruence_classes, node_idx); - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - bool interferes = false; - ir_nodeset_iterator_t iter; - unsigned r; - int old_node_idx; - ir_node *live; - ir_node *phi; - allocation_info_t *head_info; - allocation_info_t *other_info; - ir_node *op = get_Phi_pred(node, i); - int op_idx = get_irn_idx(op); + int arity = get_irn_arity(phi); + for (int i = 0; i < arity; ++i) { + ir_node *op = get_Phi_pred(phi, i); + int op_idx = get_irn_idx(op); op_idx = uf_find(congruence_classes, op_idx); /* do we interfere with the value */ + bool interferes = false; foreach_ir_nodeset(&live_nodes, live, iter) { int lv_idx = get_irn_idx(live); lv_idx = uf_find(congruence_classes, lv_idx); @@ -519,30 +463,30 @@ static void create_congruence_class(ir_node *block, void *data) continue; /* merge the 2 congruence classes and sum up their preferences */ - old_node_idx = node_idx; + int old_node_idx = node_idx; node_idx = uf_union(congruence_classes, node_idx, op_idx); DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", - node, op)); + phi, op)); old_node_idx = node_idx == old_node_idx ? 
op_idx : old_node_idx; - head_info = get_allocation_info(get_idx_irn(irg, node_idx)); - other_info = get_allocation_info(get_idx_irn(irg, old_node_idx)); - for (r = 0; r < n_regs; ++r) { + allocation_info_t *head_info + = get_allocation_info(get_idx_irn(irg, node_idx)); + allocation_info_t *other_info + = get_allocation_info(get_idx_irn(irg, old_node_idx)); + for (unsigned r = 0; r < n_regs; ++r) { head_info->prefs[r] += other_info->prefs[r]; } } } + ir_nodeset_destroy(&live_nodes); } static void set_congruence_prefs(ir_node *node, void *data) { - allocation_info_t *info; - allocation_info_t *head_info; + (void) data; unsigned node_idx = get_irn_idx(node); unsigned node_set = uf_find(congruence_classes, node_idx); - (void) data; - /* head of congruence class or not in any class */ if (node_set == node_idx) return; @@ -550,8 +494,9 @@ static void set_congruence_prefs(ir_node *node, void *data) if (!arch_irn_consider_in_reg_alloc(cls, node)) return; - head_info = get_allocation_info(get_idx_irn(irg, node_set)); - info = get_allocation_info(node); + ir_node *head = get_idx_irn(irg, node_set); + allocation_info_t *head_info = get_allocation_info(head); + allocation_info_t *info = get_allocation_info(node); memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); } @@ -577,27 +522,28 @@ static void combine_congruence_classes(void) * @param node the node * @param reg the register */ -static void use_reg(ir_node *node, const arch_register_t *reg) +static void use_reg(ir_node *node, const arch_register_t *reg, unsigned width) { - unsigned r = arch_register_get_index(reg); - assignments[r] = node; + unsigned r = reg->index; + for (unsigned r0 = r; r0 < r + width; ++r0) + assignments[r0] = node; arch_set_irn_register(node, reg); } static void free_reg_of_value(ir_node *node) { - const arch_register_t *reg; - unsigned r; - if (!arch_irn_consider_in_reg_alloc(cls, node)) return; - reg = arch_get_irn_register(node); - r = arch_register_get_index(reg); + const arch_register_t *reg = arch_get_irn_register(node); + const arch_register_req_t *req = arch_get_irn_register_req(node); + unsigned r = reg->index; /* assignment->value may be NULL if a value is used at 2 inputs - so it gets freed twice. */ - assert(assignments[r] == node || assignments[r] == NULL); - assignments[r] = NULL; + * so it gets freed twice. */ + for (unsigned r0 = r; r0 < r + req->width; ++r0) { + assert(assignments[r0] == node || assignments[r0] == NULL); + assignments[r0] = NULL; + } } /** @@ -617,9 +563,7 @@ static int compare_reg_pref(const void *e1, const void *e2) static void fill_sort_candidates(reg_pref_t *regprefs, const allocation_info_t *info) { - unsigned r; - - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { float pref = info->prefs[r]; regprefs[r].num = r; regprefs[r].pref = pref; @@ -632,45 +576,31 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, float pref, float pref_delta, unsigned *forbidden_regs, int recursion) { - const arch_register_t *from_reg; - const arch_register_t *reg; - ir_node *original_insn; - ir_node *block; - ir_node *copy; - unsigned r = 0; - unsigned from_r; - unsigned i; - allocation_info_t *info = get_allocation_info(to_split); - reg_pref_t *prefs; - float delta = 0; - float split_threshold; - (void) pref; + unsigned r = 0; + allocation_info_t *info = get_allocation_info(to_split); + float delta = 0; - /* stupid hack: don't optimisticallt split don't spill nodes... + /* stupid hack: don't optimistically split don't spill nodes... 
* (so we don't split away the values produced because of * must_be_different constraints) */ - original_insn = skip_Proj(info->original_value); + ir_node *original_insn = skip_Proj(info->original_value); if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill) return false; - from_reg = arch_get_irn_register(to_split); - from_r = arch_register_get_index(from_reg); - block = get_nodes_block(before); - split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA; + const arch_register_t *from_reg = arch_get_irn_register(to_split); + unsigned from_r = from_reg->index; + ir_node *block = get_nodes_block(before); + float split_threshold = (float)get_block_execfreq(block) * SPLIT_DELTA; if (pref_delta < split_threshold*0.5) return false; /* find the best free position where we could move to */ - prefs = ALLOCAN(reg_pref_t, n_regs); + reg_pref_t *prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(prefs, info); + unsigned i; for (i = 0; i < n_regs; ++i) { - float apref; - float apref_delta; - bool res; - bool old_source_state; - /* we need a normal register which is not an output register an different from the current register of to_split */ r = prefs[i].num; @@ -697,17 +627,17 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) continue; - apref = prefs[i].pref; - apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; + float apref = prefs[i].pref; + float apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; apref_delta += pref_delta - split_threshold; /* our source register isn't a useful destination for recursive splits */ - old_source_state = rbitset_is_set(forbidden_regs, from_r); + bool old_source_state = rbitset_is_set(forbidden_regs, from_r); rbitset_set(forbidden_regs, from_r); /* try recursive split */ - res = try_optimistic_split(assignments[r], before, apref, - apref_delta, forbidden_regs, recursion+1); + bool res = try_optimistic_split(assignments[r], before, apref, + apref_delta, forbidden_regs, recursion+1); /* restore our destination */ if (old_source_state) { rbitset_set(forbidden_regs, from_r); @@ -721,13 +651,14 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, if (i >= n_regs) return false; - reg = arch_register_for_index(cls, r); - copy = be_new_Copy(block, to_split); + const arch_register_t *reg = arch_register_for_index(cls, r); + ir_node *copy = be_new_Copy(block, to_split); + unsigned width = 1; mark_as_copy_of(copy, to_split); /* hacky, but correct here */ - if (assignments[arch_register_get_index(from_reg)] == to_split) + if (assignments[from_reg->index] == to_split) free_reg_of_value(to_split); - use_reg(copy, reg); + use_reg(copy, reg, width); sched_add_before(before, copy); DB((dbg, LEVEL_3, @@ -739,51 +670,36 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, /** * Determine and assign a register for node @p node */ -static void assign_reg(const ir_node *block, ir_node *node, - unsigned *forbidden_regs) +static void assign_reg(ir_node const *const block, ir_node *const node, arch_register_req_t const *const req, unsigned *const forbidden_regs) { - const arch_register_t *final_reg; - allocation_info_t *info; - const arch_register_req_t *req; - reg_pref_t *reg_prefs; - ir_node *in_node; - unsigned r; - const unsigned *allowed_regs; - unsigned final_reg_index = 0; - assert(!is_Phi(node)); /* preassigned register? 
*/ - final_reg = arch_get_irn_register(node); + arch_register_t const *final_reg = arch_get_irn_register(node); + unsigned const width = req->width; if (final_reg != NULL) { DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, final_reg->name)); - use_reg(node, final_reg); + use_reg(node, final_reg, width); return; } - req = arch_get_irn_register_req(node); /* ignore reqs must be preassigned */ - assert (! (req->type & arch_register_req_type_ignore)); + assert(!arch_register_req_is(req, ignore)); /* give should_be_same boni */ - info = get_allocation_info(node); - in_node = skip_Proj(node); - if (req->type & arch_register_req_type_should_be_same) { - float weight = (float)get_block_execfreq(execfreqs, block); + allocation_info_t *info = get_allocation_info(node); + ir_node *in_node = skip_Proj(node); + if (arch_register_req_is(req, should_be_same)) { + float weight = (float)get_block_execfreq(block); int arity = get_irn_arity(in_node); - int i; assert(arity <= (int) sizeof(req->other_same) * 8); - for (i = 0; i < arity; ++i) { - ir_node *in; - const arch_register_t *reg; - unsigned reg_index; + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(&req->other_same, i)) continue; - in = get_irn_n(in_node, i); - reg = arch_get_irn_register(in); - assert(reg != NULL); - reg_index = arch_register_get_index(reg); + ir_node *in = get_irn_n(in_node, i); + const arch_register_t *reg = arch_get_irn_register(in); + unsigned reg_index = reg->index; /* if the value didn't die here then we should not propagate the * should_be_same info */ @@ -796,44 +712,50 @@ static void assign_reg(const ir_node *block, ir_node *node, /* create list of register candidates and sort by their preference */ DB((dbg, LEVEL_2, "Candidates for %+F:", node)); - reg_prefs = ALLOCAN(reg_pref_t, n_regs); + reg_pref_t *reg_prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(reg_prefs, info); - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { unsigned num = reg_prefs[r].num; - const arch_register_t *reg; - if (!rbitset_is_set(normal_regs, num)) continue; - reg = arch_register_for_index(cls, num); + const arch_register_t *reg = arch_register_for_index(cls, num); DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[r].pref)); } DB((dbg, LEVEL_2, "\n")); - allowed_regs = normal_regs; - if (req->type & arch_register_req_type_limited) { + const unsigned *allowed_regs = normal_regs; + if (arch_register_req_is(req, limited)) { allowed_regs = req->limited; } + unsigned final_reg_index = 0; + unsigned r; for (r = 0; r < n_regs; ++r) { - float pref, delta; - ir_node *before; - bool res; - final_reg_index = reg_prefs[r].num; if (!rbitset_is_set(allowed_regs, final_reg_index)) continue; /* alignment constraint? */ - if (req->width > 1 && (req->type & arch_register_req_type_aligned) - && (final_reg_index % req->width) != 0) - continue; + if (width > 1) { + if (arch_register_req_is(req, aligned) && (final_reg_index % width) != 0) + continue; + bool fine = true; + for (unsigned r0 = r+1; r0 < r+width; ++r0) { + if (assignments[r0] != NULL) + fine = false; + } + /* TODO: attempt optimistic split here */ + if (!fine) + continue; + } if (assignments[final_reg_index] == NULL) break; - pref = reg_prefs[r].pref; - delta = r+1 < n_regs ? pref - reg_prefs[r+1].pref : 0; - before = skip_Proj(node); - res = try_optimistic_split(assignments[final_reg_index], before, - pref, delta, forbidden_regs, 0); + float pref = reg_prefs[r].pref; + float delta = r+1 < n_regs ? 
pref - reg_prefs[r+1].pref : 0; + ir_node *before = skip_Proj(node); + bool res + = try_optimistic_split(assignments[final_reg_index], before, pref, + delta, forbidden_regs, 0); if (res) break; } @@ -845,7 +767,7 @@ static void assign_reg(const ir_node *block, ir_node *node, final_reg = arch_register_for_index(cls, final_reg_index); DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, final_reg->name)); - use_reg(node, final_reg); + use_reg(node, final_reg, width); } /** @@ -885,12 +807,10 @@ static void assign_reg(const ir_node *block, ir_node *node, static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, unsigned *permutation) { - unsigned *n_used = ALLOCANZ(unsigned, n_regs); - ir_node *block; - unsigned r; + unsigned *n_used = ALLOCANZ(unsigned, n_regs); /* determine how often each source register needs to be read */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { unsigned old_reg = permutation[r]; ir_node *value; @@ -905,14 +825,11 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, ++n_used[old_reg]; } - block = get_nodes_block(before); + ir_node *block = get_nodes_block(before); /* step1: create copies where immediately possible */ - for (r = 0; r < n_regs; /* empty */) { - ir_node *copy; - ir_node *src; - const arch_register_t *reg; - unsigned old_r = permutation[r]; + for (unsigned r = 0; r < n_regs; /* empty */) { + unsigned old_r = permutation[r]; /* - no need to do anything for fixed points. - we can't copy if the value in the dest reg is still needed */ @@ -922,21 +839,22 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, } /* create a copy */ - src = assignments[old_r]; - copy = be_new_Copy(block, src); + ir_node *src = assignments[old_r]; + ir_node *copy = be_new_Copy(block, src); sched_add_before(before, copy); - reg = arch_register_for_index(cls, r); + const arch_register_t *reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", copy, src, before, reg->name)); mark_as_copy_of(copy, src); - use_reg(copy, reg); + unsigned width = 1; /* TODO */ + use_reg(copy, reg, width); if (live_nodes != NULL) { ir_nodeset_insert(live_nodes, copy); } /* old register has 1 user less, permutation is resolved */ - assert(arch_register_get_index(arch_get_irn_register(src)) == old_r); + assert(arch_get_irn_register(src)->index == old_r); permutation[r] = r; assert(n_used[old_r] > 0); @@ -964,14 +882,8 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, * copies are preferable even for 2-cycles) */ /* create perms with the rest */ - for (r = 0; r < n_regs; /* empty */) { - const arch_register_t *reg; - unsigned old_r = permutation[r]; - unsigned r2; - ir_node *in[2]; - ir_node *perm; - ir_node *proj0; - ir_node *proj1; + for (unsigned r = 0; r < n_regs; /* empty */) { + unsigned old_r = permutation[r]; if (old_r == r) { ++r; @@ -982,24 +894,25 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, assert(n_used[old_r] == 1); /* exchange old_r and r2; after that old_r is a fixed point */ - r2 = permutation[old_r]; + unsigned r2 = permutation[old_r]; - in[0] = assignments[r2]; - in[1] = assignments[old_r]; - perm = be_new_Perm(cls, block, 2, in); + ir_node *in[2] = { assignments[r2], assignments[old_r] }; + ir_node *perm = be_new_Perm(cls, block, 2, in); sched_add_before(before, perm); DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", perm, in[0], in[1], before)); - proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0); + unsigned 
width = 1; /* TODO */ + + ir_node *proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0); mark_as_copy_of(proj0, in[0]); - reg = arch_register_for_index(cls, old_r); - use_reg(proj0, reg); + const arch_register_t *reg0 = arch_register_for_index(cls, old_r); + use_reg(proj0, reg0, width); - proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1); + ir_node *proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1); mark_as_copy_of(proj1, in[1]); - reg = arch_register_for_index(cls, r2); - use_reg(proj1, reg); + const arch_register_t *reg1 = arch_register_for_index(cls, r2); + use_reg(proj1, reg1, width); /* 1 value is now in the correct register */ permutation[old_r] = old_r; @@ -1015,9 +928,9 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, } } -#ifdef DEBUG_libfirm +#ifndef NDEBUG /* now we should only have fixpoints left */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { assert(permutation[r] == r); } #endif @@ -1034,16 +947,13 @@ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) allocation_info_t *info = get_allocation_info(node); const unsigned *last_uses = info->last_uses; int arity = get_irn_arity(node); - int i; - - for (i = 0; i < arity; ++i) { - ir_node *op; + for (int i = 0; i < arity; ++i) { /* check if one operand is the last use */ if (!rbitset_is_set(last_uses, i)) continue; - op = get_irn_n(node, i); + ir_node *op = get_irn_n(node, i); free_reg_of_value(op); ir_nodeset_remove(live_nodes, op); } @@ -1054,10 +964,8 @@ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) */ static void rewire_inputs(ir_node *node) { - int i; int arity = get_irn_arity(node); - - for (i = 0; i < arity; ++i) { + for (int i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); allocation_info_t *info = try_get_allocation_info(op); @@ -1078,12 +986,9 @@ static void rewire_inputs(ir_node *node) static void determine_live_through_regs(unsigned *bitset, ir_node *node) { const allocation_info_t *info = get_allocation_info(node); - unsigned r; - int i; - int arity; /* mark all used registers as potentially live-through */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (assignments[r] == NULL) continue; if (!rbitset_is_set(normal_regs, r)) @@ -1093,17 +998,14 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) } /* remove registers of value dying at the instruction */ - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - ir_node *op; - const arch_register_t *reg; - + int arity = get_irn_arity(node); + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(info->last_uses, i)) continue; - op = get_irn_n(node, i); - reg = arch_get_irn_register(op); - rbitset_clear(bitset, arch_register_get_index(reg)); + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg = arch_get_irn_register(op); + rbitset_clear(bitset, reg->index); } } @@ -1112,43 +1014,29 @@ static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, { unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs); int *lpp_vars = XMALLOCNZ(int, n_regs*n_regs); - int arity = get_irn_arity(node); - int i; - unsigned l; - unsigned r; lpp_t *lpp = lpp_new("prefalloc", lpp_minimize); //lpp_set_time_limit(lpp, 20); lpp_set_log(lpp, stdout); /** mark some edges as forbidden */ - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned current_reg; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) + 
be_foreach_use(node, cls, req, op, op_req, + if (!arch_register_req_is(req, limited)) continue; - req = arch_get_irn_register_req_in(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; - - limited = req->limited; - reg = arch_get_irn_register(op); - current_reg = arch_register_get_index(reg); - for (r = 0; r < n_regs; ++r) { + const unsigned *limited = req->limited; + const arch_register_t *reg = arch_get_irn_register(op); + unsigned current_reg = reg->index; + for (unsigned r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; rbitset_set(forbidden_edges, current_reg*n_regs + r); } - } + ); /* add all combinations, except for not allowed ones */ - for (l = 0; l < n_regs; ++l) { + for (unsigned l = 0; l < n_regs; ++l) { if (!rbitset_is_set(normal_regs, l)) { char name[15]; snprintf(name, sizeof(name), "%u_to_%u", l, l); @@ -1156,7 +1044,7 @@ static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, continue; } - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(normal_regs, r)) continue; if (rbitset_is_set(forbidden_edges, l*n_regs + r)) @@ -1176,11 +1064,10 @@ static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, } } /* add constraints */ - for (l = 0; l < n_regs; ++l) { - int constraint; + for (unsigned l = 0; l < n_regs; ++l) { /* only 1 destination per register */ - constraint = -1; - for (r = 0; r < n_regs; ++r) { + int constraint = -1; + for (unsigned r = 0; r < n_regs; ++r) { int var = lpp_vars[l*n_regs+r]; if (var == 0) continue; @@ -1193,7 +1080,7 @@ static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, } /* each destination used by at most 1 value */ constraint = -1; - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { int var = lpp_vars[r*n_regs+l]; if (var == 0) continue; @@ -1209,42 +1096,44 @@ static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, lpp_dump_plain(lpp, fopen("lppdump.txt", "w")); /* solve lpp */ - { - ir_graph *irg = get_irn_irg(node); - be_options_t *options = be_get_irg_options(irg); - unsigned *assignment; - lpp_solve(lpp, options->ilp_server, options->ilp_solver); - if (!lpp_is_sol_valid(lpp)) - panic("ilp solution not valid!"); - - assignment = ALLOCAN(unsigned, n_regs); - for (l = 0; l < n_regs; ++l) { - unsigned dest_reg = (unsigned)-1; - for (r = 0; r < n_regs; ++r) { - int var = lpp_vars[l*n_regs+r]; - if (var == 0) - continue; - double val = lpp_get_var_sol(lpp, var); - if (val == 1) { - assert(dest_reg == (unsigned)-1); - dest_reg = r; - } + lpp_solve(lpp, be_options.ilp_server, be_options.ilp_solver); + if (!lpp_is_sol_valid(lpp)) + panic("ilp solution not valid!"); + + unsigned *assignment = ALLOCAN(unsigned, n_regs); + for (unsigned l = 0; l < n_regs; ++l) { + unsigned dest_reg = (unsigned)-1; + for (unsigned r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + double val = lpp_get_var_sol(lpp, var); + if (val == 1) { + assert(dest_reg == (unsigned)-1); + dest_reg = r; } - assert(dest_reg != (unsigned)-1); - assignment[dest_reg] = l; } + assert(dest_reg != (unsigned)-1); + assignment[dest_reg] = l; + } - fprintf(stderr, "Assignment: "); - for (l = 0; l < n_regs; ++l) { - fprintf(stderr, "%u ", assignment[l]); - } - fprintf(stderr, "\n"); - fflush(stdout); - permute_values(live_nodes, node, assignment); + fprintf(stderr, "Assignment: "); + for (unsigned l = 0; l < n_regs; ++l) { + fprintf(stderr, "%u ", assignment[l]); } + fprintf(stderr, "\n"); + fflush(stdout); + 
permute_values(live_nodes, node, assignment); lpp_free(lpp); } +static bool is_aligned(unsigned num, unsigned alignment) +{ + unsigned mask = alignment-1; + assert(is_po2(alignment)); + return (num&mask) == 0; +} + /** * Enforce constraints at a node by live range splits. * @@ -1254,58 +1143,47 @@ static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, unsigned *forbidden_regs) { - int arity = get_irn_arity(node); - int i, res; - hungarian_problem_t *bp; - unsigned l, r; - unsigned *assignment; - ir_node *value; - - /* construct a list of register occupied by live-through values */ - unsigned *live_through_regs = NULL; - - /* see if any use constraints are not met */ + /* see if any use constraints are not met and whether double-width + * values are involved */ bool double_width = false; bool good = true; - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned reg_index; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - + be_foreach_use(node, cls, req, op, op_req, /* are there any limitations for the i'th operand? */ - req = arch_get_irn_register_req_in(node, i); if (req->width > 1) double_width = true; - if (!(req->type & arch_register_req_type_limited)) + const arch_register_t *reg = arch_get_irn_register(op); + unsigned reg_index = reg->index; + if (arch_register_req_is(req, aligned)) { + if (!is_aligned(reg_index, req->width)) { + good = false; + continue; + } + } + if (!arch_register_req_is(req, limited)) continue; - limited = req->limited; - reg = arch_get_irn_register(op); - reg_index = arch_register_get_index(reg); + const unsigned *limited = req->limited; if (!rbitset_is_set(limited, reg_index)) { /* found an assignment outside the limited set */ good = false; - break; + continue; } - } + ); /* is any of the live-throughs using a constrained output register? */ - be_foreach_definition(node, cls, value, - if (req_->width > 1) + unsigned *live_through_regs = NULL; + be_foreach_definition(node, cls, value, req, + (void)value; + if (req->width > 1) double_width = true; - if (! (req_->type & arch_register_req_type_limited)) + if (!arch_register_req_is(req, limited)) continue; if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); + live_through_regs = rbitset_alloca(n_regs); determine_live_through_regs(live_through_regs, node); } - rbitset_or(forbidden_regs, req_->limited, n_regs); - if (rbitsets_have_common(req_->limited, live_through_regs, n_regs)) + rbitset_or(forbidden_regs, req->limited, n_regs); + if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) good = false; ); @@ -1314,7 +1192,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, /* create these arrays if we haven't yet */ if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); + live_through_regs = rbitset_alloca(n_regs); } if (double_width) { @@ -1329,16 +1207,17 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, * right, destinations left because this will produce the solution * in the format required for permute_values. 
*/ - bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); + hungarian_problem_t *bp + = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); /* add all combinations, then remove not allowed ones */ - for (l = 0; l < n_regs; ++l) { + for (unsigned l = 0; l < n_regs; ++l) { if (!rbitset_is_set(normal_regs, l)) { hungarian_add(bp, l, l, 1); continue; } - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(normal_regs, r)) continue; /* livethrough values may not use constrainted output registers */ @@ -1350,35 +1229,25 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, } } - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned current_reg; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - req = arch_get_irn_register_req_in(node, i); - if (!(req->type & arch_register_req_type_limited)) + be_foreach_use(node, cls, req, op, op_req, + if (!arch_register_req_is(req, limited)) continue; - limited = req->limited; - reg = arch_get_irn_register(op); - current_reg = arch_register_get_index(reg); - for (r = 0; r < n_regs; ++r) { + const unsigned *limited = req->limited; + const arch_register_t *reg = arch_get_irn_register(op); + unsigned current_reg = reg->index; + for (unsigned r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; hungarian_remove(bp, r, current_reg); } - } + ); //hungarian_print_cost_matrix(bp, 1); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, assignment, NULL, 0); + unsigned *assignment = ALLOCAN(unsigned, n_regs); + int res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); #if 0 @@ -1397,14 +1266,11 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, /** test whether a node @p n is a copy of the value of node @p of */ static bool is_copy_of(ir_node *value, ir_node *test_value) { - allocation_info_t *test_info; - allocation_info_t *info; - if (value == test_value) return true; - info = get_allocation_info(value); - test_info = get_allocation_info(test_value); + allocation_info_t *info = get_allocation_info(value); + allocation_info_t *test_info = get_allocation_info(test_value); return test_info->original_value == info->original_value; } @@ -1415,9 +1281,8 @@ static bool is_copy_of(ir_node *value, ir_node *test_value) */ static int find_value_in_block_info(block_info_t *info, ir_node *value) { - unsigned r; ir_node **end_assignments = info->assignments; - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { ir_node *a_value = end_assignments[r]; if (a_value == NULL) @@ -1435,52 +1300,39 @@ static int find_value_in_block_info(block_info_t *info, ir_node *value) */ static void add_phi_permutations(ir_node *block, int p) { - unsigned r; - unsigned *permutation; - ir_node **old_assignments; - bool need_permutation; - ir_node *phi; - ir_node *pred = get_Block_cfgpred_block(block, p); - + ir_node *pred = get_Block_cfgpred_block(block, p); block_info_t *pred_info = get_block_info(pred); /* predecessor not processed yet? 
nothing to do */ if (!pred_info->processed) return; - permutation = ALLOCAN(unsigned, n_regs); - for (r = 0; r < n_regs; ++r) { + unsigned *permutation = ALLOCAN(unsigned, n_regs); + for (unsigned r = 0; r < n_regs; ++r) { permutation[r] = r; } /* check phi nodes */ - need_permutation = false; - phi = sched_first(block); + bool need_permutation = false; + ir_node *phi = sched_first(block); for ( ; is_Phi(phi); phi = sched_next(phi)) { - const arch_register_t *reg; - const arch_register_t *op_reg; - int regn; - int a; - ir_node *op; - if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - op = get_Phi_pred(phi, p); - a = find_value_in_block_info(pred_info, op); + ir_node *phi_pred = get_Phi_pred(phi, p); + int a = find_value_in_block_info(pred_info, phi_pred); assert(a >= 0); - reg = arch_get_irn_register(phi); - regn = arch_register_get_index(reg); + const arch_register_t *reg = arch_get_irn_register(phi); + int regn = reg->index; /* same register? nothing to do */ if (regn == a) continue; - op = pred_info->assignments[a]; - op_reg = arch_get_irn_register(op); - /* virtual or joker registers are ok too */ - if ((op_reg->type & arch_register_type_joker) - || (op_reg->type & arch_register_type_virtual)) + ir_node *op = pred_info->assignments[a]; + const arch_register_t *op_reg = arch_get_irn_register(op); + /* Virtual registers are ok, too. */ + if (op_reg->type & arch_register_type_virtual) continue; permutation[regn] = a; @@ -1489,7 +1341,7 @@ static void add_phi_permutations(ir_node *block, int p) if (need_permutation) { /* permute values at end of predecessor */ - old_assignments = assignments; + ir_node **old_assignments = assignments; assignments = pred_info->assignments; permute_values(NULL, be_get_end_of_block_insertion_point(pred), permutation); @@ -1499,19 +1351,14 @@ static void add_phi_permutations(ir_node *block, int p) /* change phi nodes to use the copied values */ phi = sched_first(block); for ( ; is_Phi(phi); phi = sched_next(phi)) { - int a; - ir_node *op; - if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - op = get_Phi_pred(phi, p); - /* we have permuted all values into the correct registers so we can simply query which value occupies the phis register in the predecessor */ - a = arch_register_get_index(arch_get_irn_register(phi)); - op = pred_info->assignments[a]; + int a = arch_get_irn_register(phi)->index; + ir_node *op = pred_info->assignments[a]; set_Phi_pred(phi, p, op); } } @@ -1522,33 +1369,27 @@ static void add_phi_permutations(ir_node *block, int p) */ static void adapt_phi_prefs(ir_node *phi) { - int i; - int arity = get_irn_arity(phi); ir_node *block = get_nodes_block(phi); allocation_info_t *info = get_allocation_info(phi); - for (i = 0; i < arity; ++i) { + int arity = get_irn_arity(phi); + for (int i = 0; i < arity; ++i) { ir_node *op = get_irn_n(phi, i); const arch_register_t *reg = arch_get_irn_register(op); - ir_node *pred_block; - block_info_t *pred_block_info; - float weight; - unsigned r; if (reg == NULL) continue; /* we only give the bonus if the predecessor already has registers * assigned, otherwise we only see a dummy value * and any conclusions about its register are useless */ - pred_block = get_Block_cfgpred_block(block, i); - pred_block_info = get_block_info(pred_block); + ir_node *pred_block = get_Block_cfgpred_block(block, i); + block_info_t *pred_block_info = get_block_info(pred_block); if (!pred_block_info->processed) continue; /* give bonus for already assigned register */ - weight = (float)get_block_execfreq(execfreqs, 
pred_block); - r = arch_register_get_index(reg); - info->prefs[r] += weight * AFF_PHI; + float weight = (float)get_block_execfreq(pred_block); + info->prefs[reg->index] += weight * AFF_PHI; } } @@ -1558,23 +1399,21 @@ static void adapt_phi_prefs(ir_node *phi) */ static void propagate_phi_register(ir_node *phi, unsigned assigned_r) { - int i; ir_node *block = get_nodes_block(phi); - int arity = get_irn_arity(phi); - for (i = 0; i < arity; ++i) { + int arity = get_irn_arity(phi); + for (int i = 0; i < arity; ++i) { ir_node *op = get_Phi_pred(phi, i); allocation_info_t *info = get_allocation_info(op); ir_node *pred_block = get_Block_cfgpred_block(block, i); - unsigned r; float weight - = (float)get_block_execfreq(execfreqs, pred_block) * AFF_PHI; + = (float)get_block_execfreq(pred_block) * AFF_PHI; if (info->prefs[assigned_r] >= weight) continue; /* promote the prefered register */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (info->prefs[r] > -weight) { info->prefs[r] = -weight; } @@ -1588,14 +1427,8 @@ static void propagate_phi_register(ir_node *phi, unsigned assigned_r) static void assign_phi_registers(ir_node *block) { - int n_phis = 0; - int n; - int res; - unsigned *assignment; - ir_node *node; - hungarian_problem_t *bp; - /* count phi nodes */ + int n_phis = 0; sched_foreach(block, node) { if (!is_Phi(node)) break; @@ -1608,12 +1441,10 @@ static void assign_phi_registers(ir_node *block) return; /* build a bipartite matching problem for all phi nodes */ - bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); - n = 0; + hungarian_problem_t *bp + = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); + int n = 0; sched_foreach(block, node) { - unsigned r; - - allocation_info_t *info; if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) @@ -1622,15 +1453,13 @@ static void assign_phi_registers(ir_node *block) /* give boni for predecessor colorings */ adapt_phi_prefs(node); /* add stuff to bipartite problem */ - info = get_allocation_info(node); + allocation_info_t *info = get_allocation_info(node); DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); - for (r = 0; r < n_regs; ++r) { - float costs; - + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(normal_regs, r)) continue; - costs = info->prefs[r]; + float costs = info->prefs[r]; costs = costs < 0 ? 
-logf(-costs+1) : logf(costs+1); costs *= 100; costs += 10000; @@ -1645,95 +1474,86 @@ static void assign_phi_registers(ir_node *block) //hungarian_print_cost_matrix(bp, 7); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, assignment, NULL, 0); + unsigned *assignment = ALLOCAN(unsigned, n_regs); + int res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); /* apply results */ n = 0; sched_foreach(block, node) { - unsigned r; - const arch_register_t *reg; - if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; + const arch_register_req_t *req + = arch_get_irn_register_req(node); - r = assignment[n++]; + unsigned r = assignment[n++]; assert(rbitset_is_set(normal_regs, r)); - reg = arch_register_for_index(cls, r); + const arch_register_t *reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); + use_reg(node, reg, req->width); /* adapt preferences for phi inputs */ - if (propagate_phi_registers) - propagate_phi_register(node, r); + propagate_phi_register(node, r); } } +static arch_register_req_t *allocate_reg_req(ir_graph *irg) +{ + struct obstack *obst = be_get_be_obst(irg); + arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); + return req; +} + /** * Walker: assign registers to all nodes of a block that * need registers from the currently considered register class. */ static void allocate_coalesce_block(ir_node *block, void *data) { - int i; - ir_nodeset_t live_nodes; - ir_node *node; - int n_preds; - block_info_t *block_info; - block_info_t **pred_block_infos; - ir_node **phi_ins; - unsigned *forbidden_regs; /**< collects registers which must - not be used for optimistic splits */ - (void) data; DB((dbg, LEVEL_2, "* Block %+F\n", block)); /* clear assignments */ - block_info = get_block_info(block); + block_info_t *block_info = get_block_info(block); assignments = block_info->assignments; + ir_nodeset_t live_nodes; ir_nodeset_init(&live_nodes); /* gather regalloc infos of predecessor blocks */ - n_preds = get_Block_n_cfgpreds(block); - pred_block_infos = ALLOCAN(block_info_t*, n_preds); - for (i = 0; i < n_preds; ++i) { + int n_preds = get_Block_n_cfgpreds(block); + block_info_t **pred_block_infos = ALLOCAN(block_info_t*, n_preds); + for (int i = 0; i < n_preds; ++i) { ir_node *pred = get_Block_cfgpred_block(block, i); block_info_t *pred_info = get_block_info(pred); pred_block_infos[i] = pred_info; } - phi_ins = ALLOCAN(ir_node*, n_preds); + ir_node **phi_ins = ALLOCAN(ir_node*, n_preds); /* collect live-in nodes and preassigned values */ - be_lv_foreach(lv, block, be_lv_state_in, i) { - bool need_phi = false; - const arch_register_req_t *req; - const arch_register_t *reg; - int p; - - node = be_lv_get_irn(lv, block, i); - req = arch_get_irn_register_req(node); + be_lv_foreach(lv, block, be_lv_state_in, node) { + const arch_register_req_t *req = arch_get_irn_register_req(node); if (req->cls != cls) continue; - if (req->type & arch_register_req_type_ignore) { + if (arch_register_req_is(req, ignore)) { allocation_info_t *info = get_allocation_info(node); info->current_value = node; - reg = arch_get_irn_register(node); + const arch_register_t *reg = arch_get_irn_register(node); assert(reg != NULL); /* ignore values must be preassigned */ - use_reg(node, reg); + use_reg(node, reg, req->width); continue; } /* check all predecessors for this value, if it is not everywhere the same or unknown then 
we have to construct a phi (we collect the potential phi inputs here) */ - for (p = 0; p < n_preds; ++p) { + bool need_phi = false; + for (int p = 0; p < n_preds; ++p) { block_info_t *pred_info = pred_block_infos[p]; if (!pred_info->processed) { @@ -1755,17 +1575,23 @@ static void allocate_coalesce_block(ir_node *block, void *data) if (need_phi) { ir_mode *mode = get_irn_mode(node); - ir_node *phi = be_new_Phi(block, n_preds, phi_ins, mode, cls); + const arch_register_req_t *phi_req = cls->class_req; + if (req->width > 1) { + arch_register_req_t *new_req = allocate_reg_req(irg); + new_req->cls = cls; + new_req->type = req->type & arch_register_req_type_aligned; + new_req->width = req->width; + phi_req = new_req; + } + ir_node *phi = be_new_Phi(block, n_preds, phi_ins, mode, + phi_req); DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); #ifdef DEBUG_libfirm - { - int pi; - for (pi = 0; pi < n_preds; ++pi) { - DB((dbg, LEVEL_3, " %+F", phi_ins[pi])); - } - DB((dbg, LEVEL_3, "\n")); + for (int pi = 0; pi < n_preds; ++pi) { + DB((dbg, LEVEL_3, " %+F", phi_ins[pi])); } + DB((dbg, LEVEL_3, "\n")); #endif mark_as_copy_of(phi, node); sched_add_after(block, phi); @@ -1782,36 +1608,31 @@ static void allocate_coalesce_block(ir_node *block, void *data) } /* if the node already has a register assigned use it */ - reg = arch_get_irn_register(node); + const arch_register_t *reg = arch_get_irn_register(node); if (reg != NULL) { - use_reg(node, reg); + use_reg(node, reg, req->width); } /* remember that this node is live at the beginning of the block */ ir_nodeset_insert(&live_nodes, node); } - rbitset_alloca(forbidden_regs, n_regs); + /** Collects registers which must not be used for optimistic splits. */ + unsigned *const forbidden_regs = rbitset_alloca(n_regs); /* handle phis... 
*/ assign_phi_registers(block); /* all live-ins must have a register */ -#ifdef DEBUG_libfirm - { - ir_nodeset_iterator_t iter; - foreach_ir_nodeset(&live_nodes, node, iter) { - const arch_register_t *reg = arch_get_irn_register(node); - assert(reg != NULL); - } +#ifndef NDEBUG + foreach_ir_nodeset(&live_nodes, node, iter) { + const arch_register_t *reg = arch_get_irn_register(node); + assert(reg != NULL); } #endif /* assign instructions in the block */ sched_foreach(block, node) { - int arity; - ir_node *value; - /* phis are already assigned */ if (is_Phi(node)) continue; @@ -1825,23 +1646,17 @@ static void allocate_coalesce_block(ir_node *block, void *data) rewire_inputs(node); /* we may not use registers used for inputs for optimistic splits */ - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - reg = arch_get_irn_register(op); - rbitset_set(forbidden_regs, arch_register_get_index(reg)); - } + be_foreach_use(node, cls, in_req, op, op_req, + const arch_register_t *reg = arch_get_irn_register(op); + rbitset_set(forbidden_regs, reg->index); + ); /* free registers of values last used at this instruction */ free_last_uses(&live_nodes, node); /* assign output registers */ - be_foreach_definition_(node, cls, value, - assign_reg(block, value, forbidden_regs); + be_foreach_definition_(node, cls, value, req, + assign_reg(block, value, req, forbidden_regs); ); } @@ -1852,8 +1667,7 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* permute values at end of predecessor blocks in case of phi-nodes */ if (n_preds > 1) { - int p; - for (p = 0; p < n_preds; ++p) { + for (int p = 0; p < n_preds; ++p) { add_phi_permutations(block, p); } } @@ -1890,7 +1704,6 @@ static int cmp_block_costs(const void *d1, const void *d2) static void determine_block_order(void) { - size_t p; ir_node **blocklist = be_get_cfgpostorder(irg); size_t n_blocks = ARR_LEN(blocklist); int dfs_num = 0; @@ -1899,7 +1712,7 @@ static void determine_block_order(void) size_t order_p = 0; /* clear block links... 
*/ - for (p = 0; p < n_blocks; ++p) { + for (size_t p = 0; p < n_blocks; ++p) { ir_node *block = blocklist[p]; set_irn_link(block, NULL); } @@ -1907,15 +1720,14 @@ static void determine_block_order(void) /* walk blocks in reverse postorder, the costs for each block are the * sum of the costs of its predecessors (excluding the costs on backedges * which we can't determine) */ - for (p = n_blocks; p > 0;) { + for (size_t p = n_blocks; p > 0;) { block_costs_t *cost_info; ir_node *block = blocklist[--p]; - float execfreq = (float)get_block_execfreq(execfreqs, block); + float execfreq = (float)get_block_execfreq(block); float costs = execfreq; int n_cfgpreds = get_Block_n_cfgpreds(block); - int p2; - for (p2 = 0; p2 < n_cfgpreds; ++p2) { + for (int p2 = 0; p2 < n_cfgpreds; ++p2) { ir_node *pred_block = get_Block_cfgpred_block(block, p2); block_costs_t *pred_costs = (block_costs_t*)get_irn_link(pred_block); /* we don't have any info for backedges */ @@ -1936,8 +1748,8 @@ static void determine_block_order(void) ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); inc_irg_block_visited(irg); - for (p = 0; p < n_blocks; ++p) { - ir_node *block = blocklist[p]; + for (size_t p = 0; p < n_blocks; ++p) { + ir_node *block = blocklist[p]; if (Block_block_visited(block)) continue; @@ -1948,11 +1760,10 @@ static void determine_block_order(void) ir_node *best_pred = NULL; float best_costs = -1; int n_cfgpred = get_Block_n_cfgpreds(block); - int i; pdeq_putr(worklist, block); mark_Block_block_visited(block); - for (i = 0; i < n_cfgpred; ++i) { + for (int i = 0; i < n_cfgpred; ++i) { ir_node *pred_block = get_Block_cfgpred_block(block, i); block_costs_t *pred_info = (block_costs_t*)get_irn_link(pred_block); @@ -1989,15 +1800,18 @@ static void determine_block_order(void) n_block_order = n_blocks; } +static void free_block_order(void) +{ + xfree(block_order); +} + /** * Run the register allocator for the current register class. 
*/ static void be_pref_alloc_cls(void) { - size_t i; - - lv = be_assure_liveness(irg); - be_liveness_assure_sets(lv); + be_assure_live_sets(irg); + lv = be_get_irg_liveness(irg); ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); @@ -2006,10 +1820,9 @@ static void be_pref_alloc_cls(void) be_clear_links(irg); irg_block_walk_graph(irg, NULL, analyze_block, NULL); - if (create_congruence_classes) - combine_congruence_classes(); + combine_congruence_classes(); - for (i = 0; i < n_block_order; ++i) { + for (size_t i = 0; i < n_block_order; ++i) { ir_node *block = block_order[i]; allocate_coalesce_block(block, NULL); } @@ -2019,7 +1832,7 @@ static void be_pref_alloc_cls(void) static void dump(int mask, ir_graph *irg, const char *suffix) { - if (be_get_irg_options(irg)->dump_flags & mask) + if (be_options.dump_flags & mask) dump_ir_graph(irg, suffix); } @@ -2052,19 +1865,16 @@ static void spill(void) */ static void be_pref_alloc(ir_graph *new_irg) { - const arch_env_t *arch_env = be_get_irg_arch_env(new_irg); - int n_cls = arch_env->n_register_classes; - int c; - obstack_init(&obst); - irg = new_irg; - execfreqs = be_get_irg_exec_freq(irg); + irg = new_irg; /* determine a good coloring order */ determine_block_order(); - for (c = 0; c < n_cls; ++c) { + const arch_env_t *arch_env = be_get_irg_arch_env(new_irg); + int n_cls = arch_env->n_register_classes; + for (int c = 0; c < n_cls; ++c) { cls = &arch_env->register_classes[c]; if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra) continue; @@ -2079,10 +1889,10 @@ static void be_pref_alloc(ir_graph *new_irg) /* verify schedule and register pressure */ be_timer_push(T_VERIFY); - if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) { + if (be_options.verify_option == BE_VERIFY_WARN) { be_verify_schedule(irg); be_verify_register_pressure(irg, cls); - } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) { + } else if (be_options.verify_option == BE_VERIFY_ASSERT) { assert(be_verify_schedule(irg) && "Schedule verification failed"); assert(be_verify_register_pressure(irg, cls) && "Register pressure verification failed"); @@ -2095,21 +1905,22 @@ static void be_pref_alloc(ir_graph *new_irg) /* we most probably constructed new Phis so liveness info is invalid * now */ - /* TODO: test liveness_introduce */ - be_liveness_invalidate(lv); + be_invalidate_live_sets(irg); free(normal_regs); stat_ev_ctx_pop("regcls"); } + free_block_order(); + be_timer_push(T_RA_SPILL_APPLY); be_abi_fix_stack_nodes(irg); be_timer_pop(T_RA_SPILL_APPLY); be_timer_push(T_VERIFY); - if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) { + if (be_options.verify_option == BE_VERIFY_WARN) { be_verify_register_allocation(irg); - } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) { + } else if (be_options.verify_option == BE_VERIFY_ASSERT) { assert(be_verify_register_allocation(irg) && "Register allocation invalid"); } @@ -2121,13 +1932,7 @@ static void be_pref_alloc(ir_graph *new_irg) BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc) void be_init_pref_alloc(void) { - static be_ra_t be_ra_pref = { - be_pref_alloc, - }; - lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); - lc_opt_entry_t *prefalloc_group = lc_opt_get_grp(be_grp, "prefalloc"); - lc_opt_add_table(prefalloc_group, options); - + static be_ra_t be_ra_pref = { be_pref_alloc }; be_register_allocator("pref", &be_ra_pref); FIRM_DBG_REGISTER(dbg, "firm.be.prefalloc"); }
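
The central change in this patch threads a register `width` through use_reg(), free_reg_of_value() and assign_reg(), so that a multi-register value occupies a contiguous, possibly alignment-constrained range of entries in `assignments`. Below is a minimal, self-contained sketch of that bookkeeping, appended after the patch so the diff itself stays applicable. N_REGS, the value type and all function names here are simplified illustrative stand-ins, not identifiers from the patch; where assign_reg() leaves a TODO to attempt an optimistic split when the range is blocked, this sketch merely reports the candidate as unusable.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define N_REGS 8

/* register index -> value currently living in that register */
static const char *assignments[N_REGS];

/* mirror of use_reg(): occupy every register covered by the value */
static void use_reg_sketch(const char *value, unsigned r, unsigned width)
{
	for (unsigned r0 = r; r0 < r + width; ++r0)
		assignments[r0] = value;
}

/* mirror of free_reg_of_value(): release the whole range again */
static void free_reg_sketch(unsigned r, unsigned width)
{
	for (unsigned r0 = r; r0 < r + width; ++r0)
		assignments[r0] = NULL;
}

/* the candidate test from assign_reg(): in range, aligned if the
 * request demands it, and every slot of the range still free */
static bool candidate_ok(unsigned r, unsigned width, bool aligned)
{
	if (r + width > N_REGS)
		return false;
	if (aligned && r % width != 0)
		return false;
	for (unsigned r0 = r; r0 < r + width; ++r0) {
		if (assignments[r0] != NULL)
			return false;
	}
	return true;
}

int main(void)
{
	use_reg_sketch("a", 0, 1);         /* r0 is taken                  */
	assert(!candidate_ok(0, 2, true)); /* range r0..r1 is not free     */
	assert(!candidate_ok(3, 2, true)); /* r3 is not 2-aligned          */
	assert(candidate_ok(2, 2, true));  /* r2..r3 free and aligned      */
	use_reg_sketch("pair", 2, 2);
	free_reg_sketch(2, 2);
	puts("width-aware assignment sketch ok");
	return 0;
}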