X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbeprefalloc.c;h=d0080e76367ea9b100933fceb8a38b6aaf5e5bfd;hb=b2709c45d975072f86e99e29083eda4b43a0210b;hp=451bf0ea82a371942cdb9ba9e08b502e20f1d1fc;hpb=8a40be4492274e2799181e4ce950b33b367a59ef;p=libfirm diff --git a/ir/be/beprefalloc.c b/ir/be/beprefalloc.c index 451bf0ea8..d0080e763 100644 --- a/ir/be/beprefalloc.c +++ b/ir/be/beprefalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -22,7 +22,6 @@ * @brief Preference Guided Register Assignment * @author Matthias Braun * @date 14.2.2009 - * @version $Id$ * * The idea is to allocate registers in 2 passes: * 1. A first pass to determine "preferred" registers for live-ranges. This @@ -42,6 +41,7 @@ #include #include #include +#include "lpp.h" #include "error.h" #include "execfreq.h" @@ -53,12 +53,14 @@ #include "irnode_t.h" #include "irprintf.h" #include "irdump.h" +#include "irtools.h" +#include "util.h" #include "obst.h" #include "raw_bitset.h" #include "unionfind.h" #include "pdeq.h" #include "hungarian.h" - +#include "statev.h" #include "beabi.h" #include "bechordal_t.h" #include "be.h" @@ -72,6 +74,7 @@ #include "bespillutil.h" #include "beverify.h" #include "beutil.h" +#include "bestack.h" #define USE_FACTOR 1.0f #define DEF_FACTOR 1.0f @@ -86,24 +89,12 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) static struct obstack obst; static ir_graph *irg; static const arch_register_class_t *cls; -static const arch_register_req_t *default_cls_req; static be_lv_t *lv; -static const ir_exec_freq *execfreqs; static unsigned n_regs; static unsigned *normal_regs; static int *congruence_classes; static ir_node **block_order; -static int n_block_order; -static int create_preferences = true; -static int create_congruence_classes = true; -static int propagate_phi_registers = true; - -static const lc_opt_table_entry_t options[] = { - LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences), - LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes), - LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers), - LC_OPT_LAST -}; +static size_t n_block_order; /** currently active assignments (while processing a basic block) * maps registers to values(their current copies) */ @@ -117,7 +108,7 @@ struct allocation_info_t { unsigned last_uses[2]; /**< bitset indicating last uses (input pos) */ ir_node *current_value; /**< copy of the value that should be used */ ir_node *original_value; /**< for copies point to original value */ - float prefs[0]; /**< register preferences */ + float prefs[]; /**< register preferences */ }; typedef struct allocation_info_t allocation_info_t; @@ -131,7 +122,7 @@ typedef struct reg_pref_t reg_pref_t; /** per basic-block information */ struct block_info_t { bool processed; /**< indicate whether block is processed */ - ir_node *assignments[0]; /**< register assignments at end of block */ + ir_node *assignments[]; /**< register assignments at end of block */ }; typedef struct block_info_t block_info_t; @@ -141,7 +132,7 @@ typedef struct block_info_t block_info_t; */ static allocation_info_t *get_allocation_info(ir_node *node) { - allocation_info_t *info = get_irn_link(node); + allocation_info_t *info = (allocation_info_t*)get_irn_link(node); if (info == NULL) { info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs); 
info->current_value = node; @@ -162,7 +153,7 @@ static allocation_info_t *try_get_allocation_info(const ir_node *node) */ static block_info_t *get_block_info(ir_node *block) { - block_info_t *info = get_irn_link(block); + block_info_t *info = (block_info_t*)get_irn_link(block); assert(is_Block(block)); if (info == NULL) { @@ -173,24 +164,6 @@ static block_info_t *get_block_info(ir_node *block) return info; } -/** - * Get default register requirement for the current register class - */ -static const arch_register_req_t *get_default_req_current_cls(void) -{ - if (default_cls_req == NULL) { - struct obstack *obst = get_irg_obstack(irg); - arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); - - req->type = arch_register_req_type_normal; - req->cls = cls; - req->width = 1; - - default_cls_req = req; - } - return default_cls_req; -} - /** * Link the allocation info of a node to a copy. * Afterwards, both nodes uses the same allocation info. @@ -201,12 +174,11 @@ static const arch_register_req_t *get_default_req_current_cls(void) */ static void mark_as_copy_of(ir_node *copy, ir_node *value) { - ir_node *original; allocation_info_t *info = get_allocation_info(value); allocation_info_t *copy_info = get_allocation_info(copy); /* find original value */ - original = info->original_value; + ir_node *original = info->original_value; if (original != value) { info = get_allocation_info(original); } @@ -234,14 +206,10 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, float penalty, const unsigned* limited, ir_node *node) { - ir_nodeset_iterator_t iter; - unsigned r; - unsigned n_allowed; - allocation_info_t *info = get_allocation_info(node); - ir_node *neighbor; + allocation_info_t *info = get_allocation_info(node); /* give penalty for all forbidden regs */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; @@ -253,7 +221,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, return; penalty *= NEIGHBOR_FACTOR; - n_allowed = rbitset_popcount(limited, n_regs); + size_t n_allowed = rbitset_popcount(limited, n_regs); if (n_allowed > 1) { /* only create a very weak penalty if multiple regs are allowed */ penalty = (penalty * 0.8f) / n_allowed; @@ -267,7 +235,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, continue; neighbor_info = get_allocation_info(neighbor); - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(limited, r)) continue; @@ -285,32 +253,25 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, * @param weight the weight * @param node the current node */ -static void check_defs(const ir_nodeset_t *live_nodes, float weight, - ir_node *node) +static void check_defs(ir_nodeset_t const *const live_nodes, float const weight, ir_node *const node, arch_register_req_t const *const req) { - const arch_register_req_t *req = arch_get_register_req_out(node); - if (req->type & arch_register_req_type_limited) { + if (arch_register_req_is(req, limited)) { const unsigned *limited = req->limited; float penalty = weight * DEF_FACTOR; give_penalties_for_limits(live_nodes, penalty, limited, node); } - if (req->type & arch_register_req_type_should_be_same) { + if (arch_register_req_is(req, should_be_same)) { ir_node *insn = skip_Proj(node); allocation_info_t *info = get_allocation_info(node); int arity = get_irn_arity(insn); - int i; float factor = 1.0f / rbitset_popcount(&req->other_same, arity); - for (i = 0; i 
< arity; ++i) { - ir_node *op; - unsigned r; - allocation_info_t *op_info; - + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(&req->other_same, i)) continue; - op = get_irn_n(insn, i); + ir_node *op = get_irn_n(insn, i); /* if we the value at the should_be_same input doesn't die at the * node, then it is no use to propagate the constraints (since a @@ -318,8 +279,8 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, if (ir_nodeset_contains(live_nodes, op)) continue; - op_info = get_allocation_info(op); - for (r = 0; r < n_regs; ++r) { + allocation_info_t *op_info = get_allocation_info(op); + for (unsigned r = 0; r < n_regs; ++r) { op_info->prefs[r] += info->prefs[r] * factor; } } @@ -332,44 +293,37 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight, */ static void analyze_block(ir_node *block, void *data) { - float weight = (float)get_block_execfreq(execfreqs, block); - ir_nodeset_t live_nodes; - ir_node *node; + float weight = (float)get_block_execfreq(block); + ir_nodeset_t live_nodes; (void) data; ir_nodeset_init(&live_nodes); be_liveness_end_of_block(lv, cls, block, &live_nodes); sched_foreach_reverse(block, node) { - allocation_info_t *info; - int i; - int arity; - if (is_Phi(node)) break; - if (create_preferences) { - ir_node *value; - be_foreach_definition(node, cls, value, - check_defs(&live_nodes, weight, value); - ); - } + be_foreach_definition(node, cls, value, req, + check_defs(&live_nodes, weight, value, req); + ); /* mark last uses */ - arity = get_irn_arity(node); + int arity = get_irn_arity(node); /* the allocation info node currently only uses 1 unsigned value to mark last used inputs. So we will fail for a node with more than 32 inputs. */ + allocation_info_t *info = get_allocation_info(node); if (arity >= (int) sizeof(info->last_uses) * 8) { panic("Node with more than %d inputs not supported yet", (int) sizeof(info->last_uses) * 8); } - info = get_allocation_info(node); - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - if (!arch_irn_consider_in_reg_alloc(cls, op)) + for (int i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_req_t *req = arch_get_irn_register_req(op); + if (req->cls != cls) continue; /* last usage of a value? */ @@ -380,57 +334,37 @@ static void analyze_block(ir_node *block, void *data) be_liveness_transfer(cls, node, &live_nodes); - if (create_preferences) { - /* update weights based on usage constraints */ - for (i = 0; i < arity; ++i) { - const arch_register_req_t *req; - const unsigned *limited; - ir_node *op = get_irn_n(node, i); - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - req = arch_get_register_req(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; + /* update weights based on usage constraints */ + be_foreach_use(node, cls, req, op, op_req, + if (!arch_register_req_is(req, limited)) + continue; - limited = req->limited; - give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, - limited, op); - } - } + give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, req->limited, op); + ); } ir_nodeset_destroy(&live_nodes); } -static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) +static void congruence_def(ir_nodeset_t *const live_nodes, ir_node const *const node, arch_register_req_t const *const req) { - const arch_register_req_t *req = arch_get_register_req_out(node); - /* should be same constraint? 
*/ - if (req->type & arch_register_req_type_should_be_same) { - const ir_node *insn = skip_Proj_const(node); - int arity = get_irn_arity(insn); - int i; - unsigned node_idx = get_irn_idx(node); - node_idx = uf_find(congruence_classes, node_idx); - - for (i = 0; i < arity; ++i) { - ir_node *live; - ir_node *op; - int op_idx; - ir_nodeset_iterator_t iter; - bool interferes = false; + if (arch_register_req_is(req, should_be_same)) { + const ir_node *insn = skip_Proj_const(node); + int arity = get_irn_arity(insn); + unsigned node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(&req->other_same, i)) continue; - op = get_irn_n(insn, i); - op_idx = get_irn_idx(op); + ir_node *op = get_irn_n(insn, i); + int op_idx = get_irn_idx(op); op_idx = uf_find(congruence_classes, op_idx); /* do we interfere with the value */ + bool interferes = false; foreach_ir_nodeset(live_nodes, live, iter) { int lv_idx = get_irn_idx(live); lv_idx = uf_find(congruence_classes, lv_idx); @@ -443,7 +377,7 @@ static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) if (interferes) continue; - node_idx = uf_union(congruence_classes, node_idx, op_idx); + uf_union(congruence_classes, node_idx, op_idx); DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", node, op)); /* one should_be_same is enough... */ @@ -454,53 +388,48 @@ static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) static void create_congruence_class(ir_node *block, void *data) { - ir_nodeset_t live_nodes; - ir_node *node; + ir_nodeset_t live_nodes; (void) data; ir_nodeset_init(&live_nodes); be_liveness_end_of_block(lv, cls, block, &live_nodes); /* check should be same constraints */ + ir_node *last_phi = NULL; sched_foreach_reverse(block, node) { - ir_node *value; - if (is_Phi(node)) + if (is_Phi(node)) { + last_phi = node; break; + } - be_foreach_definition(node, cls, value, - congruence_def(&live_nodes, value); + be_foreach_definition(node, cls, value, req, + congruence_def(&live_nodes, value, req); ); be_liveness_transfer(cls, node, &live_nodes); } + if (!last_phi) { + ir_nodeset_destroy(&live_nodes); + return; + } /* check phi congruence classes */ - sched_foreach_reverse_from(node, node) { - int i; - int arity; - int node_idx; - assert(is_Phi(node)); + sched_foreach_reverse_from(last_phi, phi) { + assert(is_Phi(phi)); - if (!arch_irn_consider_in_reg_alloc(cls, node)) + if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - node_idx = get_irn_idx(node); + int node_idx = get_irn_idx(phi); node_idx = uf_find(congruence_classes, node_idx); - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - bool interferes = false; - ir_nodeset_iterator_t iter; - unsigned r; - int old_node_idx; - ir_node *live; - ir_node *phi; - allocation_info_t *head_info; - allocation_info_t *other_info; - ir_node *op = get_Phi_pred(node, i); - int op_idx = get_irn_idx(op); + int arity = get_irn_arity(phi); + for (int i = 0; i < arity; ++i) { + ir_node *op = get_Phi_pred(phi, i); + int op_idx = get_irn_idx(op); op_idx = uf_find(congruence_classes, op_idx); /* do we interfere with the value */ + bool interferes = false; foreach_ir_nodeset(&live_nodes, live, iter) { int lv_idx = get_irn_idx(live); lv_idx = uf_find(congruence_classes, lv_idx); @@ -534,30 +463,30 @@ static void create_congruence_class(ir_node *block, void *data) continue; /* merge the 2 congruence classes and sum up their preferences */ - old_node_idx = node_idx; + int old_node_idx = 
node_idx; node_idx = uf_union(congruence_classes, node_idx, op_idx); DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", - node, op)); + phi, op)); old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx; - head_info = get_allocation_info(get_idx_irn(irg, node_idx)); - other_info = get_allocation_info(get_idx_irn(irg, old_node_idx)); - for (r = 0; r < n_regs; ++r) { + allocation_info_t *head_info + = get_allocation_info(get_idx_irn(irg, node_idx)); + allocation_info_t *other_info + = get_allocation_info(get_idx_irn(irg, old_node_idx)); + for (unsigned r = 0; r < n_regs; ++r) { head_info->prefs[r] += other_info->prefs[r]; } } } + ir_nodeset_destroy(&live_nodes); } static void set_congruence_prefs(ir_node *node, void *data) { - allocation_info_t *info; - allocation_info_t *head_info; + (void) data; unsigned node_idx = get_irn_idx(node); unsigned node_set = uf_find(congruence_classes, node_idx); - (void) data; - /* head of congruence class or not in any class */ if (node_set == node_idx) return; @@ -565,8 +494,9 @@ static void set_congruence_prefs(ir_node *node, void *data) if (!arch_irn_consider_in_reg_alloc(cls, node)) return; - head_info = get_allocation_info(get_idx_irn(irg, node_set)); - info = get_allocation_info(node); + ir_node *head = get_idx_irn(irg, node_set); + allocation_info_t *head_info = get_allocation_info(head); + allocation_info_t *info = get_allocation_info(node); memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); } @@ -592,27 +522,28 @@ static void combine_congruence_classes(void) * @param node the node * @param reg the register */ -static void use_reg(ir_node *node, const arch_register_t *reg) +static void use_reg(ir_node *node, const arch_register_t *reg, unsigned width) { - unsigned r = arch_register_get_index(reg); - assignments[r] = node; + unsigned r = reg->index; + for (unsigned r0 = r; r0 < r + width; ++r0) + assignments[r0] = node; arch_set_irn_register(node, reg); } static void free_reg_of_value(ir_node *node) { - const arch_register_t *reg; - unsigned r; - if (!arch_irn_consider_in_reg_alloc(cls, node)) return; - reg = arch_get_irn_register(node); - r = arch_register_get_index(reg); + const arch_register_t *reg = arch_get_irn_register(node); + const arch_register_req_t *req = arch_get_irn_register_req(node); + unsigned r = reg->index; /* assignment->value may be NULL if a value is used at 2 inputs - so it gets freed twice. */ - assert(assignments[r] == node || assignments[r] == NULL); - assignments[r] = NULL; + * so it gets freed twice. 
*/ + for (unsigned r0 = r; r0 < r + req->width; ++r0) { + assert(assignments[r0] == node || assignments[r0] == NULL); + assignments[r0] = NULL; + } } /** @@ -632,9 +563,7 @@ static int compare_reg_pref(const void *e1, const void *e2) static void fill_sort_candidates(reg_pref_t *regprefs, const allocation_info_t *info) { - unsigned r; - - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { float pref = info->prefs[r]; regprefs[r].num = r; regprefs[r].pref = pref; @@ -647,45 +576,31 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, float pref, float pref_delta, unsigned *forbidden_regs, int recursion) { - const arch_register_t *from_reg; - const arch_register_t *reg; - ir_node *original_insn; - ir_node *block; - ir_node *copy; - unsigned r; - unsigned from_r; - unsigned i; - allocation_info_t *info = get_allocation_info(to_split); - reg_pref_t *prefs; - float delta; - float split_threshold; - (void) pref; + unsigned r = 0; + allocation_info_t *info = get_allocation_info(to_split); + float delta = 0; - /* stupid hack: don't optimisticallt split don't spill nodes... + /* stupid hack: don't optimistically split don't spill nodes... * (so we don't split away the values produced because of * must_be_different constraints) */ - original_insn = skip_Proj(info->original_value); - if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) + ir_node *original_insn = skip_Proj(info->original_value); + if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill) return false; - from_reg = arch_get_irn_register(to_split); - from_r = arch_register_get_index(from_reg); - block = get_nodes_block(before); - split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA; + const arch_register_t *from_reg = arch_get_irn_register(to_split); + unsigned from_r = from_reg->index; + ir_node *block = get_nodes_block(before); + float split_threshold = (float)get_block_execfreq(block) * SPLIT_DELTA; if (pref_delta < split_threshold*0.5) return false; /* find the best free position where we could move to */ - prefs = ALLOCAN(reg_pref_t, n_regs); + reg_pref_t *prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(prefs, info); + unsigned i; for (i = 0; i < n_regs; ++i) { - float apref; - float apref_delta; - bool res; - bool old_source_state; - /* we need a normal register which is not an output register an different from the current register of to_split */ r = prefs[i].num; @@ -712,17 +627,17 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) continue; - apref = prefs[i].pref; - apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; + float apref = prefs[i].pref; + float apref_delta = i+1 < n_regs ? 
apref - prefs[i+1].pref : 0; apref_delta += pref_delta - split_threshold; /* our source register isn't a useful destination for recursive splits */ - old_source_state = rbitset_is_set(forbidden_regs, from_r); + bool old_source_state = rbitset_is_set(forbidden_regs, from_r); rbitset_set(forbidden_regs, from_r); /* try recursive split */ - res = try_optimistic_split(assignments[r], before, apref, - apref_delta, forbidden_regs, recursion+1); + bool res = try_optimistic_split(assignments[r], before, apref, + apref_delta, forbidden_regs, recursion+1); /* restore our destination */ if (old_source_state) { rbitset_set(forbidden_regs, from_r); @@ -736,13 +651,14 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, if (i >= n_regs) return false; - reg = arch_register_for_index(cls, r); - copy = be_new_Copy(cls, block, to_split); + const arch_register_t *reg = arch_register_for_index(cls, r); + ir_node *copy = be_new_Copy(block, to_split); + unsigned width = 1; mark_as_copy_of(copy, to_split); /* hacky, but correct here */ - if (assignments[arch_register_get_index(from_reg)] == to_split) + if (assignments[from_reg->index] == to_split) free_reg_of_value(to_split); - use_reg(copy, reg); + use_reg(copy, reg, width); sched_add_before(before, copy); DB((dbg, LEVEL_3, @@ -754,109 +670,104 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, /** * Determine and assign a register for node @p node */ -static void assign_reg(const ir_node *block, ir_node *node, - unsigned *forbidden_regs) +static void assign_reg(ir_node const *const block, ir_node *const node, arch_register_req_t const *const req, unsigned *const forbidden_regs) { - const arch_register_t *reg; - allocation_info_t *info; - const arch_register_req_t *req; - reg_pref_t *reg_prefs; - ir_node *in_node; - unsigned i; - const unsigned *allowed_regs; - unsigned r; - assert(!is_Phi(node)); - assert(arch_irn_consider_in_reg_alloc(cls, node)); - /* preassigned register? 
*/ - reg = arch_get_irn_register(node); - if (reg != NULL) { - DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name)); - use_reg(node, reg); + arch_register_t const *final_reg = arch_get_irn_register(node); + unsigned const width = req->width; + if (final_reg != NULL) { + DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, final_reg->name)); + use_reg(node, final_reg, width); return; } - /* give should_be_same boni */ - info = get_allocation_info(node); - req = arch_get_register_req_out(node); + /* ignore reqs must be preassigned */ + assert(!arch_register_req_is(req, ignore)); - in_node = skip_Proj(node); - if (req->type & arch_register_req_type_should_be_same) { - float weight = (float)get_block_execfreq(execfreqs, block); + /* give should_be_same boni */ + allocation_info_t *info = get_allocation_info(node); + ir_node *in_node = skip_Proj(node); + if (arch_register_req_is(req, should_be_same)) { + float weight = (float)get_block_execfreq(block); int arity = get_irn_arity(in_node); - int i; assert(arity <= (int) sizeof(req->other_same) * 8); - for (i = 0; i < arity; ++i) { - ir_node *in; - const arch_register_t *reg; - unsigned r; + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(&req->other_same, i)) continue; - in = get_irn_n(in_node, i); - reg = arch_get_irn_register(in); - assert(reg != NULL); - r = arch_register_get_index(reg); + ir_node *in = get_irn_n(in_node, i); + const arch_register_t *reg = arch_get_irn_register(in); + unsigned reg_index = reg->index; /* if the value didn't die here then we should not propagate the * should_be_same info */ - if (assignments[r] == in) + if (assignments[reg_index] == in) continue; - info->prefs[r] += weight * AFF_SHOULD_BE_SAME; + info->prefs[reg_index] += weight * AFF_SHOULD_BE_SAME; } } /* create list of register candidates and sort by their preference */ DB((dbg, LEVEL_2, "Candidates for %+F:", node)); - reg_prefs = alloca(n_regs * sizeof(reg_prefs[0])); + reg_pref_t *reg_prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(reg_prefs, info); - for (i = 0; i < n_regs; ++i) { - unsigned num = reg_prefs[i].num; - const arch_register_t *reg; - + for (unsigned r = 0; r < n_regs; ++r) { + unsigned num = reg_prefs[r].num; if (!rbitset_is_set(normal_regs, num)) continue; - - reg = arch_register_for_index(cls, num); - DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref)); + const arch_register_t *reg = arch_register_for_index(cls, num); + DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[r].pref)); } DB((dbg, LEVEL_2, "\n")); - allowed_regs = normal_regs; - if (req->type & arch_register_req_type_limited) { + const unsigned *allowed_regs = normal_regs; + if (arch_register_req_is(req, limited)) { allowed_regs = req->limited; } - for (i = 0; i < n_regs; ++i) { - float pref, delta; - ir_node *before; - bool res; - - r = reg_prefs[i].num; - if (!rbitset_is_set(allowed_regs, r)) + unsigned final_reg_index = 0; + unsigned r; + for (r = 0; r < n_regs; ++r) { + final_reg_index = reg_prefs[r].num; + if (!rbitset_is_set(allowed_regs, final_reg_index)) continue; - if (assignments[r] == NULL) + /* alignment constraint? */ + if (width > 1) { + if (arch_register_req_is(req, aligned) && (final_reg_index % width) != 0) + continue; + bool fine = true; + for (unsigned r0 = r+1; r0 < r+width; ++r0) { + if (assignments[r0] != NULL) + fine = false; + } + /* TODO: attempt optimistic split here */ + if (!fine) + continue; + } + + if (assignments[final_reg_index] == NULL) break; - pref = reg_prefs[i].pref; - delta = i+1 < n_regs ? 
pref - reg_prefs[i+1].pref : 0; - before = skip_Proj(node); - res = try_optimistic_split(assignments[r], before, - pref, delta, forbidden_regs, 0); + float pref = reg_prefs[r].pref; + float delta = r+1 < n_regs ? pref - reg_prefs[r+1].pref : 0; + ir_node *before = skip_Proj(node); + bool res + = try_optimistic_split(assignments[final_reg_index], before, pref, + delta, forbidden_regs, 0); if (res) break; } - if (i >= n_regs) { + if (r >= n_regs) { /* the common reason to hit this panic is when 1 of your nodes is not * register pressure faithful */ panic("No register left for %+F\n", node); } - reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); + final_reg = arch_register_for_index(cls, final_reg_index); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, final_reg->name)); + use_reg(node, final_reg, width); } /** @@ -876,7 +787,7 @@ static void assign_reg(const ir_node *block, ir_node *node, * First we count how many destinations a single value has. At the same time * we can be sure that each destination register has at most 1 source register * (it can have 0 which means we don't care what value is in it). - * We ignore all fullfilled permuations (like 7->7) + * We ignore all fulfilled permuations (like 7->7) * In a first pass we create as much copy instructions as possible as they * are generally cheaper than exchanges. We do this by counting into how many * destinations a register has to be copied (in the example it's 2 for register @@ -884,8 +795,8 @@ static void assign_reg(const ir_node *block, ir_node *node, * We can then create a copy into every destination register when the usecount * of that register is 0 (= noone else needs the value in the register). * - * After this step we should have cycles left. We implement a cyclic permutation - * of n registers with n-1 transpositions. + * After this step we should only have cycles left. We implement a cyclic + * permutation of n registers with n-1 transpositions. * * @param live_nodes the set of live nodes, updated due to live range split * @param before the node before we add the permutation @@ -894,14 +805,12 @@ static void assign_reg(const ir_node *block, ir_node *node, * registers. */ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, - unsigned *permutation) + unsigned *permutation) { - unsigned *n_used = ALLOCANZ(unsigned, n_regs); - ir_node *block; - unsigned r; + unsigned *n_used = ALLOCANZ(unsigned, n_regs); /* determine how often each source register needs to be read */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { unsigned old_reg = permutation[r]; ir_node *value; @@ -916,14 +825,11 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, ++n_used[old_reg]; } - block = get_nodes_block(before); + ir_node *block = get_nodes_block(before); /* step1: create copies where immediately possible */ - for (r = 0; r < n_regs; /* empty */) { - ir_node *copy; - ir_node *src; - const arch_register_t *reg; - unsigned old_r = permutation[r]; + for (unsigned r = 0; r < n_regs; /* empty */) { + unsigned old_r = permutation[r]; /* - no need to do anything for fixed points. 
- we can't copy if the value in the dest reg is still needed */ @@ -933,21 +839,22 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, } /* create a copy */ - src = assignments[old_r]; - copy = be_new_Copy(cls, block, src); + ir_node *src = assignments[old_r]; + ir_node *copy = be_new_Copy(block, src); sched_add_before(before, copy); - reg = arch_register_for_index(cls, r); + const arch_register_t *reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", copy, src, before, reg->name)); mark_as_copy_of(copy, src); - use_reg(copy, reg); + unsigned width = 1; /* TODO */ + use_reg(copy, reg, width); if (live_nodes != NULL) { ir_nodeset_insert(live_nodes, copy); } /* old register has 1 user less, permutation is resolved */ - assert(arch_register_get_index(arch_get_irn_register(src)) == old_r); + assert(arch_get_irn_register(src)->index == old_r); permutation[r] = r; assert(n_used[old_r] > 0); @@ -975,14 +882,8 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, * copies are preferable even for 2-cycles) */ /* create perms with the rest */ - for (r = 0; r < n_regs; /* empty */) { - const arch_register_t *reg; - unsigned old_r = permutation[r]; - unsigned r2; - ir_node *in[2]; - ir_node *perm; - ir_node *proj0; - ir_node *proj1; + for (unsigned r = 0; r < n_regs; /* empty */) { + unsigned old_r = permutation[r]; if (old_r == r) { ++r; @@ -993,24 +894,25 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, assert(n_used[old_r] == 1); /* exchange old_r and r2; after that old_r is a fixed point */ - r2 = permutation[old_r]; + unsigned r2 = permutation[old_r]; - in[0] = assignments[r2]; - in[1] = assignments[old_r]; - perm = be_new_Perm(cls, block, 2, in); + ir_node *in[2] = { assignments[r2], assignments[old_r] }; + ir_node *perm = be_new_Perm(cls, block, 2, in); sched_add_before(before, perm); DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", perm, in[0], in[1], before)); - proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0); + unsigned width = 1; /* TODO */ + + ir_node *proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0); mark_as_copy_of(proj0, in[0]); - reg = arch_register_for_index(cls, old_r); - use_reg(proj0, reg); + const arch_register_t *reg0 = arch_register_for_index(cls, old_r); + use_reg(proj0, reg0, width); - proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1); + ir_node *proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1); mark_as_copy_of(proj1, in[1]); - reg = arch_register_for_index(cls, r2); - use_reg(proj1, reg); + const arch_register_t *reg1 = arch_register_for_index(cls, r2); + use_reg(proj1, reg1, width); /* 1 value is now in the correct register */ permutation[old_r] = old_r; @@ -1026,9 +928,9 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, } } -#ifdef DEBUG_libfirm +#ifndef NDEBUG /* now we should only have fixpoints left */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { assert(permutation[r] == r); } #endif @@ -1042,19 +944,16 @@ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, */ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) { - allocation_info_t *info = get_allocation_info(node); - const unsigned *last_uses = info->last_uses; - int arity = get_irn_arity(node); - int i; - - for (i = 0; i < arity; ++i) { - ir_node *op; + allocation_info_t *info = get_allocation_info(node); + const unsigned *last_uses = info->last_uses; + int arity = get_irn_arity(node); + for (int i = 0; i < arity; 
++i) { /* check if one operand is the last use */ if (!rbitset_is_set(last_uses, i)) continue; - op = get_irn_n(node, i); + ir_node *op = get_irn_n(node, i); free_reg_of_value(op); ir_nodeset_remove(live_nodes, op); } @@ -1065,10 +964,8 @@ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) */ static void rewire_inputs(ir_node *node) { - int i; int arity = get_irn_arity(node); - - for (i = 0; i < arity; ++i) { + for (int i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); allocation_info_t *info = try_get_allocation_info(op); @@ -1089,12 +986,9 @@ static void rewire_inputs(ir_node *node) static void determine_live_through_regs(unsigned *bitset, ir_node *node) { const allocation_info_t *info = get_allocation_info(node); - unsigned r; - int i; - int arity; /* mark all used registers as potentially live-through */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (assignments[r] == NULL) continue; if (!rbitset_is_set(normal_regs, r)) @@ -1104,18 +998,140 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) } /* remove registers of value dying at the instruction */ - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - ir_node *op; - const arch_register_t *reg; - + int arity = get_irn_arity(node); + for (int i = 0; i < arity; ++i) { if (!rbitset_is_set(info->last_uses, i)) continue; - op = get_irn_n(node, i); - reg = arch_get_irn_register(op); - rbitset_clear(bitset, arch_register_get_index(reg)); + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg = arch_get_irn_register(op); + rbitset_clear(bitset, reg->index); + } +} + +static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node, + unsigned *forbidden_regs, unsigned *live_through_regs) +{ + unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs); + int *lpp_vars = XMALLOCNZ(int, n_regs*n_regs); + + lpp_t *lpp = lpp_new("prefalloc", lpp_minimize); + //lpp_set_time_limit(lpp, 20); + lpp_set_log(lpp, stdout); + + /** mark some edges as forbidden */ + be_foreach_use(node, cls, req, op, op_req, + if (!arch_register_req_is(req, limited)) + continue; + + const unsigned *limited = req->limited; + const arch_register_t *reg = arch_get_irn_register(op); + unsigned current_reg = reg->index; + for (unsigned r = 0; r < n_regs; ++r) { + if (rbitset_is_set(limited, r)) + continue; + + rbitset_set(forbidden_edges, current_reg*n_regs + r); + } + ); + + /* add all combinations, except for not allowed ones */ + for (unsigned l = 0; l < n_regs; ++l) { + if (!rbitset_is_set(normal_regs, l)) { + char name[15]; + snprintf(name, sizeof(name), "%u_to_%u", l, l); + lpp_vars[l*n_regs+l] = lpp_add_var(lpp, name, lpp_binary, 1); + continue; + } + + for (unsigned r = 0; r < n_regs; ++r) { + if (!rbitset_is_set(normal_regs, r)) + continue; + if (rbitset_is_set(forbidden_edges, l*n_regs + r)) + continue; + /* livethrough values may not use constrained output registers */ + if (rbitset_is_set(live_through_regs, l) + && rbitset_is_set(forbidden_regs, r)) + continue; + + char name[15]; + snprintf(name, sizeof(name), "%u_to_%u", l, r); + + double costs = l==r ? 
9 : 8; + lpp_vars[l*n_regs+r] + = lpp_add_var(lpp, name, lpp_binary, costs); + assert(lpp_vars[l*n_regs+r] > 0); + } + } + /* add constraints */ + for (unsigned l = 0; l < n_regs; ++l) { + /* only 1 destination per register */ + int constraint = -1; + for (unsigned r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + if (constraint < 0) { + char name[64]; + snprintf(name, sizeof(name), "%u_to_dest", l); + constraint = lpp_add_cst(lpp, name, lpp_equal, 1); + } + lpp_set_factor_fast(lpp, constraint, var, 1); + } + /* each destination used by at most 1 value */ + constraint = -1; + for (unsigned r = 0; r < n_regs; ++r) { + int var = lpp_vars[r*n_regs+l]; + if (var == 0) + continue; + if (constraint < 0) { + char name[64]; + snprintf(name, sizeof(name), "one_to_%u", l); + constraint = lpp_add_cst(lpp, name, lpp_less_equal, 1); + } + lpp_set_factor_fast(lpp, constraint, var, 1); + } + } + + lpp_dump_plain(lpp, fopen("lppdump.txt", "w")); + + /* solve lpp */ + lpp_solve(lpp, be_options.ilp_server, be_options.ilp_solver); + if (!lpp_is_sol_valid(lpp)) + panic("ilp solution not valid!"); + + unsigned *assignment = ALLOCAN(unsigned, n_regs); + for (unsigned l = 0; l < n_regs; ++l) { + unsigned dest_reg = (unsigned)-1; + for (unsigned r = 0; r < n_regs; ++r) { + int var = lpp_vars[l*n_regs+r]; + if (var == 0) + continue; + double val = lpp_get_var_sol(lpp, var); + if (val == 1) { + assert(dest_reg == (unsigned)-1); + dest_reg = r; + } + } + assert(dest_reg != (unsigned)-1); + assignment[dest_reg] = l; + } + + fprintf(stderr, "Assignment: "); + for (unsigned l = 0; l < n_regs; ++l) { + fprintf(stderr, "%u ", assignment[l]); } + fprintf(stderr, "\n"); + fflush(stdout); + permute_values(live_nodes, node, assignment); + lpp_free(lpp); +} + +static bool is_aligned(unsigned num, unsigned alignment) +{ + unsigned mask = alignment-1; + assert(is_po2(alignment)); + return (num&mask) == 0; } /** @@ -1127,53 +1143,47 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, unsigned *forbidden_regs) { - int arity = get_irn_arity(node); - int i, res; - hungarian_problem_t *bp; - unsigned l, r; - unsigned *assignment; - ir_node *value; - - /* construct a list of register occupied by live-through values */ - unsigned *live_through_regs = NULL; - - /* see if any use constraints are not met */ + /* see if any use constraints are not met and whether double-width + * values are involved */ + bool double_width = false; bool good = true; - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned r; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - + be_foreach_use(node, cls, req, op, op_req, /* are there any limitations for the i'th operand? 
*/ - req = arch_get_register_req(node, i); - if (!(req->type & arch_register_req_type_limited)) + if (req->width > 1) + double_width = true; + const arch_register_t *reg = arch_get_irn_register(op); + unsigned reg_index = reg->index; + if (arch_register_req_is(req, aligned)) { + if (!is_aligned(reg_index, req->width)) { + good = false; + continue; + } + } + if (!arch_register_req_is(req, limited)) continue; - limited = req->limited; - reg = arch_get_irn_register(op); - r = arch_register_get_index(reg); - if (!rbitset_is_set(limited, r)) { + const unsigned *limited = req->limited; + if (!rbitset_is_set(limited, reg_index)) { /* found an assignment outside the limited set */ good = false; - break; + continue; } - } + ); /* is any of the live-throughs using a constrained output register? */ - be_foreach_definition(node, cls, value, - if (! (req_->type & arch_register_req_type_limited)) + unsigned *live_through_regs = NULL; + be_foreach_definition(node, cls, value, req, + (void)value; + if (req->width > 1) + double_width = true; + if (!arch_register_req_is(req, limited)) continue; if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); + live_through_regs = rbitset_alloca(n_regs); determine_live_through_regs(live_through_regs, node); } - rbitset_or(forbidden_regs, req_->limited, n_regs); - if (rbitsets_have_common(req_->limited, live_through_regs, n_regs)) + rbitset_or(forbidden_regs, req->limited, n_regs); + if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) good = false; ); @@ -1182,7 +1192,13 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, /* create these arrays if we haven't yet */ if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); + live_through_regs = rbitset_alloca(n_regs); + } + + if (double_width) { + /* only the ILP variant can solve this yet */ + solve_lpp(live_nodes, node, forbidden_regs, live_through_regs); + return; } /* at this point we have to construct a bipartite matching problem to see @@ -1191,16 +1207,17 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, * right, destinations left because this will produce the solution * in the format required for permute_values. 
*/ - bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); + hungarian_problem_t *bp + = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); /* add all combinations, then remove not allowed ones */ - for (l = 0; l < n_regs; ++l) { + for (unsigned l = 0; l < n_regs; ++l) { if (!rbitset_is_set(normal_regs, l)) { hungarian_add(bp, l, l, 1); continue; } - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(normal_regs, r)) continue; /* livethrough values may not use constrainted output registers */ @@ -1212,35 +1229,25 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, } } - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned current_reg; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) + be_foreach_use(node, cls, req, op, op_req, + if (!arch_register_req_is(req, limited)) continue; - req = arch_get_register_req(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; - - limited = req->limited; - reg = arch_get_irn_register(op); - current_reg = arch_register_get_index(reg); - for (r = 0; r < n_regs; ++r) { + const unsigned *limited = req->limited; + const arch_register_t *reg = arch_get_irn_register(op); + unsigned current_reg = reg->index; + for (unsigned r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; hungarian_remove(bp, r, current_reg); } - } + ); //hungarian_print_cost_matrix(bp, 1); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, assignment, NULL, 0); + unsigned *assignment = ALLOCAN(unsigned, n_regs); + int res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); #if 0 @@ -1256,17 +1263,14 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, permute_values(live_nodes, node, assignment); } -/** test wether a node @p n is a copy of the value of node @p of */ +/** test whether a node @p n is a copy of the value of node @p of */ static bool is_copy_of(ir_node *value, ir_node *test_value) { - allocation_info_t *test_info; - allocation_info_t *info; - if (value == test_value) return true; - info = get_allocation_info(value); - test_info = get_allocation_info(test_value); + allocation_info_t *info = get_allocation_info(value); + allocation_info_t *test_info = get_allocation_info(test_value); return test_info->original_value == info->original_value; } @@ -1277,10 +1281,9 @@ static bool is_copy_of(ir_node *value, ir_node *test_value) */ static int find_value_in_block_info(block_info_t *info, ir_node *value) { - unsigned r; - ir_node **assignments = info->assignments; - for (r = 0; r < n_regs; ++r) { - ir_node *a_value = assignments[r]; + ir_node **end_assignments = info->assignments; + for (unsigned r = 0; r < n_regs; ++r) { + ir_node *a_value = end_assignments[r]; if (a_value == NULL) continue; @@ -1297,80 +1300,66 @@ static int find_value_in_block_info(block_info_t *info, ir_node *value) */ static void add_phi_permutations(ir_node *block, int p) { - unsigned r; - unsigned *permutation; - ir_node **old_assignments; - bool need_permutation; - ir_node *node; - ir_node *pred = get_Block_cfgpred_block(block, p); - + ir_node *pred = get_Block_cfgpred_block(block, p); block_info_t *pred_info = get_block_info(pred); /* predecessor not processed yet? 
nothing to do */ if (!pred_info->processed) return; - permutation = ALLOCAN(unsigned, n_regs); - for (r = 0; r < n_regs; ++r) { + unsigned *permutation = ALLOCAN(unsigned, n_regs); + for (unsigned r = 0; r < n_regs; ++r) { permutation[r] = r; } /* check phi nodes */ - need_permutation = false; - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - const arch_register_t *reg; - int regn; - int a; - ir_node *op; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) + bool need_permutation = false; + ir_node *phi = sched_first(block); + for ( ; is_Phi(phi); phi = sched_next(phi)) { + if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; - op = get_Phi_pred(node, p); - if (!arch_irn_consider_in_reg_alloc(cls, op)) + ir_node *phi_pred = get_Phi_pred(phi, p); + int a = find_value_in_block_info(pred_info, phi_pred); + assert(a >= 0); + + const arch_register_t *reg = arch_get_irn_register(phi); + int regn = reg->index; + /* same register? nothing to do */ + if (regn == a) continue; - a = find_value_in_block_info(pred_info, op); - assert(a >= 0); + ir_node *op = pred_info->assignments[a]; + const arch_register_t *op_reg = arch_get_irn_register(op); + /* Virtual registers are ok, too. */ + if (op_reg->type & arch_register_type_virtual) + continue; - reg = arch_get_irn_register(node); - regn = arch_register_get_index(reg); - if (regn != a) { - permutation[regn] = a; - need_permutation = true; - } + permutation[regn] = a; + need_permutation = true; } if (need_permutation) { /* permute values at end of predecessor */ - old_assignments = assignments; + ir_node **old_assignments = assignments; assignments = pred_info->assignments; permute_values(NULL, be_get_end_of_block_insertion_point(pred), - permutation); + permutation); assignments = old_assignments; } /* change phi nodes to use the copied values */ - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - int a; - ir_node *op; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - op = get_Phi_pred(node, p); - /* no need to do anything for Unknown inputs */ - if (!arch_irn_consider_in_reg_alloc(cls, op)) + phi = sched_first(block); + for ( ; is_Phi(phi); phi = sched_next(phi)) { + if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; /* we have permuted all values into the correct registers so we can simply query which value occupies the phis register in the predecessor */ - a = arch_register_get_index(arch_get_irn_register(node)); - op = pred_info->assignments[a]; - set_Phi_pred(node, p, op); + int a = arch_get_irn_register(phi)->index; + ir_node *op = pred_info->assignments[a]; + set_Phi_pred(phi, p, op); } } @@ -1380,33 +1369,27 @@ static void add_phi_permutations(ir_node *block, int p) */ static void adapt_phi_prefs(ir_node *phi) { - int i; - int arity = get_irn_arity(phi); ir_node *block = get_nodes_block(phi); allocation_info_t *info = get_allocation_info(phi); - for (i = 0; i < arity; ++i) { + int arity = get_irn_arity(phi); + for (int i = 0; i < arity; ++i) { ir_node *op = get_irn_n(phi, i); const arch_register_t *reg = arch_get_irn_register(op); - ir_node *pred_block; - block_info_t *pred_block_info; - float weight; - unsigned r; if (reg == NULL) continue; /* we only give the bonus if the predecessor already has registers * assigned, otherwise we only see a dummy value * and any conclusions about its register are useless */ - pred_block = get_Block_cfgpred_block(block, i); - pred_block_info = get_block_info(pred_block); + ir_node *pred_block = 
get_Block_cfgpred_block(block, i); + block_info_t *pred_block_info = get_block_info(pred_block); if (!pred_block_info->processed) continue; /* give bonus for already assigned register */ - weight = (float)get_block_execfreq(execfreqs, pred_block); - r = arch_register_get_index(reg); - info->prefs[r] += weight * AFF_PHI; + float weight = (float)get_block_execfreq(pred_block); + info->prefs[reg->index] += weight * AFF_PHI; } } @@ -1416,23 +1399,21 @@ static void adapt_phi_prefs(ir_node *phi) */ static void propagate_phi_register(ir_node *phi, unsigned assigned_r) { - int i; ir_node *block = get_nodes_block(phi); - int arity = get_irn_arity(phi); - for (i = 0; i < arity; ++i) { + int arity = get_irn_arity(phi); + for (int i = 0; i < arity; ++i) { ir_node *op = get_Phi_pred(phi, i); allocation_info_t *info = get_allocation_info(op); ir_node *pred_block = get_Block_cfgpred_block(block, i); - unsigned r; float weight - = (float)get_block_execfreq(execfreqs, pred_block) * AFF_PHI; + = (float)get_block_execfreq(pred_block) * AFF_PHI; if (info->prefs[assigned_r] >= weight) continue; /* promote the prefered register */ - for (r = 0; r < n_regs; ++r) { + for (unsigned r = 0; r < n_regs; ++r) { if (info->prefs[r] > -weight) { info->prefs[r] = -weight; } @@ -1446,14 +1427,8 @@ static void propagate_phi_register(ir_node *phi, unsigned assigned_r) static void assign_phi_registers(ir_node *block) { - int n_phis = 0; - int n; - int res; - unsigned *assignment; - ir_node *node; - hungarian_problem_t *bp; - /* count phi nodes */ + int n_phis = 0; sched_foreach(block, node) { if (!is_Phi(node)) break; @@ -1466,12 +1441,10 @@ static void assign_phi_registers(ir_node *block) return; /* build a bipartite matching problem for all phi nodes */ - bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); - n = 0; + hungarian_problem_t *bp + = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); + int n = 0; sched_foreach(block, node) { - unsigned r; - - allocation_info_t *info; if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) @@ -1480,15 +1453,13 @@ static void assign_phi_registers(ir_node *block) /* give boni for predecessor colorings */ adapt_phi_prefs(node); /* add stuff to bipartite problem */ - info = get_allocation_info(node); + allocation_info_t *info = get_allocation_info(node); DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); - for (r = 0; r < n_regs; ++r) { - float costs; - + for (unsigned r = 0; r < n_regs; ++r) { if (!rbitset_is_set(normal_regs, r)) continue; - costs = info->prefs[r]; + float costs = info->prefs[r]; costs = costs < 0 ? 
-logf(-costs+1) : logf(costs+1); costs *= 100; costs += 10000; @@ -1503,83 +1474,86 @@ static void assign_phi_registers(ir_node *block) //hungarian_print_cost_matrix(bp, 7); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, assignment, NULL, 0); + unsigned *assignment = ALLOCAN(unsigned, n_regs); + int res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); /* apply results */ n = 0; sched_foreach(block, node) { - unsigned r; - const arch_register_t *reg; - if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; + const arch_register_req_t *req + = arch_get_irn_register_req(node); - r = assignment[n++]; + unsigned r = assignment[n++]; assert(rbitset_is_set(normal_regs, r)); - reg = arch_register_for_index(cls, r); + const arch_register_t *reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); + use_reg(node, reg, req->width); /* adapt preferences for phi inputs */ - if (propagate_phi_registers) - propagate_phi_register(node, r); + propagate_phi_register(node, r); } } +static arch_register_req_t *allocate_reg_req(ir_graph *irg) +{ + struct obstack *obst = be_get_be_obst(irg); + arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); + return req; +} + /** * Walker: assign registers to all nodes of a block that * need registers from the currently considered register class. */ static void allocate_coalesce_block(ir_node *block, void *data) { - int i; - ir_nodeset_t live_nodes; - ir_node *node; - int n_preds; - block_info_t *block_info; - block_info_t **pred_block_infos; - ir_node **phi_ins; - unsigned *forbidden_regs; /**< collects registers which must - not be used for optimistic splits */ - (void) data; DB((dbg, LEVEL_2, "* Block %+F\n", block)); /* clear assignments */ - block_info = get_block_info(block); + block_info_t *block_info = get_block_info(block); assignments = block_info->assignments; + ir_nodeset_t live_nodes; ir_nodeset_init(&live_nodes); /* gather regalloc infos of predecessor blocks */ - n_preds = get_Block_n_cfgpreds(block); - pred_block_infos = ALLOCAN(block_info_t*, n_preds); - for (i = 0; i < n_preds; ++i) { + int n_preds = get_Block_n_cfgpreds(block); + block_info_t **pred_block_infos = ALLOCAN(block_info_t*, n_preds); + for (int i = 0; i < n_preds; ++i) { ir_node *pred = get_Block_cfgpred_block(block, i); block_info_t *pred_info = get_block_info(pred); pred_block_infos[i] = pred_info; } - phi_ins = ALLOCAN(ir_node*, n_preds); + ir_node **phi_ins = ALLOCAN(ir_node*, n_preds); /* collect live-in nodes and preassigned values */ - be_lv_foreach(lv, block, be_lv_state_in, i) { - const arch_register_t *reg; - int p; - bool need_phi = false; + be_lv_foreach(lv, block, be_lv_state_in, node) { + const arch_register_req_t *req = arch_get_irn_register_req(node); + if (req->cls != cls) + continue; - node = be_lv_get_irn(lv, block, i); - if (!arch_irn_consider_in_reg_alloc(cls, node)) + if (arch_register_req_is(req, ignore)) { + allocation_info_t *info = get_allocation_info(node); + info->current_value = node; + + const arch_register_t *reg = arch_get_irn_register(node); + assert(reg != NULL); /* ignore values must be preassigned */ + use_reg(node, reg, req->width); continue; + } /* check all predecessors for this value, if it is not everywhere the same or unknown then we have to construct a phi (we collect the potential phi inputs here) */ - for (p = 0; p < n_preds; ++p) { + bool need_phi 
= false; + for (int p = 0; p < n_preds; ++p) { block_info_t *pred_info = pred_block_infos[p]; if (!pred_info->processed) { @@ -1600,22 +1574,24 @@ static void allocate_coalesce_block(ir_node *block, void *data) } if (need_phi) { - ir_mode *mode = get_irn_mode(node); - const arch_register_req_t *req = get_default_req_current_cls(); - ir_node *phi; - - phi = new_r_Phi(block, n_preds, phi_ins, mode); - be_set_phi_reg_req(phi, req); + ir_mode *mode = get_irn_mode(node); + const arch_register_req_t *phi_req = cls->class_req; + if (req->width > 1) { + arch_register_req_t *new_req = allocate_reg_req(irg); + new_req->cls = cls; + new_req->type = req->type & arch_register_req_type_aligned; + new_req->width = req->width; + phi_req = new_req; + } + ir_node *phi = be_new_Phi(block, n_preds, phi_ins, mode, + phi_req); DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); #ifdef DEBUG_libfirm - { - int i; - for (i = 0; i < n_preds; ++i) { - DB((dbg, LEVEL_3, " %+F", phi_ins[i])); - } - DB((dbg, LEVEL_3, "\n")); + for (int pi = 0; pi < n_preds; ++pi) { + DB((dbg, LEVEL_3, " %+F", phi_ins[pi])); } + DB((dbg, LEVEL_3, "\n")); #endif mark_as_copy_of(phi, node); sched_add_after(block, phi); @@ -1632,37 +1608,31 @@ static void allocate_coalesce_block(ir_node *block, void *data) } /* if the node already has a register assigned use it */ - reg = arch_get_irn_register(node); + const arch_register_t *reg = arch_get_irn_register(node); if (reg != NULL) { - use_reg(node, reg); + use_reg(node, reg, req->width); } /* remember that this node is live at the beginning of the block */ ir_nodeset_insert(&live_nodes, node); } - rbitset_alloca(forbidden_regs, n_regs); + /** Collects registers which must not be used for optimistic splits. */ + unsigned *const forbidden_regs = rbitset_alloca(n_regs); /* handle phis... 
*/ assign_phi_registers(block); /* all live-ins must have a register */ -#ifdef DEBUG_libfirm - { - ir_nodeset_iterator_t iter; - foreach_ir_nodeset(&live_nodes, node, iter) { - const arch_register_t *reg = arch_get_irn_register(node); - assert(reg != NULL); - } +#ifndef NDEBUG + foreach_ir_nodeset(&live_nodes, node, iter) { + const arch_register_t *reg = arch_get_irn_register(node); + assert(reg != NULL); } #endif /* assign instructions in the block */ sched_foreach(block, node) { - int i; - int arity; - ir_node *value; - /* phis are already assigned */ if (is_Phi(node)) continue; @@ -1676,24 +1646,17 @@ static void allocate_coalesce_block(ir_node *block, void *data) rewire_inputs(node); /* we may not use registers used for inputs for optimistic splits */ - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - reg = arch_get_irn_register(op); - rbitset_set(forbidden_regs, arch_register_get_index(reg)); - } + be_foreach_use(node, cls, in_req, op, op_req, + const arch_register_t *reg = arch_get_irn_register(op); + rbitset_set(forbidden_regs, reg->index); + ); /* free registers of values last used at this instruction */ free_last_uses(&live_nodes, node); /* assign output registers */ - /* TODO: 2 phases: first: pre-assigned ones, 2nd real regs */ - be_foreach_definition(node, cls, value, - assign_reg(block, value, forbidden_regs); + be_foreach_definition_(node, cls, value, req, + assign_reg(block, value, req, forbidden_regs); ); } @@ -1704,8 +1667,7 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* permute values at end of predecessor blocks in case of phi-nodes */ if (n_preds > 1) { - int p; - for (p = 0; p < n_preds; ++p) { + for (int p = 0; p < n_preds; ++p) { add_phi_permutations(block, p); } } @@ -1733,43 +1695,41 @@ struct block_costs_t { static int cmp_block_costs(const void *d1, const void *d2) { - const ir_node * const *block1 = d1; - const ir_node * const *block2 = d2; - const block_costs_t *info1 = get_irn_link(*block1); - const block_costs_t *info2 = get_irn_link(*block2); + const ir_node * const *block1 = (const ir_node**)d1; + const ir_node * const *block2 = (const ir_node**)d2; + const block_costs_t *info1 = (const block_costs_t*)get_irn_link(*block1); + const block_costs_t *info2 = (const block_costs_t*)get_irn_link(*block2); return QSORT_CMP(info2->costs, info1->costs); } static void determine_block_order(void) { - int i; ir_node **blocklist = be_get_cfgpostorder(irg); - int n_blocks = ARR_LEN(blocklist); + size_t n_blocks = ARR_LEN(blocklist); int dfs_num = 0; pdeq *worklist = new_pdeq(); ir_node **order = XMALLOCN(ir_node*, n_blocks); - int order_p = 0; + size_t order_p = 0; /* clear block links... 
-	for (i = 0; i < n_blocks; ++i) {
-		ir_node *block = blocklist[i];
+	for (size_t p = 0; p < n_blocks; ++p) {
+		ir_node *block = blocklist[p];
 		set_irn_link(block, NULL);
 	}
 
 	/* walk blocks in reverse postorder, the costs for each block are the
 	 * sum of the costs of its predecessors (excluding the costs on backedges
 	 * which we can't determine) */
-	for (i = n_blocks-1; i >= 0; --i) {
+	for (size_t p = n_blocks; p > 0;) {
 		block_costs_t *cost_info;
-		ir_node *block = blocklist[i];
+		ir_node *block = blocklist[--p];
 
-		float execfreq   = (float)get_block_execfreq(execfreqs, block);
+		float execfreq   = (float)get_block_execfreq(block);
 		float costs      = execfreq;
 		int   n_cfgpreds = get_Block_n_cfgpreds(block);
-		int   p;
-		for (p = 0; p < n_cfgpreds; ++p) {
-			ir_node       *pred_block = get_Block_cfgpred_block(block, p);
-			block_costs_t *pred_costs = get_irn_link(pred_block);
+		for (int p2 = 0; p2 < n_cfgpreds; ++p2) {
+			ir_node       *pred_block = get_Block_cfgpred_block(block, p2);
+			block_costs_t *pred_costs = (block_costs_t*)get_irn_link(pred_block);
 			/* we don't have any info for backedges */
 			if (pred_costs == NULL)
 				continue;
@@ -1788,25 +1748,24 @@ static void determine_block_order(void)
 	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
 	inc_irg_block_visited(irg);
 
-	for (i = 0; i < n_blocks; ++i) {
-		ir_node *block = blocklist[i];
+	for (size_t p = 0; p < n_blocks; ++p) {
+		ir_node *block = blocklist[p];
 		if (Block_block_visited(block))
 			continue;
 
 		/* continually add predecessors with highest costs to worklist
 		 * (without using backedges) */
 		do {
-			block_costs_t *info       = get_irn_link(block);
+			block_costs_t *info       = (block_costs_t*)get_irn_link(block);
 			ir_node       *best_pred  = NULL;
 			float          best_costs = -1;
 			int            n_cfgpred  = get_Block_n_cfgpreds(block);
-			int            i;
 
 			pdeq_putr(worklist, block);
 			mark_Block_block_visited(block);
-			for (i = 0; i < n_cfgpred; ++i) {
+			for (int i = 0; i < n_cfgpred; ++i) {
 				ir_node       *pred_block = get_Block_cfgpred_block(block, i);
-				block_costs_t *pred_info  = get_irn_link(pred_block);
+				block_costs_t *pred_info  = (block_costs_t*)get_irn_link(pred_block);
 
 				/* ignore backedges */
 				if (pred_info->dfs_num > info->dfs_num)
@@ -1822,7 +1781,7 @@ static void determine_block_order(void)
 
 	/* now put all nodes in the worklist in our final order */
 	while (!pdeq_empty(worklist)) {
-		ir_node *pblock = pdeq_getr(worklist);
+		ir_node *pblock = (ir_node*)pdeq_getr(worklist);
 		assert(order_p < n_blocks);
 		order[order_p++] = pblock;
 	}
@@ -1841,15 +1800,18 @@ static void determine_block_order(void)
 
 	n_block_order = n_blocks;
 }
 
+static void free_block_order(void)
+{
+	xfree(block_order);
+}
+
 /**
  * Run the register allocator for the current register class.
 */
 static void be_pref_alloc_cls(void)
 {
-	int i;
-
-	lv = be_assure_liveness(irg);
-	be_liveness_assure_sets(lv);
+	be_assure_live_sets(irg);
+	lv = be_get_irg_liveness(irg);
 
 	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
 
@@ -1858,10 +1820,9 @@ static void be_pref_alloc_cls(void)
 	be_clear_links(irg);
 	irg_block_walk_graph(irg, NULL, analyze_block, NULL);
 
-	if (create_congruence_classes)
-		combine_congruence_classes();
+	combine_congruence_classes();
 
-	for (i = 0; i < n_block_order; ++i) {
+	for (size_t i = 0; i < n_block_order; ++i) {
 		ir_node *block = block_order[i];
 		allocate_coalesce_block(block, NULL);
 	}
@@ -1871,7 +1832,7 @@ static void dump(int mask, ir_graph *irg, const char *suffix)
 {
-	if (be_get_irg_options(irg)->dump_flags & mask)
+	if (be_options.dump_flags & mask)
 		dump_ir_graph(irg, suffix);
 }
@@ -1885,7 +1846,7 @@
 	be_pre_spill_prepare_constr(irg, cls);
 	be_timer_pop(T_RA_CONSTR);
 
-	dump(DUMP_RA, irg, "-spillprepare");
+	dump(DUMP_RA, irg, "spillprepare");
 
 	/* spill */
 	be_timer_push(T_RA_SPILL);
@@ -1896,7 +1857,7 @@
 	check_for_memory_operands(irg);
 	be_timer_pop(T_RA_SPILL_APPLY);
 
-	dump(DUMP_RA, irg, "-spill");
+	dump(DUMP_RA, irg, "spill");
 }
 
 /**
@@ -1904,21 +1865,17 @@
  */
 static void be_pref_alloc(ir_graph *new_irg)
 {
-	const arch_env_t *arch_env = be_get_irg_arch_env(new_irg);
-	int               n_cls    = arch_env_get_n_reg_class(arch_env);
-	int               c;
-
 	obstack_init(&obst);
 
-	irg       = new_irg;
-	execfreqs = be_get_irg_exec_freq(irg);
+	irg = new_irg;
 
 	/* determine a good coloring order */
 	determine_block_order();
 
-	for (c = 0; c < n_cls; ++c) {
-		cls             = arch_env_get_reg_class(arch_env, c);
-		default_cls_req = NULL;
+	const arch_env_t *arch_env = be_get_irg_arch_env(new_irg);
+	int               n_cls    = arch_env->n_register_classes;
+	for (int c = 0; c < n_cls; ++c) {
+		cls = &arch_env->register_classes[c];
 		if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra)
 			continue;
@@ -1926,16 +1883,16 @@ static void be_pref_alloc(ir_graph *new_irg)
 		n_regs      = arch_register_class_n_regs(cls);
 		normal_regs = rbitset_malloc(n_regs);
-		be_abi_set_non_ignore_regs(be_get_irg_abi(irg), cls, normal_regs);
+		be_set_allocatable_regs(irg, cls, normal_regs);
 
 		spill();
 
 		/* verify schedule and register pressure */
 		be_timer_push(T_VERIFY);
-		if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) {
+		if (be_options.verify_option == BE_VERIFY_WARN) {
 			be_verify_schedule(irg);
 			be_verify_register_pressure(irg, cls);
-		} else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) {
+		} else if (be_options.verify_option == BE_VERIFY_ASSERT) {
 			assert(be_verify_schedule(irg) && "Schedule verification failed");
 			assert(be_verify_register_pressure(irg, cls)
 			       && "Register pressure verification failed");
@@ -1948,21 +1905,22 @@
 
 		/* we most probably constructed new Phis so liveness info is invalid
 		 * now */
-		/* TODO: test liveness_introduce */
-		be_liveness_invalidate(lv);
+		be_invalidate_live_sets(irg);
 		free(normal_regs);
 
 		stat_ev_ctx_pop("regcls");
 	}
 
+	free_block_order();
+
 	be_timer_push(T_RA_SPILL_APPLY);
 	be_abi_fix_stack_nodes(irg);
 	be_timer_pop(T_RA_SPILL_APPLY);
 
 	be_timer_push(T_VERIFY);
-	if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) {
+	if (be_options.verify_option == BE_VERIFY_WARN) {
 		be_verify_register_allocation(irg);
-	} else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) {
+	} else if (be_options.verify_option == BE_VERIFY_ASSERT) {
 		assert(be_verify_register_allocation(irg) && "Register allocation invalid");
 	}
@@ -1971,16 +1929,10 @@
 
 	obstack_free(&obst, NULL);
 }
 
-BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc);
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc)
 void be_init_pref_alloc(void)
 {
-	static be_ra_t be_ra_pref = {
-		be_pref_alloc,
-	};
-	lc_opt_entry_t *be_grp          = lc_opt_get_grp(firm_opt_get_root(), "be");
-	lc_opt_entry_t *prefalloc_group = lc_opt_get_grp(be_grp, "prefalloc");
-	lc_opt_add_table(prefalloc_group, options);
-
+	static be_ra_t be_ra_pref = { be_pref_alloc };
 	be_register_allocator("pref", &be_ra_pref);
 	FIRM_DBG_REGISTER(dbg, "firm.be.prefalloc");
 }