From: Matthias Braun
Date: Fri, 18 Dec 2009 09:14:16 +0000 (+0000)
Subject: rename straight alloc to pref alloc to match the name of the paper
X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=1f231ec18477e4384a4b3e105270262ae19566de;p=libfirm

rename straight alloc to pref alloc to match the name of the paper

[r26802]
---

diff --git a/ir/be/bemodule.c b/ir/be/bemodule.c
index 899be198e..deba9245d 100644
--- a/ir/be/bemodule.c
+++ b/ir/be/bemodule.c
@@ -69,7 +69,7 @@ void be_init_spillbelady2(void);
 void be_init_spillbelady3(void);
 void be_init_ssaconstr(void);
 void be_init_stabs(void);
-void be_init_straight_alloc(void);
+void be_init_pref_alloc(void);
 void be_init_ifg(void);
 void be_init_irgmod(void);
 void be_init_loopana(void);
@@ -128,7 +128,7 @@ void be_init_modules(void)
 	be_init_spillbelady3();
 	be_init_daemelspill();
 	be_init_ssaconstr();
-	be_init_straight_alloc();
+	be_init_pref_alloc();
 	be_init_state();
 	be_init_ifg();
 	be_init_stabs();
diff --git a/ir/be/benewalloc.c b/ir/be/benewalloc.c
deleted file mode 100644
index 8b8e340c6..000000000
--- a/ir/be/benewalloc.c
+++ /dev/null
@@ -1,2051 +0,0 @@
-/*
- * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
- *
- * This file is part of libFirm.
- *
- * This file may be distributed and/or modified under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation and appearing in the file LICENSE.GPL included in the
- * packaging of this file.
- *
- * Licensees holding valid libFirm Professional Edition licenses may use
- * this file in accordance with the libFirm Commercial License.
- * Agreement provided with the Software.
- *
- * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
- * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE.
- */
-
-/**
- * @file
- * @brief   New approach to allocation and copy coalescing
- * @author  Matthias Braun
- * @date    14.2.2009
- * @version $Id$
- *
- * ... WE NEED A NAME FOR THIS ...
- *
- * Only a proof of concept at this moment...
- *
- * The idea is to allocate registers in 2 passes:
- * 1. A first pass to determine "preferred" registers for live-ranges. This
- *    calculates for each register and each live-range a value indicating
- *    the usefulness. (You can roughly think of the value as the negative
- *    costs needed for copies when the value is in the specific registers...)
- *
- * 2. Walk blocks and assign registers in a greedy fashion, preferring
- *    registers with high preferences. When register constraints are not met,
- *    add copies and split live-ranges.
- *
- * TODO:
- *  - make use of free registers in the permute_values code
- *  - think about a smarter sequence of visiting the blocks.
Sorted by - * execfreq might be good, or looptree from inner to outermost loops going - * over blocks in a reverse postorder - * - propagate preferences through Phis - */ -#include "config.h" - -#include -#include -#include - -#include "error.h" -#include "execfreq.h" -#include "ircons.h" -#include "irdom.h" -#include "iredges_t.h" -#include "irgraph_t.h" -#include "irgwalk.h" -#include "irnode_t.h" -#include "irprintf.h" -#include "obst.h" -#include "raw_bitset.h" -#include "unionfind.h" -#include "pdeq.h" -#include "hungarian.h" - -#include "beabi.h" -#include "bechordal_t.h" -#include "be.h" -#include "beirg.h" -#include "belive_t.h" -#include "bemodule.h" -#include "benode.h" -#include "bera.h" -#include "besched.h" -#include "bespill.h" -#include "bespillutil.h" -#include "beverify.h" -#include "beutil.h" - -#define USE_FACTOR 1.0f -#define DEF_FACTOR 1.0f -#define NEIGHBOR_FACTOR 0.2f -#define AFF_SHOULD_BE_SAME 0.5f -#define AFF_PHI 1.0f -#define SPLIT_DELTA 1.0f -#define MAX_OPTIMISTIC_SPLIT_RECURSION 0 - -DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) - -static struct obstack obst; -static be_irg_t *birg; -static ir_graph *irg; -static const arch_register_class_t *cls; -static const arch_register_req_t *default_cls_req; -static be_lv_t *lv; -static const ir_exec_freq *execfreqs; -static unsigned n_regs; -static unsigned *normal_regs; -static int *congruence_classes; -static ir_node **block_order; -static int n_block_order; -static int create_preferences = true; -static int create_congruence_classes = true; -static int propagate_phi_registers = true; - -static const lc_opt_table_entry_t options[] = { - LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences), - LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes), - LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers), - LC_OPT_LAST -}; - -/** currently active assignments (while processing a basic block) - * maps registers to values(their current copies) */ -static ir_node **assignments; - -/** - * allocation information: last_uses, register preferences - * the information is per firm-node. - */ -struct allocation_info_t { - unsigned last_uses; /**< bitset indicating last uses (input pos) */ - ir_node *current_value; /**< copy of the value that should be used */ - ir_node *original_value; /**< for copies point to original value */ - float prefs[0]; /**< register preferences */ -}; -typedef struct allocation_info_t allocation_info_t; - -/** helper datastructure used when sorting register preferences */ -struct reg_pref_t { - unsigned num; - float pref; -}; -typedef struct reg_pref_t reg_pref_t; - -/** per basic-block information */ -struct block_info_t { - bool processed; /**< indicate whether block is processed */ - ir_node *assignments[0]; /**< register assignments at end of block */ -}; -typedef struct block_info_t block_info_t; - -/** - * Get the allocation info for a node. - * The info is allocated on the first visit of a node. 
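A note on the layout above: allocation_info_t ends in a flexible preference array (prefs[0]) so the struct header and the n_regs floats come from a single obstack allocation, which OALLOCFZ also zero-fills. A minimal standalone sketch of the same pattern, assuming plain libc instead of the obstack helpers; node_info_t and new_node_info are illustrative names, not from the codebase:

    #include <stdlib.h>

    /* Same trick as allocation_info_t: one allocation carries the fixed
     * header plus n_regs preference slots. C99 spells the flexible array
     * member prefs[]; the original uses the older prefs[0] variant. */
    typedef struct node_info_t {
        unsigned last_uses;  /* bitset of last-use input positions */
        float    prefs[];    /* n_regs per-register preferences */
    } node_info_t;

    static node_info_t *new_node_info(unsigned n_regs)
    {
        /* calloc zero-fills, mirroring the ...FZ (zeroing) allocator */
        return calloc(1, sizeof(node_info_t) + n_regs * sizeof(float));
    }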
- */ -static allocation_info_t *get_allocation_info(ir_node *node) -{ - allocation_info_t *info = get_irn_link(node); - if (info == NULL) { - info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs); - info->current_value = node; - info->original_value = node; - set_irn_link(node, info); - } - - return info; -} - -static allocation_info_t *try_get_allocation_info(const ir_node *node) -{ - return (allocation_info_t*) get_irn_link(node); -} - -/** - * Get allocation information for a basic block - */ -static block_info_t *get_block_info(ir_node *block) -{ - block_info_t *info = get_irn_link(block); - - assert(is_Block(block)); - if (info == NULL) { - info = OALLOCFZ(&obst, block_info_t, assignments, n_regs); - set_irn_link(block, info); - } - - return info; -} - -/** - * Get default register requirement for the current register class - */ -static const arch_register_req_t *get_default_req_current_cls(void) -{ - if (default_cls_req == NULL) { - struct obstack *obst = get_irg_obstack(irg); - arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); - - req->type = arch_register_req_type_normal; - req->cls = cls; - - default_cls_req = req; - } - return default_cls_req; -} - -/** - * Link the allocation info of a node to a copy. - * Afterwards, both nodes uses the same allocation info. - * Copy must not have an allocation info assigned yet. - * - * @param copy the node that gets the allocation info assigned - * @param value the original node - */ -static void mark_as_copy_of(ir_node *copy, ir_node *value) -{ - ir_node *original; - allocation_info_t *info = get_allocation_info(value); - allocation_info_t *copy_info = get_allocation_info(copy); - - /* find original value */ - original = info->original_value; - if (original != value) { - info = get_allocation_info(original); - } - - assert(info->original_value == original); - info->current_value = copy; - - /* the copy should not be linked to something else yet */ - assert(copy_info->original_value == copy); - copy_info->original_value = original; - - /* copy over allocation preferences */ - memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0])); -} - -/** - * Calculate the penalties for every register on a node and its live neighbors. 
- * - * @param live_nodes the set of live nodes at the current position, may be NULL - * @param penalty the penalty to subtract from - * @param limited a raw bitset containing the limited set for the node - * @param node the node - */ -static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, - float penalty, const unsigned* limited, - ir_node *node) -{ - ir_nodeset_iterator_t iter; - unsigned r; - unsigned n_allowed; - allocation_info_t *info = get_allocation_info(node); - ir_node *neighbor; - - /* give penalty for all forbidden regs */ - for (r = 0; r < n_regs; ++r) { - if (rbitset_is_set(limited, r)) - continue; - - info->prefs[r] -= penalty; - } - - /* all other live values should get a penalty for allowed regs */ - if (live_nodes == NULL) - return; - - penalty *= NEIGHBOR_FACTOR; - n_allowed = rbitset_popcnt(limited, n_regs); - if (n_allowed > 1) { - /* only create a very weak penalty if multiple regs are allowed */ - penalty = (penalty * 0.8f) / n_allowed; - } - foreach_ir_nodeset(live_nodes, neighbor, iter) { - allocation_info_t *neighbor_info; - - /* TODO: if op is used on multiple inputs we might not do a - * continue here */ - if (neighbor == node) - continue; - - neighbor_info = get_allocation_info(neighbor); - for (r = 0; r < n_regs; ++r) { - if (!rbitset_is_set(limited, r)) - continue; - - neighbor_info->prefs[r] -= penalty; - } - } -} - -/** - * Calculate the preferences of a definition for the current register class. - * If the definition uses a limited set of registers, reduce the preferences - * for the limited register on the node and its neighbors. - * - * @param live_nodes the set of live nodes at the current node - * @param weight the weight - * @param node the current node - */ -static void check_defs(const ir_nodeset_t *live_nodes, float weight, - ir_node *node) -{ - const arch_register_req_t *req; - - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { - ir_node *proj = get_edge_src_irn(edge); - check_defs(live_nodes, weight, proj); - } - return; - } - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - - req = arch_get_register_req_out(node); - if (req->type & arch_register_req_type_limited) { - const unsigned *limited = req->limited; - float penalty = weight * DEF_FACTOR; - give_penalties_for_limits(live_nodes, penalty, limited, node); - } - - if (req->type & arch_register_req_type_should_be_same) { - ir_node *insn = skip_Proj(node); - allocation_info_t *info = get_allocation_info(node); - int arity = get_irn_arity(insn); - int i; - - float factor = 1.0f / rbitset_popcnt(&req->other_same, arity); - for (i = 0; i < arity; ++i) { - ir_node *op; - unsigned r; - allocation_info_t *op_info; - - if (!rbitset_is_set(&req->other_same, i)) - continue; - - op = get_irn_n(insn, i); - - /* if we the value at the should_be_same input doesn't die at the - * node, then it is no use to propagate the constraints (since a - * copy will emerge anyway) */ - if (ir_nodeset_contains(live_nodes, op)) - continue; - - op_info = get_allocation_info(op); - for (r = 0; r < n_regs; ++r) { - op_info->prefs[r] += info->prefs[r] * factor; - } - } - } -} - -/** - * Walker: Runs an a block calculates the preferences for any - * node and every register from the considered register class. 
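To make the arithmetic of give_penalties_for_limits above concrete: with penalty 1.0, NEIGHBOR_FACTOR 0.2 and a limited set of 2 allowed registers, the constrained node loses the full 1.0 on every forbidden register, while each live neighbor loses only 1.0 * 0.2 * 0.8 / 2 = 0.08 on the two allowed ones. A self-contained sketch of just this rule, with the raw bitset simplified to a bool array (all names illustrative):

    #include <stdbool.h>

    static void penalize_for_limits(float *node_prefs, float *neighbor_prefs,
                                    const bool *allowed, unsigned n_regs,
                                    float penalty)
    {
        unsigned n_allowed = 0;
        for (unsigned r = 0; r < n_regs; ++r)
            n_allowed += allowed[r];

        /* the constrained node: full penalty on every forbidden register */
        for (unsigned r = 0; r < n_regs; ++r)
            if (!allowed[r])
                node_prefs[r] -= penalty;

        /* one live neighbor: weakened penalty on the allowed registers */
        float p = penalty * 0.2f;          /* NEIGHBOR_FACTOR */
        if (n_allowed > 1)
            p = (p * 0.8f) / n_allowed;    /* very weak if several regs fit */
        for (unsigned r = 0; r < n_regs; ++r)
            if (allowed[r])
                neighbor_prefs[r] -= p;
    }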
- */ -static void analyze_block(ir_node *block, void *data) -{ - float weight = get_block_execfreq(execfreqs, block); - ir_nodeset_t live_nodes; - ir_node *node; - (void) data; - - ir_nodeset_init(&live_nodes); - be_liveness_end_of_block(lv, cls, block, &live_nodes); - - sched_foreach_reverse(block, node) { - allocation_info_t *info; - int i; - int arity; - - if (is_Phi(node)) - break; - - if (create_preferences) - check_defs(&live_nodes, weight, node); - - /* mark last uses */ - arity = get_irn_arity(node); - - /* the allocation info node currently only uses 1 unsigned value - to mark last used inputs. So we will fail for a node with more than - 32 inputs. */ - if (arity >= (int) sizeof(unsigned) * 8) { - panic("Node with more than %d inputs not supported yet", - (int) sizeof(unsigned) * 8); - } - - info = get_allocation_info(node); - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - /* last usage of a value? */ - if (!ir_nodeset_contains(&live_nodes, op)) { - rbitset_set(&info->last_uses, i); - } - } - - be_liveness_transfer(cls, node, &live_nodes); - - if (create_preferences) { - /* update weights based on usage constraints */ - for (i = 0; i < arity; ++i) { - const arch_register_req_t *req; - const unsigned *limited; - ir_node *op = get_irn_n(node, i); - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - req = arch_get_register_req(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; - - limited = req->limited; - give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited, - op); - } - } - } - - ir_nodeset_destroy(&live_nodes); -} - -static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node) -{ - const arch_register_req_t *req; - - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { - ir_node *def = get_edge_src_irn(edge); - congruence_def(live_nodes, def); - } - return; - } - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - - /* should be same constraint? */ - req = arch_get_register_req_out(node); - if (req->type & arch_register_req_type_should_be_same) { - ir_node *insn = skip_Proj(node); - int arity = get_irn_arity(insn); - int i; - unsigned node_idx = get_irn_idx(node); - node_idx = uf_find(congruence_classes, node_idx); - - for (i = 0; i < arity; ++i) { - ir_node *live; - ir_node *op; - int op_idx; - ir_nodeset_iterator_t iter; - bool interferes = false; - - if (!rbitset_is_set(&req->other_same, i)) - continue; - - op = get_irn_n(insn, i); - op_idx = get_irn_idx(op); - op_idx = uf_find(congruence_classes, op_idx); - - /* do we interfere with the value */ - foreach_ir_nodeset(live_nodes, live, iter) { - int lv_idx = get_irn_idx(live); - lv_idx = uf_find(congruence_classes, lv_idx); - if (lv_idx == op_idx) { - interferes = true; - break; - } - } - /* don't put in same affinity class if we interfere */ - if (interferes) - continue; - - node_idx = uf_union(congruence_classes, node_idx, op_idx); - DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", - node, op)); - /* one should_be_same is enough... 
*/ - break; - } - } -} - -static void create_congruence_class(ir_node *block, void *data) -{ - ir_nodeset_t live_nodes; - ir_node *node; - - (void) data; - ir_nodeset_init(&live_nodes); - be_liveness_end_of_block(lv, cls, block, &live_nodes); - - /* check should be same constraints */ - sched_foreach_reverse(block, node) { - if (is_Phi(node)) - break; - - congruence_def(&live_nodes, node); - be_liveness_transfer(cls, node, &live_nodes); - } - - /* check phi congruence classes */ - sched_foreach_reverse_from(node, node) { - int i; - int arity; - int node_idx; - assert(is_Phi(node)); - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - node_idx = get_irn_idx(node); - node_idx = uf_find(congruence_classes, node_idx); - - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - bool interferes = false; - ir_nodeset_iterator_t iter; - unsigned r; - int old_node_idx; - ir_node *live; - ir_node *phi; - allocation_info_t *head_info; - allocation_info_t *other_info; - ir_node *op = get_Phi_pred(node, i); - int op_idx = get_irn_idx(op); - op_idx = uf_find(congruence_classes, op_idx); - - /* do we interfere with the value */ - foreach_ir_nodeset(&live_nodes, live, iter) { - int lv_idx = get_irn_idx(live); - lv_idx = uf_find(congruence_classes, lv_idx); - if (lv_idx == op_idx) { - interferes = true; - break; - } - } - /* don't put in same affinity class if we interfere */ - if (interferes) - continue; - /* any other phi has the same input? */ - sched_foreach(block, phi) { - ir_node *oop; - int oop_idx; - if (!is_Phi(phi)) - break; - if (!arch_irn_consider_in_reg_alloc(cls, phi)) - continue; - oop = get_Phi_pred(phi, i); - if (oop == op) - continue; - oop_idx = get_irn_idx(oop); - oop_idx = uf_find(congruence_classes, oop_idx); - if (oop_idx == op_idx) { - interferes = true; - break; - } - } - if (interferes) - continue; - - /* merge the 2 congruence classes and sum up their preferences */ - old_node_idx = node_idx; - node_idx = uf_union(congruence_classes, node_idx, op_idx); - DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", - node, op)); - - old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx; - head_info = get_allocation_info(get_idx_irn(irg, node_idx)); - other_info = get_allocation_info(get_idx_irn(irg, old_node_idx)); - for (r = 0; r < n_regs; ++r) { - head_info->prefs[r] += other_info->prefs[r]; - } - } - } -} - -static void set_congruence_prefs(ir_node *node, void *data) -{ - allocation_info_t *info; - allocation_info_t *head_info; - unsigned node_idx = get_irn_idx(node); - unsigned node_set = uf_find(congruence_classes, node_idx); - - (void) data; - - /* head of congruence class or not in any class */ - if (node_set == node_idx) - return; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - - head_info = get_allocation_info(get_idx_irn(irg, node_set)); - info = get_allocation_info(node); - - memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); -} - -static void combine_congruence_classes(void) -{ - size_t n = get_irg_last_idx(irg); - congruence_classes = XMALLOCN(int, n); - uf_init(congruence_classes, n); - - /* create congruence classes */ - irg_block_walk_graph(irg, create_congruence_class, NULL, NULL); - /* merge preferences */ - irg_walk_graph(irg, set_congruence_prefs, NULL, NULL); - free(congruence_classes); -} - - - - - -/** - * Assign register reg to the given node. 
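The congruence-class construction above boils down to: keep a union-find over node indices and merge a value with its should_be_same operand or phi input unless some live value already sits in the operand's class. A toy model of that merge-unless-interfering step, assuming integer node ids in place of ir_node and a hand-rolled union-find instead of libFirm's unionfind.h:

    #define N_NODES 8

    static int parent[N_NODES];  /* union-find forest; parent[i] == i at init */

    static int find(int x)
    {
        while (parent[x] != x)
            x = parent[x] = parent[parent[x]];  /* path halving */
        return x;
    }

    /* Merge op's class into node's class unless a live value interferes,
     * i.e. already belongs to op's class - then the two must stay apart. */
    static int merge_unless_interfering(int node, int op,
                                        const int *live, int n_live)
    {
        int node_class = find(node);
        int op_class   = find(op);
        if (node_class == op_class)
            return 1;
        for (int i = 0; i < n_live; ++i)
            if (find(live[i]) == op_class)
                return 0;                       /* interference: no merge */
        parent[op_class] = node_class;
        return 1;
    }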
- * - * @param node the node - * @param reg the register - */ -static void use_reg(ir_node *node, const arch_register_t *reg) -{ - unsigned r = arch_register_get_index(reg); - assignments[r] = node; - arch_set_irn_register(node, reg); -} - -static void free_reg_of_value(ir_node *node) -{ - const arch_register_t *reg; - unsigned r; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - return; - - reg = arch_get_irn_register(node); - r = arch_register_get_index(reg); - /* assignment->value may be NULL if a value is used at 2 inputs - so it gets freed twice. */ - assert(assignments[r] == node || assignments[r] == NULL); - assignments[r] = NULL; -} - -/** - * Compare two register preferences in decreasing order. - */ -static int compare_reg_pref(const void *e1, const void *e2) -{ - const reg_pref_t *rp1 = (const reg_pref_t*) e1; - const reg_pref_t *rp2 = (const reg_pref_t*) e2; - if (rp1->pref < rp2->pref) - return 1; - if (rp1->pref > rp2->pref) - return -1; - return 0; -} - -static void fill_sort_candidates(reg_pref_t *regprefs, - const allocation_info_t *info) -{ - unsigned r; - - for (r = 0; r < n_regs; ++r) { - float pref = info->prefs[r]; - regprefs[r].num = r; - regprefs[r].pref = pref; - } - /* TODO: use a stable sort here to avoid unnecessary register jumping */ - qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref); -} - -static bool try_optimistic_split(ir_node *to_split, ir_node *before, - float pref, float pref_delta, - unsigned *forbidden_regs, int recursion) -{ - const arch_register_t *from_reg; - const arch_register_t *reg; - ir_node *original_insn; - ir_node *block; - ir_node *copy; - unsigned r; - unsigned from_r; - unsigned i; - allocation_info_t *info = get_allocation_info(to_split); - reg_pref_t *prefs; - float delta; - float split_threshold; - - (void) pref; - - /* stupid hack: don't optimisticallt split don't spill nodes... - * (so we don't split away the values produced because of - * must_be_different constraints) */ - original_insn = skip_Proj(info->original_value); - if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) - return false; - - from_reg = arch_get_irn_register(to_split); - from_r = arch_register_get_index(from_reg); - block = get_nodes_block(before); - split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA; - - if (pref_delta < split_threshold*0.5) - return false; - - /* find the best free position where we could move to */ - prefs = ALLOCAN(reg_pref_t, n_regs); - fill_sort_candidates(prefs, info); - for (i = 0; i < n_regs; ++i) { - float apref; - float apref_delta; - bool res; - bool old_source_state; - - /* we need a normal register which is not an output register - an different from the current register of to_split */ - r = prefs[i].num; - if (!rbitset_is_set(normal_regs, r)) - continue; - if (rbitset_is_set(forbidden_regs, r)) - continue; - if (r == from_r) - continue; - - /* is the split worth it? */ - delta = pref_delta + prefs[i].pref; - if (delta < split_threshold) { - DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n", - to_split, recursion, delta)); - return false; - } - - /* if the register is free then we can do the split */ - if (assignments[r] == NULL) - break; - - /* otherwise we might try recursively calling optimistic_split */ - if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) - continue; - - apref = prefs[i].pref; - apref_delta = i+1 < n_regs ? 
apref - prefs[i+1].pref : 0; - apref_delta += pref_delta - split_threshold; - - /* our source register isn't a usefull destination for recursive - splits */ - old_source_state = rbitset_is_set(forbidden_regs, from_r); - rbitset_set(forbidden_regs, from_r); - /* try recursive split */ - res = try_optimistic_split(assignments[r], before, apref, - apref_delta, forbidden_regs, recursion+1); - /* restore our destination */ - if (old_source_state) { - rbitset_set(forbidden_regs, from_r); - } else { - rbitset_clear(forbidden_regs, from_r); - } - - if (res) - break; - } - if (i >= n_regs) - return false; - - reg = arch_register_for_index(cls, r); - copy = be_new_Copy(cls, block, to_split); - mark_as_copy_of(copy, to_split); - /* hacky, but correct here */ - if (assignments[arch_register_get_index(from_reg)] == to_split) - free_reg_of_value(to_split); - use_reg(copy, reg); - sched_add_before(before, copy); - - DB((dbg, LEVEL_3, - "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n", - copy, to_split, from_reg->name, reg->name, before, delta, recursion)); - return true; -} - -/** - * Determine and assign a register for node @p node - */ -static void assign_reg(const ir_node *block, ir_node *node, - unsigned *forbidden_regs) -{ - const arch_register_t *reg; - allocation_info_t *info; - const arch_register_req_t *req; - reg_pref_t *reg_prefs; - ir_node *in_node; - unsigned i; - const unsigned *allowed_regs; - unsigned r; - - assert(!is_Phi(node)); - assert(arch_irn_consider_in_reg_alloc(cls, node)); - - /* preassigned register? */ - reg = arch_get_irn_register(node); - if (reg != NULL) { - DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name)); - use_reg(node, reg); - return; - } - - /* give should_be_same boni */ - info = get_allocation_info(node); - req = arch_get_register_req_out(node); - - in_node = skip_Proj(node); - if (req->type & arch_register_req_type_should_be_same) { - float weight = get_block_execfreq(execfreqs, block); - int arity = get_irn_arity(in_node); - int i; - - assert(arity <= (int) sizeof(req->other_same) * 8); - for (i = 0; i < arity; ++i) { - ir_node *in; - const arch_register_t *reg; - unsigned r; - if (!rbitset_is_set(&req->other_same, i)) - continue; - - in = get_irn_n(in_node, i); - reg = arch_get_irn_register(in); - assert(reg != NULL); - r = arch_register_get_index(reg); - - /* if the value didn't die here then we should not propagate the - * should_be_same info */ - if (assignments[r] == in) - continue; - - info->prefs[r] += weight * AFF_SHOULD_BE_SAME; - } - } - - /* create list of register candidates and sort by their preference */ - DB((dbg, LEVEL_2, "Candidates for %+F:", node)); - reg_prefs = alloca(n_regs * sizeof(reg_prefs[0])); - fill_sort_candidates(reg_prefs, info); - for (i = 0; i < n_regs; ++i) { - unsigned num = reg_prefs[i].num; - const arch_register_t *reg; - - if (!rbitset_is_set(normal_regs, num)) - continue; - - reg = arch_register_for_index(cls, num); - DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref)); - } - DB((dbg, LEVEL_2, "\n")); - - allowed_regs = normal_regs; - if (req->type & arch_register_req_type_limited) { - allowed_regs = req->limited; - } - - for (i = 0; i < n_regs; ++i) { - float pref, delta; - ir_node *before; - bool res; - - r = reg_prefs[i].num; - if (!rbitset_is_set(allowed_regs, r)) - continue; - if (assignments[r] == NULL) - break; - pref = reg_prefs[i].pref; - delta = i+1 < n_regs ? 
pref - reg_prefs[i+1].pref : 0; - before = skip_Proj(node); - res = try_optimistic_split(assignments[r], before, - pref, delta, forbidden_regs, 0); - if (res) - break; - } - if (i >= n_regs) { - panic("No register left for %+F\n", node); - } - - reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); -} - -/** - * Add an permutation in front of a node and change the assignments - * due to this permutation. - * - * To understand this imagine a permutation like this: - * - * 1 -> 2 - * 2 -> 3 - * 3 -> 1, 5 - * 4 -> 6 - * 5 - * 6 - * 7 -> 7 - * - * First we count how many destinations a single value has. At the same time - * we can be sure that each destination register has at most 1 source register - * (it can have 0 which means we don't care what value is in it). - * We ignore all fullfilled permuations (like 7->7) - * In a first pass we create as much copy instructions as possible as they - * are generally cheaper than exchanges. We do this by counting into how many - * destinations a register has to be copied (in the example it's 2 for register - * 3, or 1 for the registers 1,2,4 and 7). - * We can then create a copy into every destination register when the usecount - * of that register is 0 (= noone else needs the value in the register). - * - * After this step we should have cycles left. We implement a cyclic permutation - * of n registers with n-1 transpositions. - * - * @param live_nodes the set of live nodes, updated due to live range split - * @param before the node before we add the permutation - * @param permutation the permutation array indices are the destination - * registers, the values in the array are the source - * registers. - */ -static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, - unsigned *permutation) -{ - unsigned *n_used = ALLOCANZ(unsigned, n_regs); - ir_node *block; - unsigned r; - - /* determine how often each source register needs to be read */ - for (r = 0; r < n_regs; ++r) { - unsigned old_reg = permutation[r]; - ir_node *value; - - value = assignments[old_reg]; - if (value == NULL) { - /* nothing to do here, reg is not live. Mark it as fixpoint - * so we ignore it in the next steps */ - permutation[r] = r; - continue; - } - - ++n_used[old_reg]; - } - - block = get_nodes_block(before); - - /* step1: create copies where immediately possible */ - for (r = 0; r < n_regs; /* empty */) { - ir_node *copy; - ir_node *src; - const arch_register_t *reg; - unsigned old_r = permutation[r]; - - /* - no need to do anything for fixed points. 
- - we can't copy if the value in the dest reg is still needed */ - if (old_r == r || n_used[r] > 0) { - ++r; - continue; - } - - /* create a copy */ - src = assignments[old_r]; - copy = be_new_Copy(cls, block, src); - sched_add_before(before, copy); - reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", - copy, src, before, reg->name)); - mark_as_copy_of(copy, src); - use_reg(copy, reg); - - if (live_nodes != NULL) { - ir_nodeset_insert(live_nodes, copy); - } - - /* old register has 1 user less, permutation is resolved */ - assert(arch_register_get_index(arch_get_irn_register(src)) == old_r); - permutation[r] = r; - - assert(n_used[old_r] > 0); - --n_used[old_r]; - if (n_used[old_r] == 0) { - if (live_nodes != NULL) { - ir_nodeset_remove(live_nodes, src); - } - free_reg_of_value(src); - } - - /* advance or jump back (if this copy enabled another copy) */ - if (old_r < r && n_used[old_r] == 0) { - r = old_r; - } else { - ++r; - } - } - - /* at this point we only have "cycles" left which we have to resolve with - * perm instructions - * TODO: if we have free registers left, then we should really use copy - * instructions for any cycle longer than 2 registers... - * (this is probably architecture dependent, there might be archs where - * copies are preferable even for 2-cycles) */ - - /* create perms with the rest */ - for (r = 0; r < n_regs; /* empty */) { - const arch_register_t *reg; - unsigned old_r = permutation[r]; - unsigned r2; - ir_node *in[2]; - ir_node *perm; - ir_node *proj0; - ir_node *proj1; - - if (old_r == r) { - ++r; - continue; - } - - /* we shouldn't have copies from 1 value to multiple destinations left*/ - assert(n_used[old_r] == 1); - - /* exchange old_r and r2; after that old_r is a fixed point */ - r2 = permutation[old_r]; - - in[0] = assignments[r2]; - in[1] = assignments[old_r]; - perm = be_new_Perm(cls, block, 2, in); - sched_add_before(before, perm); - DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", - perm, in[0], in[1], before)); - - proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0); - mark_as_copy_of(proj0, in[0]); - reg = arch_register_for_index(cls, old_r); - use_reg(proj0, reg); - - proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1); - mark_as_copy_of(proj1, in[1]); - reg = arch_register_for_index(cls, r2); - use_reg(proj1, reg); - - /* 1 value is now in the correct register */ - permutation[old_r] = old_r; - /* the source of r changed to r2 */ - permutation[r] = r2; - - /* if we have reached a fixpoint update data structures */ - if (live_nodes != NULL) { - ir_nodeset_remove(live_nodes, in[0]); - ir_nodeset_remove(live_nodes, in[1]); - ir_nodeset_remove(live_nodes, proj0); - ir_nodeset_insert(live_nodes, proj1); - } - } - -#ifdef DEBUG_libfirm - /* now we should only have fixpoints left */ - for (r = 0; r < n_regs; ++r) { - assert(permutation[r] == r); - } -#endif -} - -/** - * Free regs for values last used. 
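The two passes of permute_values above can be demonstrated on plain integers: first emit copies into every destination register that nobody still reads, jumping back when a copy frees an earlier source, then break the remaining cycles with 2-register swaps (a cycle of n registers costs n-1 transpositions). A minimal sketch with int "register contents" standing in for the assignments array and Copy/Perm nodes:

    #include <assert.h>

    #define N_REGS 8

    /* perm[r] is the register whose value must end up in r; perm[r] == r
     * marks a fixpoint (including registers that hold no live value). */
    static void permute(int *regs, unsigned *perm)
    {
        unsigned n_used[N_REGS] = { 0 };
        for (unsigned r = 0; r < N_REGS; ++r)
            if (perm[r] != r)
                ++n_used[perm[r]];

        /* pass 1: copies, wherever the destination is no longer read */
        for (unsigned r = 0; r < N_REGS; /* empty */) {
            unsigned old_r = perm[r];
            if (old_r == r || n_used[r] > 0) {
                ++r;
                continue;
            }
            regs[r] = regs[old_r];         /* "Copy" old_r -> r */
            perm[r] = r;
            --n_used[old_r];
            /* jump back if this freed the source of an earlier copy */
            r = (old_r < r && n_used[old_r] == 0) ? old_r : r + 1;
        }

        /* pass 2: only cycles are left; resolve them with swaps ("Perm") */
        for (unsigned r = 0; r < N_REGS; /* empty */) {
            unsigned old_r = perm[r];
            if (old_r == r) {
                ++r;
                continue;
            }
            unsigned r2  = perm[old_r];    /* source wanted by old_r */
            int      tmp = regs[old_r];
            regs[old_r]  = regs[r2];       /* old_r becomes a fixpoint */
            regs[r2]     = tmp;            /* r's source now lives in r2 */
            perm[old_r]  = old_r;
            perm[r]      = r2;
        }
    }

    int main(void)
    {
        /* the example from the comment above: 1->2, 2->3, 3->1 and 3->5,
         * 4->6, 7->7; perm is indexed by destination register */
        int      regs[N_REGS] = { 10, 11, 12, 13, 14, 15, 16, 17 };
        unsigned perm[N_REGS] = { 0, 3, 1, 2, 4, 3, 4, 7 };
        permute(regs, perm);
        assert(regs[1] == 13 && regs[2] == 11 && regs[3] == 12);
        assert(regs[5] == 13 && regs[6] == 14);
        return 0;
    }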
- * - * @param live_nodes set of live nodes, will be updated - * @param node the node to consider - */ -static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) -{ - allocation_info_t *info = get_allocation_info(node); - const unsigned *last_uses = &info->last_uses; - int arity = get_irn_arity(node); - int i; - - for (i = 0; i < arity; ++i) { - ir_node *op; - - /* check if one operand is the last use */ - if (!rbitset_is_set(last_uses, i)) - continue; - - op = get_irn_n(node, i); - free_reg_of_value(op); - ir_nodeset_remove(live_nodes, op); - } -} - -/** - * change inputs of a node to the current value (copies/perms) - */ -static void rewire_inputs(ir_node *node) -{ - int i; - int arity = get_irn_arity(node); - - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - allocation_info_t *info = try_get_allocation_info(op); - - if (info == NULL) - continue; - - info = get_allocation_info(info->original_value); - if (info->current_value != op) { - set_irn_n(node, i, info->current_value); - } - } -} - -/** - * Create a bitset of registers occupied with value living through an - * instruction - */ -static void determine_live_through_regs(unsigned *bitset, ir_node *node) -{ - const allocation_info_t *info = get_allocation_info(node); - unsigned r; - int i; - int arity; - - /* mark all used registers as potentially live-through */ - for (r = 0; r < n_regs; ++r) { - if (assignments[r] == NULL) - continue; - if (!rbitset_is_set(normal_regs, r)) - continue; - - rbitset_set(bitset, r); - } - - /* remove registers of value dying at the instruction */ - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - ir_node *op; - const arch_register_t *reg; - - if (!rbitset_is_set(&info->last_uses, i)) - continue; - - op = get_irn_n(node, i); - reg = arch_get_irn_register(op); - rbitset_clear(bitset, arch_register_get_index(reg)); - } -} - -/** - * Enforce constraints at a node by live range splits. - * - * @param live_nodes the set of live nodes, might be changed - * @param node the current node - */ -static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, - unsigned *forbidden_regs) -{ - int arity = get_irn_arity(node); - int i, res; - hungarian_problem_t *bp; - unsigned l, r; - unsigned *assignment; - - /* construct a list of register occupied by live-through values */ - unsigned *live_through_regs = NULL; - - /* see if any use constraints are not met */ - bool good = true; - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned r; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - /* are there any limitations for the i'th operand? */ - req = arch_get_register_req(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; - - limited = req->limited; - reg = arch_get_irn_register(op); - r = arch_register_get_index(reg); - if (!rbitset_is_set(limited, r)) { - /* found an assignment outside the limited set */ - good = false; - break; - } - } - - /* is any of the live-throughs using a constrained output register? 
*/ - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - - foreach_out_edge(node, edge) { - ir_node *proj = get_edge_src_irn(edge); - const arch_register_req_t *req; - - if (!arch_irn_consider_in_reg_alloc(cls, proj)) - continue; - - req = arch_get_register_req_out(proj); - if (!(req->type & arch_register_req_type_limited)) - continue; - - if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); - determine_live_through_regs(live_through_regs, node); - } - - rbitset_or(forbidden_regs, req->limited, n_regs); - if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { - good = false; - } - } - } else { - if (arch_irn_consider_in_reg_alloc(cls, node)) { - const arch_register_req_t *req = arch_get_register_req_out(node); - if (req->type & arch_register_req_type_limited) { - rbitset_alloca(live_through_regs, n_regs); - determine_live_through_regs(live_through_regs, node); - if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { - good = false; - rbitset_or(forbidden_regs, req->limited, n_regs); - } - } - } - } - - if (good) - return; - - /* create these arrays if we haven't yet */ - if (live_through_regs == NULL) { - rbitset_alloca(live_through_regs, n_regs); - } - - /* at this point we have to construct a bipartite matching problem to see - * which values should go to which registers - * Note: We're building the matrix in "reverse" - source registers are - * right, destinations left because this will produce the solution - * in the format required for permute_values. - */ - bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); - - /* add all combinations, then remove not allowed ones */ - for (l = 0; l < n_regs; ++l) { - if (!rbitset_is_set(normal_regs, l)) { - hungarian_add(bp, l, l, 1); - continue; - } - - for (r = 0; r < n_regs; ++r) { - if (!rbitset_is_set(normal_regs, r)) - continue; - /* livethrough values may not use constrainted output registers */ - if (rbitset_is_set(live_through_regs, l) - && rbitset_is_set(forbidden_regs, r)) - continue; - - hungarian_add(bp, r, l, l == r ? 
9 : 8); - } - } - - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - const arch_register_req_t *req; - const unsigned *limited; - unsigned current_reg; - - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - req = arch_get_register_req(node, i); - if (!(req->type & arch_register_req_type_limited)) - continue; - - limited = req->limited; - reg = arch_get_irn_register(op); - current_reg = arch_register_get_index(reg); - for (r = 0; r < n_regs; ++r) { - if (rbitset_is_set(limited, r)) - continue; - hungarian_remv(bp, r, current_reg); - } - } - - //hungarian_print_cost_matrix(bp, 1); - hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - - assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, (int*) assignment, NULL, 0); - assert(res == 0); - -#if 0 - fprintf(stderr, "Swap result:"); - for (i = 0; i < (int) n_regs; ++i) { - fprintf(stderr, " %d", assignment[i]); - } - fprintf(stderr, "\n"); -#endif - - hungarian_free(bp); - - permute_values(live_nodes, node, assignment); -} - -/** test wether a node @p n is a copy of the value of node @p of */ -static bool is_copy_of(ir_node *value, ir_node *test_value) -{ - allocation_info_t *test_info; - allocation_info_t *info; - - if (value == test_value) - return true; - - info = get_allocation_info(value); - test_info = get_allocation_info(test_value); - return test_info->original_value == info->original_value; -} - -/** - * find a value in the end-assignment of a basic block - * @returns the index into the assignment array if found - * -1 if not found - */ -static int find_value_in_block_info(block_info_t *info, ir_node *value) -{ - unsigned r; - ir_node **assignments = info->assignments; - for (r = 0; r < n_regs; ++r) { - ir_node *a_value = assignments[r]; - - if (a_value == NULL) - continue; - if (is_copy_of(a_value, value)) - return (int) r; - } - - return -1; -} - -/** - * Create the necessary permutations at the end of a basic block to fullfill - * the register assignment for phi-nodes in the next block - */ -static void add_phi_permutations(ir_node *block, int p) -{ - unsigned r; - unsigned *permutation; - ir_node **old_assignments; - bool need_permutation; - ir_node *node; - ir_node *pred = get_Block_cfgpred_block(block, p); - - block_info_t *pred_info = get_block_info(pred); - - /* predecessor not processed yet? 
nothing to do */ - if (!pred_info->processed) - return; - - permutation = ALLOCAN(unsigned, n_regs); - for (r = 0; r < n_regs; ++r) { - permutation[r] = r; - } - - /* check phi nodes */ - need_permutation = false; - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - const arch_register_t *reg; - int regn; - int a; - ir_node *op; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - op = get_Phi_pred(node, p); - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - a = find_value_in_block_info(pred_info, op); - assert(a >= 0); - - reg = arch_get_irn_register(node); - regn = arch_register_get_index(reg); - if (regn != a) { - permutation[regn] = a; - need_permutation = true; - } - } - - if (need_permutation) { - /* permute values at end of predecessor */ - old_assignments = assignments; - assignments = pred_info->assignments; - permute_values(NULL, be_get_end_of_block_insertion_point(pred), - permutation); - assignments = old_assignments; - } - - /* change phi nodes to use the copied values */ - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - int a; - ir_node *op; - - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - op = get_Phi_pred(node, p); - /* no need to do anything for Unknown inputs */ - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - /* we have permuted all values into the correct registers so we can - simply query which value occupies the phis register in the - predecessor */ - a = arch_register_get_index(arch_get_irn_register(node)); - op = pred_info->assignments[a]; - set_Phi_pred(node, p, op); - } -} - -/** - * Set preferences for a phis register based on the registers used on the - * phi inputs. - */ -static void adapt_phi_prefs(ir_node *phi) -{ - int i; - int arity = get_irn_arity(phi); - ir_node *block = get_nodes_block(phi); - allocation_info_t *info = get_allocation_info(phi); - - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(phi, i); - const arch_register_t *reg = arch_get_irn_register(op); - ir_node *pred_block; - block_info_t *pred_block_info; - float weight; - unsigned r; - - if (reg == NULL) - continue; - /* we only give the bonus if the predecessor already has registers - * assigned, otherwise we only see a dummy value - * and any conclusions about its register are useless */ - pred_block = get_Block_cfgpred_block(block, i); - pred_block_info = get_block_info(pred_block); - if (!pred_block_info->processed) - continue; - - /* give bonus for already assigned register */ - weight = get_block_execfreq(execfreqs, pred_block); - r = arch_register_get_index(reg); - info->prefs[r] += weight * AFF_PHI; - } -} - -/** - * After a phi has been assigned a register propagate preference inputs - * to the phi inputs. 
- */ -static void propagate_phi_register(ir_node *phi, unsigned assigned_r) -{ - int i; - ir_node *block = get_nodes_block(phi); - int arity = get_irn_arity(phi); - - for (i = 0; i < arity; ++i) { - ir_node *op = get_Phi_pred(phi, i); - allocation_info_t *info = get_allocation_info(op); - ir_node *pred_block = get_Block_cfgpred_block(block, i); - unsigned r; - float weight - = get_block_execfreq(execfreqs, pred_block) * AFF_PHI; - - if (info->prefs[assigned_r] >= weight) - continue; - - /* promote the prefered register */ - for (r = 0; r < n_regs; ++r) { - if (info->prefs[r] > -weight) { - info->prefs[r] = -weight; - } - } - info->prefs[assigned_r] = weight; - - if (is_Phi(op)) - propagate_phi_register(op, assigned_r); - } -} - -static void assign_phi_registers(ir_node *block) -{ - int n_phis = 0; - int n; - int res; - int *assignment; - ir_node *node; - hungarian_problem_t *bp; - - /* count phi nodes */ - sched_foreach(block, node) { - if (!is_Phi(node)) - break; - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - ++n_phis; - } - - if (n_phis == 0) - return; - - /* build a bipartite matching problem for all phi nodes */ - bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); - n = 0; - sched_foreach(block, node) { - unsigned r; - - allocation_info_t *info; - if (!is_Phi(node)) - break; - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - /* give boni for predecessor colorings */ - adapt_phi_prefs(node); - /* add stuff to bipartite problem */ - info = get_allocation_info(node); - DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); - for (r = 0; r < n_regs; ++r) { - float costs; - - if (!rbitset_is_set(normal_regs, r)) - continue; - - costs = info->prefs[r]; - costs = costs < 0 ? -logf(-costs+1) : logf(costs+1); - costs *= 100; - costs += 10000; - hungarian_add(bp, n, r, costs); - DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, - info->prefs[r])); - } - DB((dbg, LEVEL_3, "\n")); - ++n; - } - - //hungarian_print_cost_matrix(bp, 7); - hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); - - assignment = ALLOCAN(int, n_regs); - res = hungarian_solve(bp, assignment, NULL, 0); - assert(res == 0); - - /* apply results */ - n = 0; - sched_foreach(block, node) { - unsigned r; - const arch_register_t *reg; - - if (!is_Phi(node)) - break; - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - r = assignment[n++]; - assert(rbitset_is_set(normal_regs, r)); - reg = arch_register_for_index(cls, r); - DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); - use_reg(node, reg); - - /* adapt preferences for phi inputs */ - if (propagate_phi_registers) - propagate_phi_register(node, r); - } -} - -/** - * Walker: assign registers to all nodes of a block that - * need registers from the currently considered register class. 
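A side note on assign_phi_registers above: before the preferences go into the bipartite matching they are squashed with a signed logarithm and shifted positive, so the Hungarian solver (run in maximize-utility mode) sees well-behaved costs while the relative order of the preferences is preserved. The transform in isolation (phi_matching_cost is an illustrative name):

    #include <math.h>

    static float phi_matching_cost(float pref)
    {
        /* signed log damps outliers; *100 and +10000 keep the matrix
         * in a positive, integer-friendly range for the solver */
        float c = pref < 0 ? -logf(-pref + 1) : logf(pref + 1);
        return c * 100 + 10000;
    }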
- */ -static void allocate_coalesce_block(ir_node *block, void *data) -{ - int i; - ir_nodeset_t live_nodes; - ir_node *node; - int n_preds; - block_info_t *block_info; - block_info_t **pred_block_infos; - ir_node **phi_ins; - unsigned *forbidden_regs; /**< collects registers which must - not be used for optimistic splits */ - - (void) data; - DB((dbg, LEVEL_2, "* Block %+F\n", block)); - - /* clear assignments */ - block_info = get_block_info(block); - assignments = block_info->assignments; - - ir_nodeset_init(&live_nodes); - - /* gather regalloc infos of predecessor blocks */ - n_preds = get_Block_n_cfgpreds(block); - pred_block_infos = ALLOCAN(block_info_t*, n_preds); - for (i = 0; i < n_preds; ++i) { - ir_node *pred = get_Block_cfgpred_block(block, i); - block_info_t *pred_info = get_block_info(pred); - pred_block_infos[i] = pred_info; - } - - phi_ins = ALLOCAN(ir_node*, n_preds); - - /* collect live-in nodes and preassigned values */ - be_lv_foreach(lv, block, be_lv_state_in, i) { - const arch_register_t *reg; - int p; - bool need_phi = false; - - node = be_lv_get_irn(lv, block, i); - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - /* check all predecessors for this value, if it is not everywhere the - same or unknown then we have to construct a phi - (we collect the potential phi inputs here) */ - for (p = 0; p < n_preds; ++p) { - block_info_t *pred_info = pred_block_infos[p]; - - if (!pred_info->processed) { - /* use node for now, it will get fixed later */ - phi_ins[p] = node; - need_phi = true; - } else { - int a = find_value_in_block_info(pred_info, node); - - /* must live out of predecessor */ - assert(a >= 0); - phi_ins[p] = pred_info->assignments[a]; - /* different value from last time? then we need a phi */ - if (p > 0 && phi_ins[p-1] != phi_ins[p]) { - need_phi = true; - } - } - } - - if (need_phi) { - ir_mode *mode = get_irn_mode(node); - const arch_register_req_t *req = get_default_req_current_cls(); - ir_node *phi; - - phi = new_r_Phi(block, n_preds, phi_ins, mode); - be_set_phi_reg_req(phi, req); - - DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); -#ifdef DEBUG_libfirm - { - int i; - for (i = 0; i < n_preds; ++i) { - DB((dbg, LEVEL_3, " %+F", phi_ins[i])); - } - DB((dbg, LEVEL_3, "\n")); - } -#endif - mark_as_copy_of(phi, node); - sched_add_after(block, phi); - - node = phi; - } else { - allocation_info_t *info = get_allocation_info(node); - info->current_value = phi_ins[0]; - - /* Grab 1 of the inputs we constructed (might not be the same as - * "node" as we could see the same copy of the value in all - * predecessors */ - node = phi_ins[0]; - } - - /* if the node already has a register assigned use it */ - reg = arch_get_irn_register(node); - if (reg != NULL) { - use_reg(node, reg); - } - - /* remember that this node is live at the beginning of the block */ - ir_nodeset_insert(&live_nodes, node); - } - - rbitset_alloca(forbidden_regs, n_regs); - - /* handle phis... 
*/ - assign_phi_registers(block); - - /* all live-ins must have a register */ -#ifdef DEBUG_libfirm - { - ir_nodeset_iterator_t iter; - foreach_ir_nodeset(&live_nodes, node, iter) { - const arch_register_t *reg = arch_get_irn_register(node); - assert(reg != NULL); - } - } -#endif - - /* assign instructions in the block */ - sched_foreach(block, node) { - int i; - int arity; - - /* phis are already assigned */ - if (is_Phi(node)) - continue; - - rewire_inputs(node); - - /* enforce use constraints */ - rbitset_clear_all(forbidden_regs, n_regs); - enforce_constraints(&live_nodes, node, forbidden_regs); - - rewire_inputs(node); - - /* we may not use registers used for inputs for optimistic splits */ - arity = get_irn_arity(node); - for (i = 0; i < arity; ++i) { - ir_node *op = get_irn_n(node, i); - const arch_register_t *reg; - if (!arch_irn_consider_in_reg_alloc(cls, op)) - continue; - - reg = arch_get_irn_register(op); - rbitset_set(forbidden_regs, arch_register_get_index(reg)); - } - - /* free registers of values last used at this instruction */ - free_last_uses(&live_nodes, node); - - /* assign output registers */ - /* TODO: 2 phases: first: pre-assigned ones, 2nd real regs */ - if (get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { - ir_node *proj = get_edge_src_irn(edge); - if (!arch_irn_consider_in_reg_alloc(cls, proj)) - continue; - assign_reg(block, proj, forbidden_regs); - } - } else if (arch_irn_consider_in_reg_alloc(cls, node)) { - assign_reg(block, node, forbidden_regs); - } - } - - ir_nodeset_destroy(&live_nodes); - assignments = NULL; - - block_info->processed = true; - - /* permute values at end of predecessor blocks in case of phi-nodes */ - if (n_preds > 1) { - int p; - for (p = 0; p < n_preds; ++p) { - add_phi_permutations(block, p); - } - } - - /* if we have exactly 1 successor then we might be able to produce phi - copies now */ - if (get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) == 1) { - const ir_edge_t *edge - = get_irn_out_edge_first_kind(block, EDGE_KIND_BLOCK); - ir_node *succ = get_edge_src_irn(edge); - int p = get_edge_src_pos(edge); - block_info_t *succ_info = get_block_info(succ); - - if (succ_info->processed) { - add_phi_permutations(succ, p); - } - } -} - -typedef struct block_costs_t block_costs_t; -struct block_costs_t { - float costs; /**< costs of the block */ - int dfs_num; /**< depth first search number (to detect backedges) */ -}; - -static int cmp_block_costs(const void *d1, const void *d2) -{ - const ir_node * const *block1 = d1; - const ir_node * const *block2 = d2; - const block_costs_t *info1 = get_irn_link(*block1); - const block_costs_t *info2 = get_irn_link(*block2); - return QSORT_CMP(info2->costs, info1->costs); -} - -static void determine_block_order(void) -{ - int i; - ir_node **blocklist = be_get_cfgpostorder(irg); - int n_blocks = ARR_LEN(blocklist); - int dfs_num = 0; - pdeq *worklist = new_pdeq(); - ir_node **order = XMALLOCN(ir_node*, n_blocks); - int order_p = 0; - - /* clear block links... 
*/ - for (i = 0; i < n_blocks; ++i) { - ir_node *block = blocklist[i]; - set_irn_link(block, NULL); - } - - /* walk blocks in reverse postorder, the costs for each block are the - * sum of the costs of its predecessors (excluding the costs on backedges - * which we can't determine) */ - for (i = n_blocks-1; i >= 0; --i) { - block_costs_t *cost_info; - ir_node *block = blocklist[i]; - - float execfreq = get_block_execfreq(execfreqs, block); - float costs = execfreq; - int n_cfgpreds = get_Block_n_cfgpreds(block); - int p; - for (p = 0; p < n_cfgpreds; ++p) { - ir_node *pred_block = get_Block_cfgpred_block(block, p); - block_costs_t *pred_costs = get_irn_link(pred_block); - /* we don't have any info for backedges */ - if (pred_costs == NULL) - continue; - costs += pred_costs->costs; - } - - cost_info = OALLOCZ(&obst, block_costs_t); - cost_info->costs = costs; - cost_info->dfs_num = dfs_num++; - set_irn_link(block, cost_info); - } - - /* sort array by block costs */ - qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs); - - ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); - inc_irg_block_visited(irg); - - for (i = 0; i < n_blocks; ++i) { - ir_node *block = blocklist[i]; - if (Block_block_visited(block)) - continue; - - /* continually add predecessors with highest costs to worklist - * (without using backedges) */ - do { - block_costs_t *info = get_irn_link(block); - ir_node *best_pred = NULL; - float best_costs = -1; - int n_cfgpred = get_Block_n_cfgpreds(block); - int i; - - pdeq_putr(worklist, block); - mark_Block_block_visited(block); - for (i = 0; i < n_cfgpred; ++i) { - ir_node *pred_block = get_Block_cfgpred_block(block, i); - block_costs_t *pred_info = get_irn_link(pred_block); - - /* ignore backedges */ - if (pred_info->dfs_num > info->dfs_num) - continue; - - if (info->costs > best_costs) { - best_costs = info->costs; - best_pred = pred_block; - } - } - block = best_pred; - } while(block != NULL && !Block_block_visited(block)); - - /* now put all nodes in the worklist in our final order */ - while (!pdeq_empty(worklist)) { - ir_node *pblock = pdeq_getr(worklist); - assert(order_p < n_blocks); - order[order_p++] = pblock; - } - } - assert(order_p == n_blocks); - del_pdeq(worklist); - - ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED); - - DEL_ARR_F(blocklist); - - obstack_free(&obst, NULL); - obstack_init(&obst); - - block_order = order; - n_block_order = n_blocks; -} - -/** - * Run the register allocator for the current register class. - */ -static void be_straight_alloc_cls(void) -{ - int i; - - lv = be_assure_liveness(birg); - be_liveness_assure_sets(lv); - - ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); - - DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name)); - - be_clear_links(irg); - - irg_block_walk_graph(irg, NULL, analyze_block, NULL); - if (create_congruence_classes) - combine_congruence_classes(); - - for (i = 0; i < n_block_order; ++i) { - ir_node *block = block_order[i]; - allocate_coalesce_block(block, NULL); - } - - ir_free_resources(irg, IR_RESOURCE_IRN_LINK); -} - -static void dump(int mask, ir_graph *irg, const char *suffix, - void (*dumper)(ir_graph *, const char *)) -{ - if(birg->main_env->options->dump_flags & mask) - be_dump(irg, suffix, dumper); -} - -/** - * Run the spiller on the current graph. 
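The cost accumulation inside determine_block_order above has a compact core: visiting blocks in reverse postorder, a block's cost is its own execution frequency plus the costs of all predecessors seen so far; predecessors without cost info sit on backedges and are skipped. A toy version, assuming blocks are numbered 0..n-1 in reverse postorder so that a predecessor with a larger index marks a backedge:

    #define N_BLOCKS 4

    static void accumulate_block_costs(const float freq[N_BLOCKS],
                                       const int preds[N_BLOCKS][N_BLOCKS],
                                       const int n_preds[N_BLOCKS],
                                       float costs[N_BLOCKS])
    {
        for (int b = 0; b < N_BLOCKS; ++b) {
            costs[b] = freq[b];
            for (int p = 0; p < n_preds[b]; ++p) {
                int pred = preds[b][p];
                if (pred > b)          /* backedge: cost unknown, skip */
                    continue;
                costs[b] += costs[pred];
            }
        }
    }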
- */ -static void spill(void) -{ - /* make sure all nodes show their real register pressure */ - BE_TIMER_PUSH(t_ra_constr); - be_pre_spill_prepare_constr(birg, cls); - BE_TIMER_POP(t_ra_constr); - - dump(DUMP_RA, irg, "-spillprepare", dump_ir_block_graph_sched); - - /* spill */ - BE_TIMER_PUSH(t_ra_spill); - be_do_spill(birg, cls); - BE_TIMER_POP(t_ra_spill); - - BE_TIMER_PUSH(t_ra_spill_apply); - check_for_memory_operands(irg); - BE_TIMER_POP(t_ra_spill_apply); - - dump(DUMP_RA, irg, "-spill", dump_ir_block_graph_sched); -} - -/** - * The straight register allocator for a whole procedure. - */ -static void be_straight_alloc(be_irg_t *new_birg) -{ - const arch_env_t *arch_env = new_birg->main_env->arch_env; - int n_cls = arch_env_get_n_reg_class(arch_env); - int c; - - obstack_init(&obst); - - birg = new_birg; - irg = be_get_birg_irg(birg); - execfreqs = birg->exec_freq; - - /* determine a good coloring order */ - determine_block_order(); - - for (c = 0; c < n_cls; ++c) { - cls = arch_env_get_reg_class(arch_env, c); - default_cls_req = NULL; - if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra) - continue; - - stat_ev_ctx_push_str("regcls", cls->name); - - n_regs = arch_register_class_n_regs(cls); - normal_regs = rbitset_malloc(n_regs); - be_abi_set_non_ignore_regs(birg->abi, cls, normal_regs); - - spill(); - - /* verify schedule and register pressure */ - BE_TIMER_PUSH(t_verify); - if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { - be_verify_schedule(birg); - be_verify_register_pressure(birg, cls, irg); - } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { - assert(be_verify_schedule(birg) && "Schedule verification failed"); - assert(be_verify_register_pressure(birg, cls, irg) - && "Register pressure verification failed"); - } - BE_TIMER_POP(t_verify); - - BE_TIMER_PUSH(t_ra_color); - be_straight_alloc_cls(); - BE_TIMER_POP(t_ra_color); - - /* we most probably constructed new Phis so liveness info is invalid - * now */ - /* TODO: test liveness_introduce */ - be_liveness_invalidate(lv); - free(normal_regs); - - stat_ev_ctx_pop("regcls"); - } - - BE_TIMER_PUSH(t_ra_spill_apply); - be_abi_fix_stack_nodes(birg->abi); - BE_TIMER_POP(t_ra_spill_apply); - - BE_TIMER_PUSH(t_verify); - if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { - be_verify_register_allocation(birg); - } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { - assert(be_verify_register_allocation(birg) - && "Register allocation invalid"); - } - BE_TIMER_POP(t_verify); - - obstack_free(&obst, NULL); -} - -/** - * Initializes this module. - */ -void be_init_straight_alloc(void) -{ - static be_ra_t be_ra_straight = { - be_straight_alloc, - }; - lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); - lc_opt_entry_t *straightalloc_group = lc_opt_get_grp(be_grp, "straightalloc"); - lc_opt_add_table(straightalloc_group, options); - - be_register_allocator("straight", &be_ra_straight); - FIRM_DBG_REGISTER(dbg, "firm.be.straightalloc"); -} - -BE_REGISTER_MODULE_CONSTRUCTOR(be_init_straight_alloc); diff --git a/ir/be/beprefalloc.c b/ir/be/beprefalloc.c new file mode 100644 index 000000000..79aeb266c --- /dev/null +++ b/ir/be/beprefalloc.c @@ -0,0 +1,2043 @@ +/* + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. + * + * This file is part of libFirm. 
+ * + * This file may be distributed and/or modified under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation and appearing in the file LICENSE.GPL included in the + * packaging of this file. + * + * Licensees holding valid libFirm Professional Edition licenses may use + * this file in accordance with the libFirm Commercial License. + * Agreement provided with the Software. + * + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE + * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + +/** + * @file + * @brief Preference Guided Register Assignment + * @author Matthias Braun + * @date 14.2.2009 + * @version $Id$ + * + * The idea is to allocate registers in 2 passes: + * 1. A first pass to determine "preferred" registers for live-ranges. This + * calculates for each register and each live-range a value indicating + * the usefulness. (You can roughly think of the value as the negative + * costs needed for copies when the value is in the specific registers...) + * + * 2. Walk blocks and assigns registers in a greedy fashion. Preferring + * registers with high preferences. When register constraints are not met, + * add copies and split live-ranges. + * + * TODO: + * - make use of free registers in the permute_values code + */ +#include "config.h" + +#include +#include +#include + +#include "error.h" +#include "execfreq.h" +#include "ircons.h" +#include "irdom.h" +#include "iredges_t.h" +#include "irgraph_t.h" +#include "irgwalk.h" +#include "irnode_t.h" +#include "irprintf.h" +#include "obst.h" +#include "raw_bitset.h" +#include "unionfind.h" +#include "pdeq.h" +#include "hungarian.h" + +#include "beabi.h" +#include "bechordal_t.h" +#include "be.h" +#include "beirg.h" +#include "belive_t.h" +#include "bemodule.h" +#include "benode.h" +#include "bera.h" +#include "besched.h" +#include "bespill.h" +#include "bespillutil.h" +#include "beverify.h" +#include "beutil.h" + +#define USE_FACTOR 1.0f +#define DEF_FACTOR 1.0f +#define NEIGHBOR_FACTOR 0.2f +#define AFF_SHOULD_BE_SAME 0.5f +#define AFF_PHI 1.0f +#define SPLIT_DELTA 1.0f +#define MAX_OPTIMISTIC_SPLIT_RECURSION 0 + +DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) + +static struct obstack obst; +static be_irg_t *birg; +static ir_graph *irg; +static const arch_register_class_t *cls; +static const arch_register_req_t *default_cls_req; +static be_lv_t *lv; +static const ir_exec_freq *execfreqs; +static unsigned n_regs; +static unsigned *normal_regs; +static int *congruence_classes; +static ir_node **block_order; +static int n_block_order; +static int create_preferences = true; +static int create_congruence_classes = true; +static int propagate_phi_registers = true; + +static const lc_opt_table_entry_t options[] = { + LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences), + LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes), + LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers), + LC_OPT_LAST +}; + +/** currently active assignments (while processing a basic block) + * maps registers to values(their current copies) */ +static ir_node **assignments; + +/** + * allocation information: last_uses, register preferences + * the information is per firm-node. 
+ */ +struct allocation_info_t { + unsigned last_uses; /**< bitset indicating last uses (input pos) */ + ir_node *current_value; /**< copy of the value that should be used */ + ir_node *original_value; /**< for copies, points to the original value */ + float prefs[0]; /**< register preferences */ +}; +typedef struct allocation_info_t allocation_info_t; + +/** helper data structure used when sorting register preferences */ +struct reg_pref_t { + unsigned num; + float pref; +}; +typedef struct reg_pref_t reg_pref_t; + +/** per basic-block information */ +struct block_info_t { + bool processed; /**< indicates whether the block is processed */ + ir_node *assignments[0]; /**< register assignments at end of block */ +}; +typedef struct block_info_t block_info_t; + +/** + * Get the allocation info for a node. + * The info is allocated on the first visit of a node. + */ +static allocation_info_t *get_allocation_info(ir_node *node) +{ + allocation_info_t *info = get_irn_link(node); + if (info == NULL) { + info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs); + info->current_value = node; + info->original_value = node; + set_irn_link(node, info); + } + + return info; +} + +static allocation_info_t *try_get_allocation_info(const ir_node *node) +{ + return (allocation_info_t*) get_irn_link(node); +} + +/** + * Get allocation information for a basic block + */ +static block_info_t *get_block_info(ir_node *block) +{ + block_info_t *info = get_irn_link(block); + + assert(is_Block(block)); + if (info == NULL) { + info = OALLOCFZ(&obst, block_info_t, assignments, n_regs); + set_irn_link(block, info); + } + + return info; +} + +/** + * Get default register requirement for the current register class + */ +static const arch_register_req_t *get_default_req_current_cls(void) +{ + if (default_cls_req == NULL) { + struct obstack *obst = get_irg_obstack(irg); + arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t); + + req->type = arch_register_req_type_normal; + req->cls = cls; + + default_cls_req = req; + } + return default_cls_req; +} + +/** + * Link the allocation info of a node to a copy. + * Afterwards, both nodes use the same allocation info. + * The copy must not have an allocation info assigned yet. + * + * @param copy the node that gets the allocation info assigned + * @param value the original node + */ +static void mark_as_copy_of(ir_node *copy, ir_node *value) +{ + ir_node *original; + allocation_info_t *info = get_allocation_info(value); + allocation_info_t *copy_info = get_allocation_info(copy); + + /* find original value */ + original = info->original_value; + if (original != value) { + info = get_allocation_info(original); + } + + assert(info->original_value == original); + info->current_value = copy; + + /* the copy should not be linked to something else yet */ + assert(copy_info->original_value == copy); + copy_info->original_value = original; + + /* copy over allocation preferences */ + memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0])); +} + +/** + * Calculate the penalties for every register on a node and its live neighbors.
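+ * As a rough illustration (hypothetical numbers): with n_regs == 3 and + * limited == { r0 }, the node itself loses penalty on prefs[r1] and + * prefs[r2], while every other live value loses a smaller penalty (scaled + * by NEIGHBOR_FACTOR) on prefs[r0], since it competes with the node for + * the allowed register.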
+ * + * @param live_nodes the set of live nodes at the current position, may be NULL + * @param penalty the penalty to subtract from the preferences + * @param limited a raw bitset containing the limited set for the node + * @param node the node + */ +static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, + float penalty, const unsigned* limited, + ir_node *node) +{ + ir_nodeset_iterator_t iter; + unsigned r; + unsigned n_allowed; + allocation_info_t *info = get_allocation_info(node); + ir_node *neighbor; + + /* give penalty for all forbidden regs */ + for (r = 0; r < n_regs; ++r) { + if (rbitset_is_set(limited, r)) + continue; + + info->prefs[r] -= penalty; + } + + /* all other live values should get a penalty for allowed regs */ + if (live_nodes == NULL) + return; + + penalty *= NEIGHBOR_FACTOR; + n_allowed = rbitset_popcnt(limited, n_regs); + if (n_allowed > 1) { + /* only create a very weak penalty if multiple regs are allowed */ + penalty = (penalty * 0.8f) / n_allowed; + } + foreach_ir_nodeset(live_nodes, neighbor, iter) { + allocation_info_t *neighbor_info; + + /* TODO: if op is used on multiple inputs we might not want to do a + * continue here */ + if (neighbor == node) + continue; + + neighbor_info = get_allocation_info(neighbor); + for (r = 0; r < n_regs; ++r) { + if (!rbitset_is_set(limited, r)) + continue; + + neighbor_info->prefs[r] -= penalty; + } + } +} + +/** + * Calculate the preferences of a definition for the current register class. + * If the definition uses a limited set of registers, reduce the preferences + * of the forbidden registers on the node itself and of the allowed registers + * on its live neighbors. + * + * @param live_nodes the set of live nodes at the current node + * @param weight the weight (the block's execution frequency) + * @param node the current node + */ +static void check_defs(const ir_nodeset_t *live_nodes, float weight, + ir_node *node) +{ + const arch_register_req_t *req; + + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + foreach_out_edge(node, edge) { + ir_node *proj = get_edge_src_irn(edge); + check_defs(live_nodes, weight, proj); + } + return; + } + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_limited) { + const unsigned *limited = req->limited; + float penalty = weight * DEF_FACTOR; + give_penalties_for_limits(live_nodes, penalty, limited, node); + } + + if (req->type & arch_register_req_type_should_be_same) { + ir_node *insn = skip_Proj(node); + allocation_info_t *info = get_allocation_info(node); + int arity = get_irn_arity(insn); + int i; + + float factor = 1.0f / rbitset_popcnt(&req->other_same, arity); + for (i = 0; i < arity; ++i) { + ir_node *op; + unsigned r; + allocation_info_t *op_info; + + if (!rbitset_is_set(&req->other_same, i)) + continue; + + op = get_irn_n(insn, i); + + /* if the value at the should_be_same input doesn't die at the + * node, then it is no use to propagate the constraints (since a + * copy will emerge anyway) */ + if (ir_nodeset_contains(live_nodes, op)) + continue; + + op_info = get_allocation_info(op); + for (r = 0; r < n_regs; ++r) { + op_info->prefs[r] += info->prefs[r] * factor; + } + } + } +} + +/** + * Walker: Runs on a block and calculates the preferences for every + * node and every register from the considered register class.
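+ * The block is walked in reverse schedule order while a liveness set is + * updated on the fly, so last uses can be recorded and penalties are only + * applied to values that are actually live at the instruction.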
+ */ +static void analyze_block(ir_node *block, void *data) +{ + float weight = get_block_execfreq(execfreqs, block); + ir_nodeset_t live_nodes; + ir_node *node; + (void) data; + + ir_nodeset_init(&live_nodes); + be_liveness_end_of_block(lv, cls, block, &live_nodes); + + sched_foreach_reverse(block, node) { + allocation_info_t *info; + int i; + int arity; + + if (is_Phi(node)) + break; + + if (create_preferences) + check_defs(&live_nodes, weight, node); + + /* mark last uses */ + arity = get_irn_arity(node); + + /* the allocation info node currently only uses 1 unsigned value + to mark last used inputs. So we will fail for a node with more than + 32 inputs. */ + if (arity >= (int) sizeof(unsigned) * 8) { + panic("Node with more than %d inputs not supported yet", + (int) sizeof(unsigned) * 8); + } + + info = get_allocation_info(node); + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + /* last usage of a value? */ + if (!ir_nodeset_contains(&live_nodes, op)) { + rbitset_set(&info->last_uses, i); + } + } + + be_liveness_transfer(cls, node, &live_nodes); + + if (create_preferences) { + /* update weights based on usage constraints */ + for (i = 0; i < arity; ++i) { + const arch_register_req_t *req; + const unsigned *limited; + ir_node *op = get_irn_n(node, i); + + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + req = arch_get_register_req(node, i); + if (!(req->type & arch_register_req_type_limited)) + continue; + + limited = req->limited; + give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, + limited, op); + } + } + } + + ir_nodeset_destroy(&live_nodes); +} + +static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node) +{ + const arch_register_req_t *req; + + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + foreach_out_edge(node, edge) { + ir_node *def = get_edge_src_irn(edge); + congruence_def(live_nodes, def); + } + return; + } + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + /* should be same constraint? */ + req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_should_be_same) { + ir_node *insn = skip_Proj(node); + int arity = get_irn_arity(insn); + int i; + unsigned node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + for (i = 0; i < arity; ++i) { + ir_node *live; + ir_node *op; + int op_idx; + ir_nodeset_iterator_t iter; + bool interferes = false; + + if (!rbitset_is_set(&req->other_same, i)) + continue; + + op = get_irn_n(insn, i); + op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + /* one should_be_same is enough... 
*/ + break; + } + } +} + +static void create_congruence_class(ir_node *block, void *data) +{ + ir_nodeset_t live_nodes; + ir_node *node; + + (void) data; + ir_nodeset_init(&live_nodes); + be_liveness_end_of_block(lv, cls, block, &live_nodes); + + /* check should be same constraints */ + sched_foreach_reverse(block, node) { + if (is_Phi(node)) + break; + + congruence_def(&live_nodes, node); + be_liveness_transfer(cls, node, &live_nodes); + } + + /* check phi congruence classes */ + sched_foreach_reverse_from(node, node) { + int i; + int arity; + int node_idx; + assert(is_Phi(node)); + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + bool interferes = false; + ir_nodeset_iterator_t iter; + unsigned r; + int old_node_idx; + ir_node *live; + ir_node *phi; + allocation_info_t *head_info; + allocation_info_t *other_info; + ir_node *op = get_Phi_pred(node, i); + int op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(&live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + /* any other phi has the same input? */ + sched_foreach(block, phi) { + ir_node *oop; + int oop_idx; + if (!is_Phi(phi)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, phi)) + continue; + oop = get_Phi_pred(phi, i); + if (oop == op) + continue; + oop_idx = get_irn_idx(oop); + oop_idx = uf_find(congruence_classes, oop_idx); + if (oop_idx == op_idx) { + interferes = true; + break; + } + } + if (interferes) + continue; + + /* merge the 2 congruence classes and sum up their preferences */ + old_node_idx = node_idx; + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + + old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx; + head_info = get_allocation_info(get_idx_irn(irg, node_idx)); + other_info = get_allocation_info(get_idx_irn(irg, old_node_idx)); + for (r = 0; r < n_regs; ++r) { + head_info->prefs[r] += other_info->prefs[r]; + } + } + } +} + +static void set_congruence_prefs(ir_node *node, void *data) +{ + allocation_info_t *info; + allocation_info_t *head_info; + unsigned node_idx = get_irn_idx(node); + unsigned node_set = uf_find(congruence_classes, node_idx); + + (void) data; + + /* head of congruence class or not in any class */ + if (node_set == node_idx) + return; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + head_info = get_allocation_info(get_idx_irn(irg, node_set)); + info = get_allocation_info(node); + + memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); +} + +static void combine_congruence_classes(void) +{ + size_t n = get_irg_last_idx(irg); + congruence_classes = XMALLOCN(int, n); + uf_init(congruence_classes, n); + + /* create congruence classes */ + irg_block_walk_graph(irg, create_congruence_class, NULL, NULL); + /* merge preferences */ + irg_walk_graph(irg, set_congruence_prefs, NULL, NULL); + free(congruence_classes); +} + + + + + +/** + * Assign register reg to the given node. 
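+ * Besides setting the register on the node this records the node in the + * assignments array, so the register counts as occupied until + * free_reg_of_value() releases it.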
+ * + * @param node the node + * @param reg the register + */ +static void use_reg(ir_node *node, const arch_register_t *reg) +{ + unsigned r = arch_register_get_index(reg); + assignments[r] = node; + arch_set_irn_register(node, reg); +} + +static void free_reg_of_value(ir_node *node) +{ + const arch_register_t *reg; + unsigned r; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + + reg = arch_get_irn_register(node); + r = arch_register_get_index(reg); + /* assignments[r] may be NULL if a value is used at 2 inputs, + so it gets freed twice. */ + assert(assignments[r] == node || assignments[r] == NULL); + assignments[r] = NULL; +} + +/** + * Compare two register preferences in decreasing order. + */ +static int compare_reg_pref(const void *e1, const void *e2) +{ + const reg_pref_t *rp1 = (const reg_pref_t*) e1; + const reg_pref_t *rp2 = (const reg_pref_t*) e2; + if (rp1->pref < rp2->pref) + return 1; + if (rp1->pref > rp2->pref) + return -1; + return 0; +} + +static void fill_sort_candidates(reg_pref_t *regprefs, + const allocation_info_t *info) +{ + unsigned r; + + for (r = 0; r < n_regs; ++r) { + float pref = info->prefs[r]; + regprefs[r].num = r; + regprefs[r].pref = pref; + } + /* TODO: use a stable sort here to avoid unnecessary register jumping */ + qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref); +} + +static bool try_optimistic_split(ir_node *to_split, ir_node *before, + float pref, float pref_delta, + unsigned *forbidden_regs, int recursion) +{ + const arch_register_t *from_reg; + const arch_register_t *reg; + ir_node *original_insn; + ir_node *block; + ir_node *copy; + unsigned r; + unsigned from_r; + unsigned i; + allocation_info_t *info = get_allocation_info(to_split); + reg_pref_t *prefs; + float delta; + float split_threshold; + + (void) pref; + + /* stupid hack: don't optimistically split don't-spill nodes... + * (so we don't split away the values produced because of + * must_be_different constraints) */ + original_insn = skip_Proj(info->original_value); + if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) + return false; + + from_reg = arch_get_irn_register(to_split); + from_r = arch_register_get_index(from_reg); + block = get_nodes_block(before); + split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA; + + if (pref_delta < split_threshold*0.5) + return false; + + /* find the best free position where we could move to */ + prefs = ALLOCAN(reg_pref_t, n_regs); + fill_sort_candidates(prefs, info); + for (i = 0; i < n_regs; ++i) { + float apref; + float apref_delta; + bool res; + bool old_source_state; + + /* we need a normal register which is not an output register + and different from the current register of to_split */ + r = prefs[i].num; + if (!rbitset_is_set(normal_regs, r)) + continue; + if (rbitset_is_set(forbidden_regs, r)) + continue; + if (r == from_r) + continue; + + /* is the split worth it? */ + delta = pref_delta + prefs[i].pref; + if (delta < split_threshold) { + DB((dbg, LEVEL_3, "Not doing optimistic split of %+F (depth %d), win %f too low\n", + to_split, recursion, delta)); + return false; + } + + /* if the register is free then we can do the split */ + if (assignments[r] == NULL) + break; + + /* otherwise we might try recursively calling optimistic_split */ + if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) + continue; + + apref = prefs[i].pref; + apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; + apref_delta += pref_delta - split_threshold; + + /* our source register isn't a useful destination for recursive + splits */ + old_source_state = rbitset_is_set(forbidden_regs, from_r); + rbitset_set(forbidden_regs, from_r); + /* try recursive split */ + res = try_optimistic_split(assignments[r], before, apref, + apref_delta, forbidden_regs, recursion+1); + /* restore our destination */ + if (old_source_state) { + rbitset_set(forbidden_regs, from_r); + } else { + rbitset_clear(forbidden_regs, from_r); + } + + if (res) + break; + } + if (i >= n_regs) + return false; + + reg = arch_register_for_index(cls, r); + copy = be_new_Copy(cls, block, to_split); + mark_as_copy_of(copy, to_split); + /* hacky, but correct here */ + if (assignments[arch_register_get_index(from_reg)] == to_split) + free_reg_of_value(to_split); + use_reg(copy, reg); + sched_add_before(before, copy); + + DB((dbg, LEVEL_3, + "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n", + copy, to_split, from_reg->name, reg->name, before, delta, recursion)); + return true; +} + +/** + * Determine and assign a register for node @p node + */ +static void assign_reg(const ir_node *block, ir_node *node, + unsigned *forbidden_regs) +{ + const arch_register_t *reg; + allocation_info_t *info; + const arch_register_req_t *req; + reg_pref_t *reg_prefs; + ir_node *in_node; + unsigned i; + const unsigned *allowed_regs; + unsigned r; + + assert(!is_Phi(node)); + assert(arch_irn_consider_in_reg_alloc(cls, node)); + + /* preassigned register? */ + reg = arch_get_irn_register(node); + if (reg != NULL) { + DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name)); + use_reg(node, reg); + return; + } + + /* give should_be_same bonuses */ + info = get_allocation_info(node); + req = arch_get_register_req_out(node); + + in_node = skip_Proj(node); + if (req->type & arch_register_req_type_should_be_same) { + float weight = get_block_execfreq(execfreqs, block); + int arity = get_irn_arity(in_node); + int i; + + assert(arity <= (int) sizeof(req->other_same) * 8); + for (i = 0; i < arity; ++i) { + ir_node *in; + const arch_register_t *reg; + unsigned r; + if (!rbitset_is_set(&req->other_same, i)) + continue; + + in = get_irn_n(in_node, i); + reg = arch_get_irn_register(in); + assert(reg != NULL); + r = arch_register_get_index(reg); + + /* if the value didn't die here then we should not propagate the + * should_be_same info */ + if (assignments[r] == in) + continue; + + info->prefs[r] += weight * AFF_SHOULD_BE_SAME; + } + } + + /* create list of register candidates and sort by their preference */ + DB((dbg, LEVEL_2, "Candidates for %+F:", node)); + reg_prefs = alloca(n_regs * sizeof(reg_prefs[0])); + fill_sort_candidates(reg_prefs, info); + for (i = 0; i < n_regs; ++i) { + unsigned num = reg_prefs[i].num; + const arch_register_t *reg; + + if (!rbitset_is_set(normal_regs, num)) + continue; + + reg = arch_register_for_index(cls, num); + DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref)); + } + DB((dbg, LEVEL_2, "\n")); + + allowed_regs = normal_regs; + if (req->type & arch_register_req_type_limited) { + allowed_regs = req->limited; + } + + for (i = 0; i < n_regs; ++i) { + float pref, delta; + ir_node *before; + bool res; + + r = reg_prefs[i].num; + if (!rbitset_is_set(allowed_regs, r)) + continue; + if (assignments[r] == NULL) + break; + pref = reg_prefs[i].pref; + delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0; + before = skip_Proj(node); + res = try_optimistic_split(assignments[r], before, + pref, delta, forbidden_regs, 0); + if (res) + break; + } + if (i >= n_regs) { + panic("No register left for %+F\n", node); + } + + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); +} + +/** + * Add a permutation in front of a node and change the assignments + * due to this permutation. + * + * To understand this imagine a permutation like this: + * + * 1 -> 2 + * 2 -> 3 + * 3 -> 1, 5 + * 4 -> 6 + * 5 + * 6 + * 7 -> 7 + * + * First we count how many destinations a single value has. At the same time + * we can be sure that each destination register has at most 1 source register + * (it can have 0 which means we don't care what value is in it). + * We ignore all fulfilled permutations (like 7->7). + * In a first pass we create as many copy instructions as possible, as they + * are generally cheaper than exchanges. We do this by counting into how many + * destinations a register has to be copied (in the example it's 2 for register + * 3, or 1 for the registers 1,2,4 and 7). + * We can then create a copy into every destination register when the use count + * of that register is 0 (= no one else needs the value in the register). + * + * After this step only cycles are left. We implement a cyclic permutation + * of n registers with n-1 transpositions. + * + * @param live_nodes the set of live nodes, updated due to live range splits + * @param before the node before we add the permutation + * @param permutation the permutation: array indices are the destination + * registers, the values in the array are the source + * registers. + */ +static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, + unsigned *permutation) +{ + unsigned *n_used = ALLOCANZ(unsigned, n_regs); + ir_node *block; + unsigned r; + + /* determine how often each source register needs to be read */ + for (r = 0; r < n_regs; ++r) { + unsigned old_reg = permutation[r]; + ir_node *value; + + value = assignments[old_reg]; + if (value == NULL) { + /* nothing to do here, reg is not live. Mark it as fixpoint + * so we ignore it in the next steps */ + permutation[r] = r; + continue; + } + + ++n_used[old_reg]; + } + + block = get_nodes_block(before); + + /* step 1: create copies where immediately possible */ + for (r = 0; r < n_regs; /* empty */) { + ir_node *copy; + ir_node *src; + const arch_register_t *reg; + unsigned old_r = permutation[r]; + + /* - no need to do anything for fixed points.
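+ (a fixed point is an entry with permutation[r] == r)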
+ - we can't copy if the value in the dest reg is still needed */ + if (old_r == r || n_used[r] > 0) { + ++r; + continue; + } + + /* create a copy */ + src = assignments[old_r]; + copy = be_new_Copy(cls, block, src); + sched_add_before(before, copy); + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", + copy, src, before, reg->name)); + mark_as_copy_of(copy, src); + use_reg(copy, reg); + + if (live_nodes != NULL) { + ir_nodeset_insert(live_nodes, copy); + } + + /* old register has 1 user less, permutation is resolved */ + assert(arch_register_get_index(arch_get_irn_register(src)) == old_r); + permutation[r] = r; + + assert(n_used[old_r] > 0); + --n_used[old_r]; + if (n_used[old_r] == 0) { + if (live_nodes != NULL) { + ir_nodeset_remove(live_nodes, src); + } + free_reg_of_value(src); + } + + /* advance or jump back (if this copy enabled another copy) */ + if (old_r < r && n_used[old_r] == 0) { + r = old_r; + } else { + ++r; + } + } + + /* at this point we only have "cycles" left which we have to resolve with + * perm instructions + * TODO: if we have free registers left, then we should really use copy + * instructions for any cycle longer than 2 registers... + * (this is probably architecture dependent, there might be archs where + * copies are preferable even for 2-cycles) */ + + /* create perms with the rest */ + for (r = 0; r < n_regs; /* empty */) { + const arch_register_t *reg; + unsigned old_r = permutation[r]; + unsigned r2; + ir_node *in[2]; + ir_node *perm; + ir_node *proj0; + ir_node *proj1; + + if (old_r == r) { + ++r; + continue; + } + + /* we shouldn't have copies from 1 value to multiple destinations left*/ + assert(n_used[old_r] == 1); + + /* exchange old_r and r2; after that old_r is a fixed point */ + r2 = permutation[old_r]; + + in[0] = assignments[r2]; + in[1] = assignments[old_r]; + perm = be_new_Perm(cls, block, 2, in); + sched_add_before(before, perm); + DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", + perm, in[0], in[1], before)); + + proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0); + mark_as_copy_of(proj0, in[0]); + reg = arch_register_for_index(cls, old_r); + use_reg(proj0, reg); + + proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1); + mark_as_copy_of(proj1, in[1]); + reg = arch_register_for_index(cls, r2); + use_reg(proj1, reg); + + /* 1 value is now in the correct register */ + permutation[old_r] = old_r; + /* the source of r changed to r2 */ + permutation[r] = r2; + + /* if we have reached a fixpoint update data structures */ + if (live_nodes != NULL) { + ir_nodeset_remove(live_nodes, in[0]); + ir_nodeset_remove(live_nodes, in[1]); + ir_nodeset_remove(live_nodes, proj0); + ir_nodeset_insert(live_nodes, proj1); + } + } + +#ifdef DEBUG_libfirm + /* now we should only have fixpoints left */ + for (r = 0; r < n_regs; ++r) { + assert(permutation[r] == r); + } +#endif +} + +/** + * Free regs for values last used. 
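+ * Only inputs whose bit is set in the last_uses bitset (computed by + * analyze_block()) are released.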
+ * + * @param live_nodes set of live nodes, will be updated + * @param node the node to consider + */ +static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) +{ + allocation_info_t *info = get_allocation_info(node); + const unsigned *last_uses = &info->last_uses; + int arity = get_irn_arity(node); + int i; + + for (i = 0; i < arity; ++i) { + ir_node *op; + + /* check if one operand is the last use */ + if (!rbitset_is_set(last_uses, i)) + continue; + + op = get_irn_n(node, i); + free_reg_of_value(op); + ir_nodeset_remove(live_nodes, op); + } +} + +/** + * change inputs of a node to the current value (copies/perms) + */ +static void rewire_inputs(ir_node *node) +{ + int i; + int arity = get_irn_arity(node); + + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + allocation_info_t *info = try_get_allocation_info(op); + + if (info == NULL) + continue; + + info = get_allocation_info(info->original_value); + if (info->current_value != op) { + set_irn_n(node, i, info->current_value); + } + } +} + +/** + * Create a bitset of registers occupied by values living through an + * instruction + */ +static void determine_live_through_regs(unsigned *bitset, ir_node *node) +{ + const allocation_info_t *info = get_allocation_info(node); + unsigned r; + int i; + int arity; + + /* mark all used registers as potentially live-through */ + for (r = 0; r < n_regs; ++r) { + if (assignments[r] == NULL) + continue; + if (!rbitset_is_set(normal_regs, r)) + continue; + + rbitset_set(bitset, r); + } + + /* remove registers of values dying at the instruction */ + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *op; + const arch_register_t *reg; + + if (!rbitset_is_set(&info->last_uses, i)) + continue; + + op = get_irn_n(node, i); + reg = arch_get_irn_register(op); + rbitset_clear(bitset, arch_register_get_index(reg)); + } +} + +/** + * Enforce constraints at a node by live range splits. + * + * @param live_nodes the set of live nodes, might be changed + * @param node the current node + */ +static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, + unsigned *forbidden_regs) +{ + int arity = get_irn_arity(node); + int i, res; + hungarian_problem_t *bp; + unsigned l, r; + unsigned *assignment; + + /* construct a list of registers occupied by live-through values */ + unsigned *live_through_regs = NULL; + + /* see if any use constraints are not met */ + bool good = true; + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + const arch_register_req_t *req; + const unsigned *limited; + unsigned r; + + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + /* are there any limitations for the i'th operand? */ + req = arch_get_register_req(node, i); + if (!(req->type & arch_register_req_type_limited)) + continue; + + limited = req->limited; + reg = arch_get_irn_register(op); + r = arch_register_get_index(reg); + if (!rbitset_is_set(limited, r)) { + /* found an assignment outside the limited set */ + good = false; + break; + } + } + + /* is any of the live-throughs using a constrained output register?
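+ * (if so, the value has to be moved to another register before the + * instruction; the code below collects such constrained registers in + * forbidden_regs and checks them against live_through_regs)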
*/ + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + + foreach_out_edge(node, edge) { + ir_node *proj = get_edge_src_irn(edge); + const arch_register_req_t *req; + + if (!arch_irn_consider_in_reg_alloc(cls, proj)) + continue; + + req = arch_get_register_req_out(proj); + if (!(req->type & arch_register_req_type_limited)) + continue; + + if (live_through_regs == NULL) { + rbitset_alloca(live_through_regs, n_regs); + determine_live_through_regs(live_through_regs, node); + } + + rbitset_or(forbidden_regs, req->limited, n_regs); + if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { + good = false; + } + } + } else { + if (arch_irn_consider_in_reg_alloc(cls, node)) { + const arch_register_req_t *req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_limited) { + rbitset_alloca(live_through_regs, n_regs); + determine_live_through_regs(live_through_regs, node); + if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { + good = false; + rbitset_or(forbidden_regs, req->limited, n_regs); + } + } + } + } + + if (good) + return; + + /* create the bitset if we haven't yet */ + if (live_through_regs == NULL) { + rbitset_alloca(live_through_regs, n_regs); + } + + /* at this point we have to construct a bipartite matching problem to see + * which values should go to which registers + * Note: We're building the matrix in "reverse" - source registers are + * right, destinations left because this will produce the solution + * in the format required for permute_values. + */ + bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); + + /* add all combinations, then remove not allowed ones */ + for (l = 0; l < n_regs; ++l) { + if (!rbitset_is_set(normal_regs, l)) { + hungarian_add(bp, l, l, 1); + continue; + } + + for (r = 0; r < n_regs; ++r) { + if (!rbitset_is_set(normal_regs, r)) + continue; + /* live-through values may not use constrained output registers */ + if (rbitset_is_set(live_through_regs, l) + && rbitset_is_set(forbidden_regs, r)) + continue; + + hungarian_add(bp, r, l, l == r ? 
9 : 8); + } + } + + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + const arch_register_req_t *req; + const unsigned *limited; + unsigned current_reg; + + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + req = arch_get_register_req(node, i); + if (!(req->type & arch_register_req_type_limited)) + continue; + + limited = req->limited; + reg = arch_get_irn_register(op); + current_reg = arch_register_get_index(reg); + for (r = 0; r < n_regs; ++r) { + if (rbitset_is_set(limited, r)) + continue; + hungarian_remv(bp, r, current_reg); + } + } + + //hungarian_print_cost_matrix(bp, 1); + hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); + + assignment = ALLOCAN(unsigned, n_regs); + res = hungarian_solve(bp, (int*) assignment, NULL, 0); + assert(res == 0); + +#if 0 + fprintf(stderr, "Swap result:"); + for (i = 0; i < (int) n_regs; ++i) { + fprintf(stderr, " %d", assignment[i]); + } + fprintf(stderr, "\n"); +#endif + + hungarian_free(bp); + + permute_values(live_nodes, node, assignment); +} + +/** test whether @p test_value is a copy of the value of node @p value */ +static bool is_copy_of(ir_node *value, ir_node *test_value) +{ + allocation_info_t *test_info; + allocation_info_t *info; + + if (value == test_value) + return true; + + info = get_allocation_info(value); + test_info = get_allocation_info(test_value); + return test_info->original_value == info->original_value; +} + +/** + * find a value in the end-assignment of a basic block + * @returns the index into the assignment array if found + * -1 if not found + */ +static int find_value_in_block_info(block_info_t *info, ir_node *value) +{ + unsigned r; + ir_node **assignments = info->assignments; + for (r = 0; r < n_regs; ++r) { + ir_node *a_value = assignments[r]; + + if (a_value == NULL) + continue; + if (is_copy_of(a_value, value)) + return (int) r; + } + + return -1; +} + +/** + * Create the necessary permutations at the end of a basic block to fulfill + * the register assignment for phi-nodes in the next block + */ +static void add_phi_permutations(ir_node *block, int p) +{ + unsigned r; + unsigned *permutation; + ir_node **old_assignments; + bool need_permutation; + ir_node *node; + ir_node *pred = get_Block_cfgpred_block(block, p); + + block_info_t *pred_info = get_block_info(pred); + + /* predecessor not processed yet? 
nothing to do */ + if (!pred_info->processed) + return; + + permutation = ALLOCAN(unsigned, n_regs); + for (r = 0; r < n_regs; ++r) { + permutation[r] = r; + } + + /* check phi nodes */ + need_permutation = false; + node = sched_first(block); + for ( ; is_Phi(node); node = sched_next(node)) { + const arch_register_t *reg; + int regn; + int a; + ir_node *op; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + op = get_Phi_pred(node, p); + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + a = find_value_in_block_info(pred_info, op); + assert(a >= 0); + + reg = arch_get_irn_register(node); + regn = arch_register_get_index(reg); + if (regn != a) { + permutation[regn] = a; + need_permutation = true; + } + } + + if (need_permutation) { + /* permute values at end of predecessor */ + old_assignments = assignments; + assignments = pred_info->assignments; + permute_values(NULL, be_get_end_of_block_insertion_point(pred), + permutation); + assignments = old_assignments; + } + + /* change phi nodes to use the copied values */ + node = sched_first(block); + for ( ; is_Phi(node); node = sched_next(node)) { + int a; + ir_node *op; + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + op = get_Phi_pred(node, p); + /* no need to do anything for Unknown inputs */ + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + /* we have permuted all values into the correct registers so we can + simply query which value occupies the phi's register in the + predecessor */ + a = arch_register_get_index(arch_get_irn_register(node)); + op = pred_info->assignments[a]; + set_Phi_pred(node, p, op); + } +} + +/** + * Set preferences for a phi's register based on the registers used on the + * phi inputs. + */ +static void adapt_phi_prefs(ir_node *phi) +{ + int i; + int arity = get_irn_arity(phi); + ir_node *block = get_nodes_block(phi); + allocation_info_t *info = get_allocation_info(phi); + + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(phi, i); + const arch_register_t *reg = arch_get_irn_register(op); + ir_node *pred_block; + block_info_t *pred_block_info; + float weight; + unsigned r; + + if (reg == NULL) + continue; + /* we only give the bonus if the predecessor already has registers + * assigned, otherwise we only see a dummy value + * and any conclusions about its register are useless */ + pred_block = get_Block_cfgpred_block(block, i); + pred_block_info = get_block_info(pred_block); + if (!pred_block_info->processed) + continue; + + /* give bonus for already assigned register */ + weight = get_block_execfreq(execfreqs, pred_block); + r = arch_register_get_index(reg); + info->prefs[r] += weight * AFF_PHI; + } +} + +/** + * After a phi has been assigned a register, propagate that preference + * to the phi inputs.
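+ * The propagation recurses into inputs that are themselves phis and damps + * all other preferences, so a whole web of connected phis tends to settle + * on the same register.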
+ */ +static void propagate_phi_register(ir_node *phi, unsigned assigned_r) +{ + int i; + ir_node *block = get_nodes_block(phi); + int arity = get_irn_arity(phi); + + for (i = 0; i < arity; ++i) { + ir_node *op = get_Phi_pred(phi, i); + allocation_info_t *info = get_allocation_info(op); + ir_node *pred_block = get_Block_cfgpred_block(block, i); + unsigned r; + float weight + = get_block_execfreq(execfreqs, pred_block) * AFF_PHI; + + if (info->prefs[assigned_r] >= weight) + continue; + + /* promote the preferred register */ + for (r = 0; r < n_regs; ++r) { + if (info->prefs[r] > -weight) { + info->prefs[r] = -weight; + } + } + info->prefs[assigned_r] = weight; + + if (is_Phi(op)) + propagate_phi_register(op, assigned_r); + } +} + +static void assign_phi_registers(ir_node *block) +{ + int n_phis = 0; + int n; + int res; + int *assignment; + ir_node *node; + hungarian_problem_t *bp; + + /* count phi nodes */ + sched_foreach(block, node) { + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + ++n_phis; + } + + if (n_phis == 0) + return; + + /* build a bipartite matching problem for all phi nodes */ + bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); + n = 0; + sched_foreach(block, node) { + unsigned r; + + allocation_info_t *info; + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + /* give bonuses for predecessor colorings */ + adapt_phi_prefs(node); + /* add the phi's preferences to the bipartite problem */ + info = get_allocation_info(node); + DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); + for (r = 0; r < n_regs; ++r) { + float costs; + + if (!rbitset_is_set(normal_regs, r)) + continue; + + costs = info->prefs[r]; + costs = costs < 0 ? -logf(-costs+1) : logf(costs+1); + costs *= 100; + costs += 10000; + hungarian_add(bp, n, r, costs); + DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, + info->prefs[r])); + } + DB((dbg, LEVEL_3, "\n")); + ++n; + } + + //hungarian_print_cost_matrix(bp, 7); + hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); + + assignment = ALLOCAN(int, n_regs); + res = hungarian_solve(bp, assignment, NULL, 0); + assert(res == 0); + + /* apply results */ + n = 0; + sched_foreach(block, node) { + unsigned r; + const arch_register_t *reg; + + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + r = assignment[n++]; + assert(rbitset_is_set(normal_regs, r)); + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); + + /* adapt preferences for phi inputs */ + if (propagate_phi_registers) + propagate_phi_register(node, r); + } +} + +/** + * Walker: assign registers to all nodes of a block that + * need registers from the currently considered register class.
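+ * Live-in values whose predecessor assignments disagree get a new Phi; + * all phis of the block are then colored at once via bipartite matching + * before the remaining instructions are assigned greedily in schedule + * order.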
+ */ +static void allocate_coalesce_block(ir_node *block, void *data) +{ + int i; + ir_nodeset_t live_nodes; + ir_node *node; + int n_preds; + block_info_t *block_info; + block_info_t **pred_block_infos; + ir_node **phi_ins; + unsigned *forbidden_regs; /**< collects registers which must + not be used for optimistic splits */ + + (void) data; + DB((dbg, LEVEL_2, "* Block %+F\n", block)); + + /* clear assignments */ + block_info = get_block_info(block); + assignments = block_info->assignments; + + ir_nodeset_init(&live_nodes); + + /* gather regalloc infos of predecessor blocks */ + n_preds = get_Block_n_cfgpreds(block); + pred_block_infos = ALLOCAN(block_info_t*, n_preds); + for (i = 0; i < n_preds; ++i) { + ir_node *pred = get_Block_cfgpred_block(block, i); + block_info_t *pred_info = get_block_info(pred); + pred_block_infos[i] = pred_info; + } + + phi_ins = ALLOCAN(ir_node*, n_preds); + + /* collect live-in nodes and preassigned values */ + be_lv_foreach(lv, block, be_lv_state_in, i) { + const arch_register_t *reg; + int p; + bool need_phi = false; + + node = be_lv_get_irn(lv, block, i); + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + /* check all predecessors for this value, if it is not everywhere the + same or unknown then we have to construct a phi + (we collect the potential phi inputs here) */ + for (p = 0; p < n_preds; ++p) { + block_info_t *pred_info = pred_block_infos[p]; + + if (!pred_info->processed) { + /* use node for now, it will get fixed later */ + phi_ins[p] = node; + need_phi = true; + } else { + int a = find_value_in_block_info(pred_info, node); + + /* must live out of predecessor */ + assert(a >= 0); + phi_ins[p] = pred_info->assignments[a]; + /* different value from last time? then we need a phi */ + if (p > 0 && phi_ins[p-1] != phi_ins[p]) { + need_phi = true; + } + } + } + + if (need_phi) { + ir_mode *mode = get_irn_mode(node); + const arch_register_req_t *req = get_default_req_current_cls(); + ir_node *phi; + + phi = new_r_Phi(block, n_preds, phi_ins, mode); + be_set_phi_reg_req(phi, req); + + DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); +#ifdef DEBUG_libfirm + { + int i; + for (i = 0; i < n_preds; ++i) { + DB((dbg, LEVEL_3, " %+F", phi_ins[i])); + } + DB((dbg, LEVEL_3, "\n")); + } +#endif + mark_as_copy_of(phi, node); + sched_add_after(block, phi); + + node = phi; + } else { + allocation_info_t *info = get_allocation_info(node); + info->current_value = phi_ins[0]; + + /* Grab 1 of the inputs we constructed (might not be the same as + * "node" as we could see the same copy of the value in all + * predecessors */ + node = phi_ins[0]; + } + + /* if the node already has a register assigned use it */ + reg = arch_get_irn_register(node); + if (reg != NULL) { + use_reg(node, reg); + } + + /* remember that this node is live at the beginning of the block */ + ir_nodeset_insert(&live_nodes, node); + } + + rbitset_alloca(forbidden_regs, n_regs); + + /* handle phis... 
*/ + assign_phi_registers(block); + + /* all live-ins must have a register */ +#ifdef DEBUG_libfirm + { + ir_nodeset_iterator_t iter; + foreach_ir_nodeset(&live_nodes, node, iter) { + const arch_register_t *reg = arch_get_irn_register(node); + assert(reg != NULL); + } + } +#endif + + /* assign instructions in the block */ + sched_foreach(block, node) { + int i; + int arity; + + /* phis are already assigned */ + if (is_Phi(node)) + continue; + + rewire_inputs(node); + + /* enforce use constraints */ + rbitset_clear_all(forbidden_regs, n_regs); + enforce_constraints(&live_nodes, node, forbidden_regs); + + rewire_inputs(node); + + /* we may not use registers used for inputs for optimistic splits */ + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + reg = arch_get_irn_register(op); + rbitset_set(forbidden_regs, arch_register_get_index(reg)); + } + + /* free registers of values last used at this instruction */ + free_last_uses(&live_nodes, node); + + /* assign output registers */ + /* TODO: 2 phases: first: pre-assigned ones, 2nd real regs */ + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + foreach_out_edge(node, edge) { + ir_node *proj = get_edge_src_irn(edge); + if (!arch_irn_consider_in_reg_alloc(cls, proj)) + continue; + assign_reg(block, proj, forbidden_regs); + } + } else if (arch_irn_consider_in_reg_alloc(cls, node)) { + assign_reg(block, node, forbidden_regs); + } + } + + ir_nodeset_destroy(&live_nodes); + assignments = NULL; + + block_info->processed = true; + + /* permute values at end of predecessor blocks in case of phi-nodes */ + if (n_preds > 1) { + int p; + for (p = 0; p < n_preds; ++p) { + add_phi_permutations(block, p); + } + } + + /* if we have exactly 1 successor then we might be able to produce phi + copies now */ + if (get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) == 1) { + const ir_edge_t *edge + = get_irn_out_edge_first_kind(block, EDGE_KIND_BLOCK); + ir_node *succ = get_edge_src_irn(edge); + int p = get_edge_src_pos(edge); + block_info_t *succ_info = get_block_info(succ); + + if (succ_info->processed) { + add_phi_permutations(succ, p); + } + } +} + +typedef struct block_costs_t block_costs_t; +struct block_costs_t { + float costs; /**< costs of the block */ + int dfs_num; /**< depth first search number (to detect backedges) */ +}; + +static int cmp_block_costs(const void *d1, const void *d2) +{ + const ir_node * const *block1 = d1; + const ir_node * const *block2 = d2; + const block_costs_t *info1 = get_irn_link(*block1); + const block_costs_t *info2 = get_irn_link(*block2); + return QSORT_CMP(info2->costs, info1->costs); +} + +static void determine_block_order(void) +{ + int i; + ir_node **blocklist = be_get_cfgpostorder(irg); + int n_blocks = ARR_LEN(blocklist); + int dfs_num = 0; + pdeq *worklist = new_pdeq(); + ir_node **order = XMALLOCN(ir_node*, n_blocks); + int order_p = 0; + + /* clear block links... 
*/ + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + set_irn_link(block, NULL); + } + + /* walk blocks in reverse postorder, the costs for each block are the + * sum of the costs of its predecessors (excluding the costs on backedges + * which we can't determine) */ + for (i = n_blocks-1; i >= 0; --i) { + block_costs_t *cost_info; + ir_node *block = blocklist[i]; + + float execfreq = get_block_execfreq(execfreqs, block); + float costs = execfreq; + int n_cfgpreds = get_Block_n_cfgpreds(block); + int p; + for (p = 0; p < n_cfgpreds; ++p) { + ir_node *pred_block = get_Block_cfgpred_block(block, p); + block_costs_t *pred_costs = get_irn_link(pred_block); + /* we don't have any info for backedges */ + if (pred_costs == NULL) + continue; + costs += pred_costs->costs; + } + + cost_info = OALLOCZ(&obst, block_costs_t); + cost_info->costs = costs; + cost_info->dfs_num = dfs_num++; + set_irn_link(block, cost_info); + } + + /* sort array by block costs */ + qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs); + + ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); + inc_irg_block_visited(irg); + + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + if (Block_block_visited(block)) + continue; + + /* continually add predecessors with highest costs to worklist + * (without using backedges) */ + do { + block_costs_t *info = get_irn_link(block); + ir_node *best_pred = NULL; + float best_costs = -1; + int n_cfgpred = get_Block_n_cfgpreds(block); + int i; + + pdeq_putr(worklist, block); + mark_Block_block_visited(block); + for (i = 0; i < n_cfgpred; ++i) { + ir_node *pred_block = get_Block_cfgpred_block(block, i); + block_costs_t *pred_info = get_irn_link(pred_block); + + /* ignore backedges */ + if (pred_info->dfs_num > info->dfs_num) + continue; + + if (pred_info->costs > best_costs) { + best_costs = pred_info->costs; + best_pred = pred_block; + } + } + block = best_pred; + } while (block != NULL && !Block_block_visited(block)); + + /* now put all nodes in the worklist in our final order */ + while (!pdeq_empty(worklist)) { + ir_node *pblock = pdeq_getr(worklist); + assert(order_p < n_blocks); + order[order_p++] = pblock; + } + } + assert(order_p == n_blocks); + del_pdeq(worklist); + + ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED); + + DEL_ARR_F(blocklist); + + obstack_free(&obst, NULL); + obstack_init(&obst); + + block_order = order; + n_block_order = n_blocks; +} + +/** + * Run the register allocator for the current register class. + */ +static void be_pref_alloc_cls(void) +{ + int i; + + lv = be_assure_liveness(birg); + be_liveness_assure_sets(lv); + + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); + + DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name)); + + be_clear_links(irg); + + irg_block_walk_graph(irg, NULL, analyze_block, NULL); + if (create_congruence_classes) + combine_congruence_classes(); + + for (i = 0; i < n_block_order; ++i) { + ir_node *block = block_order[i]; + allocate_coalesce_block(block, NULL); + } + + ir_free_resources(irg, IR_RESOURCE_IRN_LINK); +} + +static void dump(int mask, ir_graph *irg, const char *suffix, + void (*dumper)(ir_graph *, const char *)) +{ + if (birg->main_env->options->dump_flags & mask) + be_dump(irg, suffix, dumper); +} + +/** + * Run the spiller on the current graph.
+ */ +static void spill(void) +{ + /* make sure all nodes show their real register pressure */ + BE_TIMER_PUSH(t_ra_constr); + be_pre_spill_prepare_constr(birg, cls); + BE_TIMER_POP(t_ra_constr); + + dump(DUMP_RA, irg, "-spillprepare", dump_ir_block_graph_sched); + + /* spill */ + BE_TIMER_PUSH(t_ra_spill); + be_do_spill(birg, cls); + BE_TIMER_POP(t_ra_spill); + + BE_TIMER_PUSH(t_ra_spill_apply); + check_for_memory_operands(irg); + BE_TIMER_POP(t_ra_spill_apply); + + dump(DUMP_RA, irg, "-spill", dump_ir_block_graph_sched); +} + +/** + * The pref register allocator for a whole procedure. + */ +static void be_pref_alloc(be_irg_t *new_birg) +{ + const arch_env_t *arch_env = new_birg->main_env->arch_env; + int n_cls = arch_env_get_n_reg_class(arch_env); + int c; + + obstack_init(&obst); + + birg = new_birg; + irg = be_get_birg_irg(birg); + execfreqs = birg->exec_freq; + + /* determine a good coloring order */ + determine_block_order(); + + for (c = 0; c < n_cls; ++c) { + cls = arch_env_get_reg_class(arch_env, c); + default_cls_req = NULL; + if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra) + continue; + + stat_ev_ctx_push_str("regcls", cls->name); + + n_regs = arch_register_class_n_regs(cls); + normal_regs = rbitset_malloc(n_regs); + be_abi_set_non_ignore_regs(birg->abi, cls, normal_regs); + + spill(); + + /* verify schedule and register pressure */ + BE_TIMER_PUSH(t_verify); + if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { + be_verify_schedule(birg); + be_verify_register_pressure(birg, cls, irg); + } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { + assert(be_verify_schedule(birg) && "Schedule verification failed"); + assert(be_verify_register_pressure(birg, cls, irg) + && "Register pressure verification failed"); + } + BE_TIMER_POP(t_verify); + + BE_TIMER_PUSH(t_ra_color); + be_pref_alloc_cls(); + BE_TIMER_POP(t_ra_color); + + /* we most probably constructed new Phis so liveness info is invalid + * now */ + /* TODO: test liveness_introduce */ + be_liveness_invalidate(lv); + free(normal_regs); + + stat_ev_ctx_pop("regcls"); + } + + BE_TIMER_PUSH(t_ra_spill_apply); + be_abi_fix_stack_nodes(birg->abi); + BE_TIMER_POP(t_ra_spill_apply); + + BE_TIMER_PUSH(t_verify); + if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) { + be_verify_register_allocation(birg); + } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) { + assert(be_verify_register_allocation(birg) + && "Register allocation invalid"); + } + BE_TIMER_POP(t_verify); + + obstack_free(&obst, NULL); +} + +/** + * Initializes this module. + */ +void be_init_pref_alloc(void) +{ + static be_ra_t be_ra_pref = { + be_pref_alloc, + }; + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *prefalloc_group = lc_opt_get_grp(be_grp, "prefalloc"); + lc_opt_add_table(prefalloc_group, options); + + be_register_allocator("pref", &be_ra_pref); + FIRM_DBG_REGISTER(dbg, "firm.be.prefalloc"); +} + +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc);
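As an aside for readers of this patch: the copy-then-swap scheme documented at permute_values() can be tried out in isolation. Below is a minimal standalone sketch (not part of the patch): registers are modeled as plain ints, 0 stands for "no live value", and the name resolve_permutation is made up for the illustration; the real implementation emits be_new_Copy/be_new_Perm nodes instead of moving ints.

#include <assert.h>
#include <stdio.h>

#define N_REGS 8

/* register file: regs 1,2,3,4,7 hold live values, the rest are free */
static int regs[N_REGS] = { 0, 10, 20, 30, 40, 0, 0, 70 };

static void resolve_permutation(unsigned permutation[N_REGS])
{
	unsigned n_used[N_REGS] = { 0 };
	unsigned r;

	/* count how often each source register needs to be read */
	for (r = 0; r < N_REGS; ++r) {
		unsigned old_reg = permutation[r];
		if (regs[old_reg] == 0) {
			/* source not live: mark the entry as a fixed point */
			permutation[r] = r;
			continue;
		}
		++n_used[old_reg];
	}

	/* step 1: create copies where immediately possible, i.e. into
	 * destination registers whose own value no one still needs */
	for (r = 0; r < N_REGS; /* empty */) {
		unsigned old_r = permutation[r];

		if (old_r == r || n_used[r] > 0) {
			++r;
			continue;
		}

		printf("copy r%u -> r%u\n", old_r, r);
		regs[r] = regs[old_r];
		permutation[r] = r;
		--n_used[old_r];

		/* jump back if this copy enabled a copy we skipped earlier */
		r = (old_r < r && n_used[old_r] == 0) ? old_r : r + 1;
	}

	/* step 2: only cycles are left; a cycle of n registers is resolved
	 * with n-1 swaps (transpositions) */
	for (r = 0; r < N_REGS; /* empty */) {
		unsigned old_r = permutation[r];
		unsigned r2;
		int tmp;

		if (old_r == r) {
			++r;
			continue;
		}

		r2 = permutation[old_r];
		printf("swap r%u <-> r%u\n", r2, old_r);
		tmp = regs[r2];
		regs[r2] = regs[old_r];
		regs[old_r] = tmp;

		/* old_r now holds the right value; r's source moved to r2 */
		permutation[old_r] = old_r;
		permutation[r] = r2;
	}

	/* all entries must be fixed points now */
	for (r = 0; r < N_REGS; ++r)
		assert(permutation[r] == r);
}

int main(void)
{
	/* the example from the permute_values() comment: 1->2, 2->3,
	 * 3->1 and 3->5, 4->6, 7->7; array indices are destinations,
	 * values are sources */
	unsigned permutation[N_REGS] = { 0, 3, 1, 2, 4, 3, 4, 7 };
	resolve_permutation(permutation);
	return 0;
}

Running the sketch first emits the two cheap copies (r3 -> r5, r4 -> r6), then resolves the remaining 3-cycle 1 -> 2 -> 3 -> 1 with two swaps, matching the n-1 transpositions claim above.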