/*
- * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
+ * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
*
* This file is part of libFirm.
*
* @brief Preference Guided Register Assignment
* @author Matthias Braun
* @date 14.2.2009
- * @version $Id$
*
* The idea is to allocate registers in 2 passes:
* 1. A first pass to determine "preferred" registers for live-ranges. This
#include <float.h>
#include <stdbool.h>
#include <math.h>
+#include "lpp.h"
#include "error.h"
#include "execfreq.h"
#include "irnode_t.h"
#include "irprintf.h"
#include "irdump.h"
+#include "irtools.h"
+#include "util.h"
#include "obst.h"
#include "raw_bitset.h"
#include "unionfind.h"
static unsigned *normal_regs;
static int *congruence_classes;
static ir_node **block_order;
-static int n_block_order;
+static size_t n_block_order;
static int create_preferences = true;
static int create_congruence_classes = true;
static int propagate_phi_registers = true;
*/
static allocation_info_t *get_allocation_info(ir_node *node)
{
- allocation_info_t *info = get_irn_link(node);
+ allocation_info_t *info = (allocation_info_t*)get_irn_link(node);
if (info == NULL) {
info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
info->current_value = node;
*/
static block_info_t *get_block_info(ir_node *block)
{
- block_info_t *info = get_irn_link(block);
+ block_info_t *info = (block_info_t*)get_irn_link(block);
assert(is_Block(block));
if (info == NULL) {
{
ir_nodeset_iterator_t iter;
unsigned r;
- unsigned n_allowed;
+ size_t n_allowed;
allocation_info_t *info = get_allocation_info(node);
ir_node *neighbor;
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
ir_node *node)
{
- const arch_register_req_t *req = arch_get_register_req_out(node);
+ const arch_register_req_t *req = arch_get_irn_register_req(node);
if (req->type & arch_register_req_type_limited) {
const unsigned *limited = req->limited;
float penalty = weight * DEF_FACTOR;
info = get_allocation_info(node);
for (i = 0; i < arity; ++i) {
ir_node *op = get_irn_n(node, i);
- const arch_register_req_t *req = arch_get_register_req_out(op);
+ const arch_register_req_t *req = arch_get_irn_register_req(op);
if (req->cls != cls)
continue;
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
- req = arch_get_register_req(node, i);
+ req = arch_get_irn_register_req_in(node, i);
if (!(req->type & arch_register_req_type_limited))
continue;
static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node)
{
- const arch_register_req_t *req = arch_get_register_req_out(node);
+ const arch_register_req_t *req = arch_get_irn_register_req(node);
/* should be same constraint? */
if (req->type & arch_register_req_type_should_be_same) {
ir_node *original_insn;
ir_node *block;
ir_node *copy;
- unsigned r;
+ unsigned r = 0;
unsigned from_r;
unsigned i;
allocation_info_t *info = get_allocation_info(to_split);
reg_pref_t *prefs;
- float delta;
+ float delta = 0;
float split_threshold;
(void) pref;
* (so we don't split away the values produced because of
* must_be_different constraints) */
original_insn = skip_Proj(info->original_value);
- if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
+ if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill)
return false;
from_reg = arch_get_irn_register(to_split);
return false;
reg = arch_register_for_index(cls, r);
- copy = be_new_Copy(cls, block, to_split);
+ copy = be_new_Copy(block, to_split);
mark_as_copy_of(copy, to_split);
/* hacky, but correct here */
if (assignments[arch_register_get_index(from_reg)] == to_split)
static void assign_reg(const ir_node *block, ir_node *node,
unsigned *forbidden_regs)
{
- const arch_register_t *reg;
+ const arch_register_t *final_reg;
allocation_info_t *info;
const arch_register_req_t *req;
reg_pref_t *reg_prefs;
ir_node *in_node;
- unsigned i;
- const unsigned *allowed_regs;
unsigned r;
+ const unsigned *allowed_regs;
+ unsigned final_reg_index = 0;
assert(!is_Phi(node));
/* preassigned register? */
- reg = arch_get_irn_register(node);
- if (reg != NULL) {
- DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
- use_reg(node, reg);
+ final_reg = arch_get_irn_register(node);
+ if (final_reg != NULL) {
+ DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, final_reg->name));
+ use_reg(node, final_reg);
return;
}
- req = arch_get_register_req_out(node);
+ req = arch_get_irn_register_req(node);
/* ignore reqs must be preassigned */
assert (! (req->type & arch_register_req_type_ignore));
for (i = 0; i < arity; ++i) {
ir_node *in;
const arch_register_t *reg;
- unsigned r;
+ unsigned reg_index;
if (!rbitset_is_set(&req->other_same, i))
continue;
in = get_irn_n(in_node, i);
reg = arch_get_irn_register(in);
assert(reg != NULL);
- r = arch_register_get_index(reg);
+ reg_index = arch_register_get_index(reg);
/* if the value didn't die here then we should not propagate the
* should_be_same info */
- if (assignments[r] == in)
+ if (assignments[reg_index] == in)
continue;
- info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
+ info->prefs[reg_index] += weight * AFF_SHOULD_BE_SAME;
}
}
/* create list of register candidates and sort by their preference */
DB((dbg, LEVEL_2, "Candidates for %+F:", node));
- reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
+ reg_prefs = ALLOCAN(reg_pref_t, n_regs);
fill_sort_candidates(reg_prefs, info);
- for (i = 0; i < n_regs; ++i) {
- unsigned num = reg_prefs[i].num;
+ for (r = 0; r < n_regs; ++r) {
+ unsigned num = reg_prefs[r].num;
const arch_register_t *reg;
if (!rbitset_is_set(normal_regs, num))
continue;
-
reg = arch_register_for_index(cls, num);
- DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
+ DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[r].pref));
}
DB((dbg, LEVEL_2, "\n"));
allowed_regs = req->limited;
}
- for (i = 0; i < n_regs; ++i) {
+ for (r = 0; r < n_regs; ++r) {
float pref, delta;
ir_node *before;
bool res;
- r = reg_prefs[i].num;
- if (!rbitset_is_set(allowed_regs, r))
+ final_reg_index = reg_prefs[r].num;
+ if (!rbitset_is_set(allowed_regs, final_reg_index))
continue;
- if (assignments[r] == NULL)
+ /* alignment constraint? */
+ if (req->width > 1 && (req->type & arch_register_req_type_aligned)
+ && (final_reg_index % req->width) != 0)
+ continue;
+
+ if (assignments[final_reg_index] == NULL)
break;
- pref = reg_prefs[i].pref;
- delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
+ pref = reg_prefs[r].pref;
+ delta = r+1 < n_regs ? pref - reg_prefs[r+1].pref : 0;
before = skip_Proj(node);
- res = try_optimistic_split(assignments[r], before,
+ res = try_optimistic_split(assignments[final_reg_index], before,
pref, delta, forbidden_regs, 0);
if (res)
break;
}
- if (i >= n_regs) {
+ if (r >= n_regs) {
/* the common reason to hit this panic is when 1 of your nodes is not
* register pressure faithful */
panic("No register left for %+F\n", node);
}
- reg = arch_register_for_index(cls, r);
- DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
- use_reg(node, reg);
+ final_reg = arch_register_for_index(cls, final_reg_index);
+ DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, final_reg->name));
+ use_reg(node, final_reg);
}
/**
* First we count how many destinations a single value has. At the same time
* we can be sure that each destination register has at most 1 source register
* (it can have 0 which means we don't care what value is in it).
- * We ignore all fullfilled permuations (like 7->7)
+ * We ignore all fulfilled permutations (like 7->7)
* In a first pass we create as much copy instructions as possible as they
* are generally cheaper than exchanges. We do this by counting into how many
* destinations a register has to be copied (in the example it's 2 for register
* We can then create a copy into every destination register when the usecount
* of that register is 0 (= noone else needs the value in the register).
*
- * After this step we should have cycles left. We implement a cyclic permutation
- * of n registers with n-1 transpositions.
+ * After this step we should only have cycles left. We implement a cyclic
+ * permutation of n registers with n-1 transpositions.
*
* @param live_nodes the set of live nodes, updated due to live range split
* @param before the node before we add the permutation
* registers.
*/
static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
- unsigned *permutation)
+ unsigned *permutation)
{
unsigned *n_used = ALLOCANZ(unsigned, n_regs);
ir_node *block;
/* create a copy */
src = assignments[old_r];
- copy = be_new_Copy(cls, block, src);
+ copy = be_new_Copy(block, src);
sched_add_before(before, copy);
reg = arch_register_for_index(cls, r);
DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
}
}
+/**
+ * Model the register constraints at @p node as an ILP and apply the
+ * resulting register permutation before the node.
+ *
+ * A binary variable "l_to_r" encodes "the value currently in register l
+ * is placed into register r". Constraints ensure every source register
+ * gets exactly one destination and every destination receives at most
+ * one value; the objective slightly prefers values staying in place.
+ *
+ * @param live_nodes        set of values live at @p node (updated by the
+ *                          live range splits performed here)
+ * @param node              the instruction whose constraints are enforced
+ * @param forbidden_regs    registers constrained outputs may be written to
+ * @param live_through_regs registers occupied by live-through values
+ */
+static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node,
+                      unsigned *forbidden_regs, unsigned *live_through_regs)
+{
+	unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs);
+	int      *lpp_vars        = XMALLOCNZ(int, n_regs*n_regs);
+	int       arity           = get_irn_arity(node);
+	int       i;
+	unsigned  l;
+	unsigned  r;
+
+	lpp_t *lpp = lpp_new("prefalloc", lpp_minimize);
+	//lpp_set_time_limit(lpp, 20);
+	lpp_set_log(lpp, stdout);
+
+	/* mark edges that would move a constrained input out of its allowed set */
+	for (i = 0; i < arity; ++i) {
+		ir_node                   *op = get_irn_n(node, i);
+		const arch_register_t     *reg;
+		const arch_register_req_t *req;
+		const unsigned            *limited;
+		unsigned                   current_reg;
+
+		if (!arch_irn_consider_in_reg_alloc(cls, op))
+			continue;
+
+		req = arch_get_irn_register_req_in(node, i);
+		if (!(req->type & arch_register_req_type_limited))
+			continue;
+
+		limited     = req->limited;
+		reg         = arch_get_irn_register(op);
+		current_reg = arch_register_get_index(reg);
+		for (r = 0; r < n_regs; ++r) {
+			if (rbitset_is_set(limited, r))
+				continue;
+
+			rbitset_set(forbidden_edges, current_reg*n_regs + r);
+		}
+	}
+
+	/* add all combinations, except for not allowed ones */
+	for (l = 0; l < n_regs; ++l) {
+		if (!rbitset_is_set(normal_regs, l)) {
+			/* non-allocatable registers must stay where they are */
+			char name[32];
+			snprintf(name, sizeof(name), "%u_to_%u", l, l);
+			lpp_vars[l*n_regs+l] = lpp_add_var(lpp, name, lpp_binary, 1);
+			continue;
+		}
+
+		for (r = 0; r < n_regs; ++r) {
+			if (!rbitset_is_set(normal_regs, r))
+				continue;
+			if (rbitset_is_set(forbidden_edges, l*n_regs + r))
+				continue;
+			/* livethrough values may not use constrained output registers */
+			if (rbitset_is_set(live_through_regs, l)
+			    && rbitset_is_set(forbidden_regs, r))
+				continue;
+
+			/* "%u_to_%u" with two up-to-10-digit unsigneds needs up to
+			 * 25 bytes; a smaller buffer would truncate and could make
+			 * distinct variables collide by name */
+			char name[32];
+			snprintf(name, sizeof(name), "%u_to_%u", l, r);
+
+			/* staying in place (l==r) is costed higher so real moves
+			 * are preferred by the minimizer only when necessary */
+			double costs = l==r ? 9 : 8;
+			lpp_vars[l*n_regs+r]
+				= lpp_add_var(lpp, name, lpp_binary, costs);
+			assert(lpp_vars[l*n_regs+r] > 0);
+		}
+	}
+	/* add constraints */
+	for (l = 0; l < n_regs; ++l) {
+		int constraint;
+		/* only 1 destination per register */
+		constraint = -1;
+		for (r = 0; r < n_regs; ++r) {
+			int var = lpp_vars[l*n_regs+r];
+			if (var == 0)
+				continue;
+			if (constraint < 0) {
+				char name[64];
+				snprintf(name, sizeof(name), "%u_to_dest", l);
+				constraint = lpp_add_cst(lpp, name, lpp_equal, 1);
+			}
+			lpp_set_factor_fast(lpp, constraint, var, 1);
+		}
+		/* each destination used by at most 1 value */
+		constraint = -1;
+		for (r = 0; r < n_regs; ++r) {
+			int var = lpp_vars[r*n_regs+l];
+			if (var == 0)
+				continue;
+			if (constraint < 0) {
+				char name[64];
+				snprintf(name, sizeof(name), "one_to_%u", l);
+				constraint = lpp_add_cst(lpp, name, lpp_less_equal, 1);
+			}
+			lpp_set_factor_fast(lpp, constraint, var, 1);
+		}
+	}
+
+	/* debug dump; check fopen and do not leak the FILE handle */
+	{
+		FILE *dump_out = fopen("lppdump.txt", "w");
+		if (dump_out != NULL) {
+			lpp_dump_plain(lpp, dump_out);
+			fclose(dump_out);
+		}
+	}
+
+	/* solve lpp */
+	{
+		ir_graph     *irg     = get_irn_irg(node);
+		be_options_t *options = be_get_irg_options(irg);
+		unsigned     *assignment;
+		lpp_solve(lpp, options->ilp_server, options->ilp_solver);
+		if (!lpp_is_sol_valid(lpp))
+			panic("ilp solution not valid!");
+
+		assignment = ALLOCAN(unsigned, n_regs);
+		/* NOTE(review): entries of assignment[] whose register is never a
+		 * destination stay uninitialized — verify permute_values ignores
+		 * registers that are not written */
+		for (l = 0; l < n_regs; ++l) {
+			unsigned dest_reg = (unsigned)-1;
+			for (r = 0; r < n_regs; ++r) {
+				int var = lpp_vars[l*n_regs+r];
+				if (var == 0)
+					continue;
+				double val = lpp_get_var_sol(lpp, var);
+				if (val == 1) {
+					/* each source must get exactly one destination */
+					assert(dest_reg == (unsigned)-1);
+					dest_reg = r;
+				}
+			}
+			assert(dest_reg != (unsigned)-1);
+			assignment[dest_reg] = l;
+		}
+
+		fprintf(stderr, "Assignment: ");
+		for (l = 0; l < n_regs; ++l) {
+			fprintf(stderr, "%u ", assignment[l]);
+		}
+		fprintf(stderr, "\n");
+		fflush(stderr);
+		permute_values(live_nodes, node, assignment);
+	}
+	lpp_free(lpp);
+	free(lpp_vars);
+	free(forbidden_edges);
+}
+
+/** Returns true if @p num is a multiple of @p alignment.
+ *  @p alignment must be a power of two (checked by assertion). */
+static bool is_aligned(unsigned num, unsigned alignment)
+{
+	unsigned mask = alignment-1;
+	assert(is_po2(alignment));
+	return (num&mask) == 0;
+}
+
/**
* Enforce constraints at a node by live range splits.
*
/* construct a list of register occupied by live-through values */
unsigned *live_through_regs = NULL;
- /* see if any use constraints are not met */
+ /* see if any use constraints are not met and whether double-width
+ * values are involved */
+ bool double_width = false;
bool good = true;
for (i = 0; i < arity; ++i) {
ir_node *op = get_irn_n(node, i);
const arch_register_t *reg;
const arch_register_req_t *req;
const unsigned *limited;
- unsigned r;
+ unsigned reg_index;
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
/* are there any limitations for the i'th operand? */
- req = arch_get_register_req(node, i);
+ req = arch_get_irn_register_req_in(node, i);
+ if (req->width > 1)
+ double_width = true;
+ reg = arch_get_irn_register(op);
+ reg_index = arch_register_get_index(reg);
+ if (req->type & arch_register_req_type_aligned) {
+ if (!is_aligned(reg_index, req->width)) {
+ good = false;
+ continue;
+ }
+ }
if (!(req->type & arch_register_req_type_limited))
continue;
- limited = req->limited;
- reg = arch_get_irn_register(op);
- r = arch_register_get_index(reg);
- if (!rbitset_is_set(limited, r)) {
+ limited = req->limited;
+ if (!rbitset_is_set(limited, reg_index)) {
/* found an assignment outside the limited set */
good = false;
- break;
+ continue;
}
}
/* is any of the live-throughs using a constrained output register? */
be_foreach_definition(node, cls, value,
+ if (req_->width > 1)
+ double_width = true;
if (! (req_->type & arch_register_req_type_limited))
continue;
if (live_through_regs == NULL) {
rbitset_alloca(live_through_regs, n_regs);
}
+ if (double_width) {
+ /* only the ILP variant can solve this yet */
+ solve_lpp(live_nodes, node, forbidden_regs, live_through_regs);
+ return;
+ }
+
/* at this point we have to construct a bipartite matching problem to see
* which values should go to which registers
* Note: We're building the matrix in "reverse" - source registers are
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
- req = arch_get_register_req(node, i);
+ req = arch_get_irn_register_req_in(node, i);
if (!(req->type & arch_register_req_type_limited))
continue;
permute_values(live_nodes, node, assignment);
}
-/** test wether a node @p n is a copy of the value of node @p of */
+/** test whether a node @p n is a copy of the value of node @p of */
static bool is_copy_of(ir_node *value, ir_node *test_value)
{
allocation_info_t *test_info;
static int find_value_in_block_info(block_info_t *info, ir_node *value)
{
unsigned r;
- ir_node **assignments = info->assignments;
+ ir_node **end_assignments = info->assignments;
for (r = 0; r < n_regs; ++r) {
- ir_node *a_value = assignments[r];
+ ir_node *a_value = end_assignments[r];
if (a_value == NULL)
continue;
int p;
node = be_lv_get_irn(lv, block, i);
- req = arch_get_register_req_out(node);
+ req = arch_get_irn_register_req(node);
if (req->cls != cls)
continue;
DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node));
#ifdef DEBUG_libfirm
{
- int i;
- for (i = 0; i < n_preds; ++i) {
- DB((dbg, LEVEL_3, " %+F", phi_ins[i]));
+ int pi;
+ for (pi = 0; pi < n_preds; ++pi) {
+ DB((dbg, LEVEL_3, " %+F", phi_ins[pi]));
}
DB((dbg, LEVEL_3, "\n"));
}
/* assign instructions in the block */
sched_foreach(block, node) {
- int i;
int arity;
ir_node *value;
static int cmp_block_costs(const void *d1, const void *d2)
{
-	const ir_node * const *block1 = d1;
-	const ir_node * const *block2 = d2;
-	const block_costs_t *info1 = get_irn_link(*block1);
-	const block_costs_t *info2 = get_irn_link(*block2);
+	const ir_node * const *block1 = (const ir_node**)d1;
+	const ir_node * const *block2 = (const ir_node**)d2;
+	const block_costs_t *info1 = (const block_costs_t*)get_irn_link(*block1);
+	const block_costs_t *info2 = (const block_costs_t*)get_irn_link(*block2);
+	/* operands deliberately swapped: sort blocks with highest costs first */
	return QSORT_CMP(info2->costs, info1->costs);
}
static void determine_block_order(void)
{
- int i;
+ size_t p;
ir_node **blocklist = be_get_cfgpostorder(irg);
- int n_blocks = ARR_LEN(blocklist);
+ size_t n_blocks = ARR_LEN(blocklist);
int dfs_num = 0;
pdeq *worklist = new_pdeq();
ir_node **order = XMALLOCN(ir_node*, n_blocks);
- int order_p = 0;
+ size_t order_p = 0;
/* clear block links... */
- for (i = 0; i < n_blocks; ++i) {
- ir_node *block = blocklist[i];
+ for (p = 0; p < n_blocks; ++p) {
+ ir_node *block = blocklist[p];
set_irn_link(block, NULL);
}
/* walk blocks in reverse postorder, the costs for each block are the
* sum of the costs of its predecessors (excluding the costs on backedges
* which we can't determine) */
- for (i = n_blocks-1; i >= 0; --i) {
+ for (p = n_blocks; p > 0;) {
block_costs_t *cost_info;
- ir_node *block = blocklist[i];
+ ir_node *block = blocklist[--p];
float execfreq = (float)get_block_execfreq(execfreqs, block);
float costs = execfreq;
int n_cfgpreds = get_Block_n_cfgpreds(block);
- int p;
- for (p = 0; p < n_cfgpreds; ++p) {
- ir_node *pred_block = get_Block_cfgpred_block(block, p);
- block_costs_t *pred_costs = get_irn_link(pred_block);
+ int p2;
+ for (p2 = 0; p2 < n_cfgpreds; ++p2) {
+ ir_node *pred_block = get_Block_cfgpred_block(block, p2);
+ block_costs_t *pred_costs = (block_costs_t*)get_irn_link(pred_block);
/* we don't have any info for backedges */
if (pred_costs == NULL)
continue;
ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
inc_irg_block_visited(irg);
- for (i = 0; i < n_blocks; ++i) {
- ir_node *block = blocklist[i];
+ for (p = 0; p < n_blocks; ++p) {
+ ir_node *block = blocklist[p];
if (Block_block_visited(block))
continue;
/* continually add predecessors with highest costs to worklist
* (without using backedges) */
do {
- block_costs_t *info = get_irn_link(block);
+ block_costs_t *info = (block_costs_t*)get_irn_link(block);
ir_node *best_pred = NULL;
float best_costs = -1;
int n_cfgpred = get_Block_n_cfgpreds(block);
mark_Block_block_visited(block);
for (i = 0; i < n_cfgpred; ++i) {
ir_node *pred_block = get_Block_cfgpred_block(block, i);
- block_costs_t *pred_info = get_irn_link(pred_block);
+ block_costs_t *pred_info = (block_costs_t*)get_irn_link(pred_block);
/* ignore backedges */
if (pred_info->dfs_num > info->dfs_num)
/* now put all nodes in the worklist in our final order */
while (!pdeq_empty(worklist)) {
- ir_node *pblock = pdeq_getr(worklist);
+ ir_node *pblock = (ir_node*)pdeq_getr(worklist);
assert(order_p < n_blocks);
order[order_p++] = pblock;
}
*/
static void be_pref_alloc_cls(void)
{
- int i;
+ size_t i;
- lv = be_assure_liveness(irg);
- be_liveness_assure_sets(lv);
+ be_assure_live_sets(irg);
+ lv = be_get_irg_liveness(irg);
ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
be_pre_spill_prepare_constr(irg, cls);
be_timer_pop(T_RA_CONSTR);
- dump(DUMP_RA, irg, "-spillprepare");
+ dump(DUMP_RA, irg, "spillprepare");
/* spill */
be_timer_push(T_RA_SPILL);
check_for_memory_operands(irg);
be_timer_pop(T_RA_SPILL_APPLY);
- dump(DUMP_RA, irg, "-spill");
+ dump(DUMP_RA, irg, "spill");
}
/**
/* we most probably constructed new Phis so liveness info is invalid
* now */
- /* TODO: test liveness_introduce */
- be_liveness_invalidate(lv);
+ be_invalidate_live_sets(irg);
free(normal_regs);
stat_ev_ctx_pop("regcls");
obstack_free(&obst, NULL);
}
-BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc);
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc)
void be_init_pref_alloc(void)
{
static be_ra_t be_ra_pref = {