/*
- * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
+ * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
*
* This file is part of libFirm.
*
* @brief Preference Guided Register Assignment
* @author Matthias Braun
* @date 14.2.2009
- * @version $Id$
*
* The idea is to allocate registers in 2 passes:
* 1. A first pass to determine "preferred" registers for live-ranges. This
#include <float.h>
#include <stdbool.h>
#include <math.h>
+#include "lpp.h"
#include "error.h"
#include "execfreq.h"
#include "irnode_t.h"
#include "irprintf.h"
#include "irdump.h"
+#include "irtools.h"
+#include "util.h"
#include "obst.h"
#include "raw_bitset.h"
#include "unionfind.h"
#include "bespillutil.h"
#include "beverify.h"
#include "beutil.h"
+#include "bestack.h"
#define USE_FACTOR 1.0f
#define DEF_FACTOR 1.0f
static struct obstack obst;
static ir_graph *irg;
static const arch_register_class_t *cls;
-static const arch_register_req_t *default_cls_req;
static be_lv_t *lv;
static const ir_exec_freq *execfreqs;
static unsigned n_regs;
static unsigned *normal_regs;
static int *congruence_classes;
static ir_node **block_order;
-static int n_block_order;
+static size_t n_block_order;
static int create_preferences = true;
static int create_congruence_classes = true;
static int propagate_phi_registers = true;
* the information is per firm-node.
*/
struct allocation_info_t {
- unsigned last_uses; /**< bitset indicating last uses (input pos) */
+ unsigned last_uses[2]; /**< bitset indicating last uses (input pos) */
ir_node *current_value; /**< copy of the value that should be used */
ir_node *original_value; /**< for copies point to original value */
float prefs[0]; /**< register preferences */
*/
static allocation_info_t *get_allocation_info(ir_node *node)
{
- allocation_info_t *info = get_irn_link(node);
+ allocation_info_t *info = (allocation_info_t*)get_irn_link(node);
if (info == NULL) {
info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
info->current_value = node;
*/
static block_info_t *get_block_info(ir_node *block)
{
- block_info_t *info = get_irn_link(block);
+ block_info_t *info = (block_info_t*)get_irn_link(block);
assert(is_Block(block));
if (info == NULL) {
return info;
}
-/**
- * Get default register requirement for the current register class
- */
-static const arch_register_req_t *get_default_req_current_cls(void)
-{
- if (default_cls_req == NULL) {
- struct obstack *obst = get_irg_obstack(irg);
- arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t);
-
- req->type = arch_register_req_type_normal;
- req->cls = cls;
-
- default_cls_req = req;
- }
- return default_cls_req;
-}
-
/**
* Link the allocation info of a node to a copy.
* Afterwards, both nodes use the same allocation info.
{
ir_nodeset_iterator_t iter;
unsigned r;
- unsigned n_allowed;
+ size_t n_allowed;
allocation_info_t *info = get_allocation_info(node);
ir_node *neighbor;
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
ir_node *node)
{
- const arch_register_req_t *req;
-
- if (get_irn_mode(node) == mode_T) {
- const ir_edge_t *edge;
- foreach_out_edge(node, edge) {
- ir_node *proj = get_edge_src_irn(edge);
- check_defs(live_nodes, weight, proj);
- }
- return;
- }
-
- if (!arch_irn_consider_in_reg_alloc(cls, node))
- return;
-
- req = arch_get_register_req_out(node);
+ const arch_register_req_t *req = arch_get_irn_register_req(node);
if (req->type & arch_register_req_type_limited) {
const unsigned *limited = req->limited;
float penalty = weight * DEF_FACTOR;
if (is_Phi(node))
break;
- if (create_preferences)
- check_defs(&live_nodes, weight, node);
+ if (create_preferences) {
+ ir_node *value;
+ be_foreach_definition(node, cls, value,
+ check_defs(&live_nodes, weight, value);
+ );
+ }
/* mark last uses */
arity = get_irn_arity(node);
/* the allocation info currently only uses a fixed-size bitset
(info->last_uses) to mark last used inputs. So we will fail for a node
with more inputs than that bitset has bits. */
- if (arity >= (int) sizeof(unsigned) * 8) {
+ if (arity >= (int) sizeof(info->last_uses) * 8) {
panic("Node with more than %d inputs not supported yet",
- (int) sizeof(unsigned) * 8);
+ (int) sizeof(info->last_uses) * 8);
}
info = get_allocation_info(node);
for (i = 0; i < arity; ++i) {
- ir_node *op = get_irn_n(node, i);
- if (!arch_irn_consider_in_reg_alloc(cls, op))
+ ir_node *op = get_irn_n(node, i);
+ const arch_register_req_t *req = arch_get_irn_register_req(op);
+ if (req->cls != cls)
continue;
/* last usage of a value? */
if (!ir_nodeset_contains(&live_nodes, op)) {
- rbitset_set(&info->last_uses, i);
+ rbitset_set(info->last_uses, i);
}
}
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
- req = arch_get_register_req(node, i);
+ req = arch_get_irn_register_req_in(node, i);
if (!(req->type & arch_register_req_type_limited))
continue;
ir_nodeset_destroy(&live_nodes);
}
-static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node)
+static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node)
{
- const arch_register_req_t *req;
-
- if (get_irn_mode(node) == mode_T) {
- const ir_edge_t *edge;
- foreach_out_edge(node, edge) {
- ir_node *def = get_edge_src_irn(edge);
- congruence_def(live_nodes, def);
- }
- return;
- }
-
- if (!arch_irn_consider_in_reg_alloc(cls, node))
- return;
+ const arch_register_req_t *req = arch_get_irn_register_req(node);
/* should be same constraint? */
- req = arch_get_register_req_out(node);
if (req->type & arch_register_req_type_should_be_same) {
- ir_node *insn = skip_Proj(node);
+ const ir_node *insn = skip_Proj_const(node);
int arity = get_irn_arity(insn);
int i;
unsigned node_idx = get_irn_idx(node);
/* check should be same constraints */
sched_foreach_reverse(block, node) {
+ ir_node *value;
if (is_Phi(node))
break;
- congruence_def(&live_nodes, node);
+ be_foreach_definition(node, cls, value,
+ congruence_def(&live_nodes, value);
+ );
be_liveness_transfer(cls, node, &live_nodes);
}
-
-
/**
* Assign register reg to the given node.
*
ir_node *original_insn;
ir_node *block;
ir_node *copy;
- unsigned r;
+ unsigned r = 0;
unsigned from_r;
unsigned i;
allocation_info_t *info = get_allocation_info(to_split);
reg_pref_t *prefs;
- float delta;
+ float delta = 0;
float split_threshold;
(void) pref;
* (so we don't split away the values produced because of
* must_be_different constraints) */
original_insn = skip_Proj(info->original_value);
- if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
+ if (arch_get_irn_flags(original_insn) & arch_irn_flags_dont_spill)
return false;
from_reg = arch_get_irn_register(to_split);
return false;
reg = arch_register_for_index(cls, r);
- copy = be_new_Copy(cls, block, to_split);
+ copy = be_new_Copy(block, to_split);
mark_as_copy_of(copy, to_split);
/* hacky, but correct here */
if (assignments[arch_register_get_index(from_reg)] == to_split)
static void assign_reg(const ir_node *block, ir_node *node,
unsigned *forbidden_regs)
{
- const arch_register_t *reg;
+ const arch_register_t *final_reg;
allocation_info_t *info;
const arch_register_req_t *req;
reg_pref_t *reg_prefs;
ir_node *in_node;
- unsigned i;
- const unsigned *allowed_regs;
unsigned r;
+ const unsigned *allowed_regs;
+ unsigned final_reg_index = 0;
assert(!is_Phi(node));
- assert(arch_irn_consider_in_reg_alloc(cls, node));
-
/* preassigned register? */
- reg = arch_get_irn_register(node);
- if (reg != NULL) {
- DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
- use_reg(node, reg);
+ final_reg = arch_get_irn_register(node);
+ if (final_reg != NULL) {
+ DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, final_reg->name));
+ use_reg(node, final_reg);
return;
}
- /* give should_be_same boni */
- info = get_allocation_info(node);
- req = arch_get_register_req_out(node);
+ req = arch_get_irn_register_req(node);
+ /* ignore reqs must be preassigned */
+ assert (! (req->type & arch_register_req_type_ignore));
+ /* give should_be_same boni */
+ info = get_allocation_info(node);
in_node = skip_Proj(node);
if (req->type & arch_register_req_type_should_be_same) {
float weight = (float)get_block_execfreq(execfreqs, block);
for (i = 0; i < arity; ++i) {
ir_node *in;
const arch_register_t *reg;
- unsigned r;
+ unsigned reg_index;
if (!rbitset_is_set(&req->other_same, i))
continue;
in = get_irn_n(in_node, i);
reg = arch_get_irn_register(in);
assert(reg != NULL);
- r = arch_register_get_index(reg);
+ reg_index = arch_register_get_index(reg);
/* if the value didn't die here then we should not propagate the
* should_be_same info */
- if (assignments[r] == in)
+ if (assignments[reg_index] == in)
continue;
- info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
+ info->prefs[reg_index] += weight * AFF_SHOULD_BE_SAME;
}
}
/* create list of register candidates and sort by their preference */
DB((dbg, LEVEL_2, "Candidates for %+F:", node));
- reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
+ reg_prefs = ALLOCAN(reg_pref_t, n_regs);
fill_sort_candidates(reg_prefs, info);
- for (i = 0; i < n_regs; ++i) {
- unsigned num = reg_prefs[i].num;
+ for (r = 0; r < n_regs; ++r) {
+ unsigned num = reg_prefs[r].num;
const arch_register_t *reg;
if (!rbitset_is_set(normal_regs, num))
continue;
-
reg = arch_register_for_index(cls, num);
- DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
+ DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[r].pref));
}
DB((dbg, LEVEL_2, "\n"));
allowed_regs = req->limited;
}
- for (i = 0; i < n_regs; ++i) {
+ for (r = 0; r < n_regs; ++r) {
float pref, delta;
ir_node *before;
bool res;
- r = reg_prefs[i].num;
- if (!rbitset_is_set(allowed_regs, r))
+ final_reg_index = reg_prefs[r].num;
+ if (!rbitset_is_set(allowed_regs, final_reg_index))
continue;
- if (assignments[r] == NULL)
+ /* alignment constraint? */
+ if (req->width > 1 && (req->type & arch_register_req_type_aligned)
+ && (final_reg_index % req->width) != 0)
+ continue;
+
+ if (assignments[final_reg_index] == NULL)
break;
- pref = reg_prefs[i].pref;
- delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
+ pref = reg_prefs[r].pref;
+ delta = r+1 < n_regs ? pref - reg_prefs[r+1].pref : 0;
before = skip_Proj(node);
- res = try_optimistic_split(assignments[r], before,
+ res = try_optimistic_split(assignments[final_reg_index], before,
pref, delta, forbidden_regs, 0);
if (res)
break;
}
- if (i >= n_regs) {
+ if (r >= n_regs) {
/* the common reason to hit this panic is when 1 of your nodes is not
* register pressure faithful */
panic("No register left for %+F\n", node);
}
- reg = arch_register_for_index(cls, r);
- DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
- use_reg(node, reg);
+ final_reg = arch_register_for_index(cls, final_reg_index);
+ DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, final_reg->name));
+ use_reg(node, final_reg);
}
/**
* First we count how many destinations a single value has. At the same time
* we can be sure that each destination register has at most 1 source register
* (it can have 0 which means we don't care what value is in it).
- * We ignore all fullfilled permuations (like 7->7)
+ * We ignore all fulfilled permutations (like 7->7)
* In a first pass we create as much copy instructions as possible as they
* are generally cheaper than exchanges. We do this by counting into how many
* destinations a register has to be copied (in the example it's 2 for register
* We can then create a copy into every destination register when the usecount
* of that register is 0 (= no one else needs the value in the register).
*
- * After this step we should have cycles left. We implement a cyclic permutation
- * of n registers with n-1 transpositions.
+ * After this step we should only have cycles left. We implement a cyclic
+ * permutation of n registers with n-1 transpositions.
*
* @param live_nodes the set of live nodes, updated due to live range split
* @param before the node before we add the permutation
* registers.
*/
static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
- unsigned *permutation)
+ unsigned *permutation)
{
unsigned *n_used = ALLOCANZ(unsigned, n_regs);
ir_node *block;
/* create a copy */
src = assignments[old_r];
- copy = be_new_Copy(cls, block, src);
+ copy = be_new_Copy(block, src);
sched_add_before(before, copy);
reg = arch_register_for_index(cls, r);
DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
*/
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
- allocation_info_t *info = get_allocation_info(node);
- const unsigned *last_uses = &info->last_uses;
- int arity = get_irn_arity(node);
- int i;
+ allocation_info_t *info = get_allocation_info(node);
+ const unsigned *last_uses = info->last_uses;
+ int arity = get_irn_arity(node);
+ int i;
for (i = 0; i < arity; ++i) {
ir_node *op;
ir_node *op;
const arch_register_t *reg;
- if (!rbitset_is_set(&info->last_uses, i))
+ if (!rbitset_is_set(info->last_uses, i))
continue;
op = get_irn_n(node, i);
}
}
+/**
+ * Enforce the input register constraints of @p node by modelling the
+ * required register permutation as an ILP, solving it and applying the
+ * result with permute_values().
+ *
+ * For every allowed edge "value currently in register l moves to register r"
+ * a binary variable is created. Each source register must get exactly one
+ * destination and each destination may be used by at most one source.
+ *
+ * @param live_nodes         passed through to permute_values()
+ * @param node               node whose inputs are constrained; also passed
+ *                           to permute_values() as insertion point
+ * @param forbidden_regs     bitset of registers that live-through values
+ *                           must not be moved into
+ * @param live_through_regs  bitset of registers holding live-through values
+ */
+static void solve_lpp(ir_nodeset_t *live_nodes, ir_node *node,
+                      unsigned *forbidden_regs, unsigned *live_through_regs)
+{
+	unsigned *forbidden_edges = rbitset_malloc(n_regs * n_regs);
+	int      *lpp_vars        = XMALLOCNZ(int, n_regs*n_regs);
+	int       arity           = get_irn_arity(node);
+	int       i;
+	unsigned  l;
+	unsigned  r;
+
+	lpp_t *lpp = lpp_new("prefalloc", lpp_minimize);
+	lpp_set_log(lpp, stdout);
+
+	/* mark some edges as forbidden: an operand with a limited requirement
+	 * may only move into registers contained in its limited set */
+	for (i = 0; i < arity; ++i) {
+		ir_node                   *op = get_irn_n(node, i);
+		const arch_register_t     *reg;
+		const arch_register_req_t *req;
+		const unsigned            *limited;
+		unsigned                   current_reg;
+
+		if (!arch_irn_consider_in_reg_alloc(cls, op))
+			continue;
+
+		req = arch_get_irn_register_req_in(node, i);
+		if (!(req->type & arch_register_req_type_limited))
+			continue;
+
+		limited     = req->limited;
+		reg         = arch_get_irn_register(op);
+		current_reg = arch_register_get_index(reg);
+		for (r = 0; r < n_regs; ++r) {
+			if (rbitset_is_set(limited, r))
+				continue;
+
+			rbitset_set(forbidden_edges, current_reg*n_regs + r);
+		}
+	}
+
+	/* add all combinations, except for not allowed ones */
+	for (l = 0; l < n_regs; ++l) {
+		if (!rbitset_is_set(normal_regs, l)) {
+			/* registers outside normal_regs only get the identity edge */
+			char name[15];
+			snprintf(name, sizeof(name), "%u_to_%u", l, l);
+			lpp_vars[l*n_regs+l] = lpp_add_var(lpp, name, lpp_binary, 1);
+			continue;
+		}
+
+		for (r = 0; r < n_regs; ++r) {
+			if (!rbitset_is_set(normal_regs, r))
+				continue;
+			if (rbitset_is_set(forbidden_edges, l*n_regs + r))
+				continue;
+			/* livethrough values may not use constrained output registers */
+			if (rbitset_is_set(live_through_regs, l)
+			    && rbitset_is_set(forbidden_regs, r))
+				continue;
+
+			char name[15];
+			snprintf(name, sizeof(name), "%u_to_%u", l, r);
+
+			/* NOTE(review): with lpp_minimize the identity edge (l==r) is
+			 * more expensive than a move - confirm this is intended */
+			double costs = l==r ? 9 : 8;
+			lpp_vars[l*n_regs+r]
+				= lpp_add_var(lpp, name, lpp_binary, costs);
+			/* 0 is used as "no variable" marker in lpp_vars below */
+			assert(lpp_vars[l*n_regs+r] > 0);
+		}
+	}
+	/* add constraints */
+	for (l = 0; l < n_regs; ++l) {
+		int constraint;
+		/* only 1 destination per register */
+		constraint = -1;
+		for (r = 0; r < n_regs; ++r) {
+			int var = lpp_vars[l*n_regs+r];
+			if (var == 0)
+				continue;
+			/* create the constraint lazily on the first variable found */
+			if (constraint < 0) {
+				char name[64];
+				snprintf(name, sizeof(name), "%u_to_dest", l);
+				constraint = lpp_add_cst(lpp, name, lpp_equal, 1);
+			}
+			lpp_set_factor_fast(lpp, constraint, var, 1);
+		}
+		/* each destination used by at most 1 value */
+		constraint = -1;
+		for (r = 0; r < n_regs; ++r) {
+			int var = lpp_vars[r*n_regs+l];
+			if (var == 0)
+				continue;
+			if (constraint < 0) {
+				char name[64];
+				snprintf(name, sizeof(name), "one_to_%u", l);
+				constraint = lpp_add_cst(lpp, name, lpp_less_equal, 1);
+			}
+			lpp_set_factor_fast(lpp, constraint, var, 1);
+		}
+	}
+
+	/* debugging aid: dump the problem. Failing to open the dump file is not
+	 * fatal, so skip the dump instead of handing a NULL stream to the dumper.
+	 * NOTE(review): assumes lpp_dump_plain() does not close the stream itself
+	 * - confirm against lpp.h */
+	{
+		FILE *dump = fopen("lppdump.txt", "w");
+		if (dump != NULL) {
+			lpp_dump_plain(lpp, dump);
+			fclose(dump);
+		}
+	}
+
+	/* solve lpp */
+	{
+		be_options_t *options = be_get_irg_options(get_irn_irg(node));
+		unsigned     *assignment;
+		lpp_solve(lpp, options->ilp_server, options->ilp_solver);
+		if (!lpp_is_sol_valid(lpp))
+			panic("ilp solution not valid!");
+
+		/* assignment[dest] = source register, the layout permute_values()
+		 * is called with */
+		assignment = ALLOCAN(unsigned, n_regs);
+		for (l = 0; l < n_regs; ++l) {
+			unsigned dest_reg = (unsigned)-1;
+			for (r = 0; r < n_regs; ++r) {
+				int var = lpp_vars[l*n_regs+r];
+				if (var == 0)
+					continue;
+				double val = lpp_get_var_sol(lpp, var);
+				if (val == 1) {
+					assert(dest_reg == (unsigned)-1);
+					dest_reg = r;
+				}
+			}
+			/* must not fall through with an invalid index, otherwise the
+			 * store below writes far outside of assignment[] */
+			if (dest_reg == (unsigned)-1)
+				panic("ilp solution has no destination for register %u", l);
+			assignment[dest_reg] = l;
+		}
+
+		fprintf(stderr, "Assignment: ");
+		for (l = 0; l < n_regs; ++l) {
+			fprintf(stderr, "%u ", assignment[l]);
+		}
+		fprintf(stderr, "\n");
+		fflush(stdout);
+		permute_values(live_nodes, node, assignment);
+	}
+	lpp_free(lpp);
+	/* release the helper tables allocated at function entry */
+	free(lpp_vars);
+	free(forbidden_edges);
+}
+
+/**
+ * Check whether @p num is a multiple of @p alignment.
+ * @p alignment has to be a power of two (asserted via is_po2()).
+ */
+static bool is_aligned(unsigned num, unsigned alignment)
+{
+	assert(is_po2(alignment));
+	/* for a power of two, the bits below it must all be clear */
+	return (num & (alignment - 1)) == 0;
+}
+
/**
* Enforce constraints at a node by live range splits.
*
hungarian_problem_t *bp;
unsigned l, r;
unsigned *assignment;
+ ir_node *value;
/* construct a list of register occupied by live-through values */
unsigned *live_through_regs = NULL;
- /* see if any use constraints are not met */
+ /* see if any use constraints are not met and whether double-width
+ * values are involved */
+ bool double_width = false;
bool good = true;
for (i = 0; i < arity; ++i) {
ir_node *op = get_irn_n(node, i);
const arch_register_t *reg;
const arch_register_req_t *req;
const unsigned *limited;
- unsigned r;
+ unsigned reg_index;
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
/* are there any limitations for the i'th operand? */
- req = arch_get_register_req(node, i);
+ req = arch_get_irn_register_req_in(node, i);
+ if (req->width > 1)
+ double_width = true;
+ reg = arch_get_irn_register(op);
+ reg_index = arch_register_get_index(reg);
+ if (req->type & arch_register_req_type_aligned) {
+ if (!is_aligned(reg_index, req->width)) {
+ good = false;
+ continue;
+ }
+ }
if (!(req->type & arch_register_req_type_limited))
continue;
- limited = req->limited;
- reg = arch_get_irn_register(op);
- r = arch_register_get_index(reg);
- if (!rbitset_is_set(limited, r)) {
+ limited = req->limited;
+ if (!rbitset_is_set(limited, reg_index)) {
/* found an assignment outside the limited set */
good = false;
- break;
+ continue;
}
}
/* is any of the live-throughs using a constrained output register? */
- if (get_irn_mode(node) == mode_T) {
- const ir_edge_t *edge;
-
- foreach_out_edge(node, edge) {
- ir_node *proj = get_edge_src_irn(edge);
- const arch_register_req_t *req;
-
- if (!arch_irn_consider_in_reg_alloc(cls, proj))
- continue;
-
- req = arch_get_register_req_out(proj);
- if (!(req->type & arch_register_req_type_limited))
- continue;
-
- if (live_through_regs == NULL) {
- rbitset_alloca(live_through_regs, n_regs);
- determine_live_through_regs(live_through_regs, node);
- }
-
- rbitset_or(forbidden_regs, req->limited, n_regs);
- if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
- good = false;
- }
- }
- } else {
- if (arch_irn_consider_in_reg_alloc(cls, node)) {
- const arch_register_req_t *req = arch_get_register_req_out(node);
- if (req->type & arch_register_req_type_limited) {
- rbitset_alloca(live_through_regs, n_regs);
- determine_live_through_regs(live_through_regs, node);
- if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
- good = false;
- rbitset_or(forbidden_regs, req->limited, n_regs);
- }
- }
+ be_foreach_definition(node, cls, value,
+ if (req_->width > 1)
+ double_width = true;
+ if (! (req_->type & arch_register_req_type_limited))
+ continue;
+ if (live_through_regs == NULL) {
+ rbitset_alloca(live_through_regs, n_regs);
+ determine_live_through_regs(live_through_regs, node);
}
- }
+ rbitset_or(forbidden_regs, req_->limited, n_regs);
+ if (rbitsets_have_common(req_->limited, live_through_regs, n_regs))
+ good = false;
+ );
if (good)
return;
rbitset_alloca(live_through_regs, n_regs);
}
+ if (double_width) {
+ /* only the ILP variant can solve this yet */
+ solve_lpp(live_nodes, node, forbidden_regs, live_through_regs);
+ return;
+ }
+
/* at this point we have to construct a bipartite matching problem to see
* which values should go to which registers
* Note: We're building the matrix in "reverse" - source registers are
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
- req = arch_get_register_req(node, i);
+ req = arch_get_irn_register_req_in(node, i);
if (!(req->type & arch_register_req_type_limited))
continue;
permute_values(live_nodes, node, assignment);
}
-/** test wether a node @p n is a copy of the value of node @p of */
+/** test whether a node @p n is a copy of the value of node @p of */
static bool is_copy_of(ir_node *value, ir_node *test_value)
{
allocation_info_t *test_info;
static int find_value_in_block_info(block_info_t *info, ir_node *value)
{
unsigned r;
- ir_node **assignments = info->assignments;
+ ir_node **end_assignments = info->assignments;
for (r = 0; r < n_regs; ++r) {
- ir_node *a_value = assignments[r];
+ ir_node *a_value = end_assignments[r];
if (a_value == NULL)
continue;
unsigned *permutation;
ir_node **old_assignments;
bool need_permutation;
- ir_node *node;
+ ir_node *phi;
ir_node *pred = get_Block_cfgpred_block(block, p);
block_info_t *pred_info = get_block_info(pred);
/* check phi nodes */
need_permutation = false;
- node = sched_first(block);
- for ( ; is_Phi(node); node = sched_next(node)) {
+ phi = sched_first(block);
+ for ( ; is_Phi(phi); phi = sched_next(phi)) {
const arch_register_t *reg;
+ const arch_register_t *op_reg;
int regn;
int a;
ir_node *op;
- if (!arch_irn_consider_in_reg_alloc(cls, node))
- continue;
-
- op = get_Phi_pred(node, p);
- if (!arch_irn_consider_in_reg_alloc(cls, op))
+ if (!arch_irn_consider_in_reg_alloc(cls, phi))
continue;
- a = find_value_in_block_info(pred_info, op);
+ op = get_Phi_pred(phi, p);
+ a = find_value_in_block_info(pred_info, op);
assert(a >= 0);
- reg = arch_get_irn_register(node);
+ reg = arch_get_irn_register(phi);
regn = arch_register_get_index(reg);
- if (regn != a) {
- permutation[regn] = a;
- need_permutation = true;
- }
+ /* same register? nothing to do */
+ if (regn == a)
+ continue;
+
+ op = pred_info->assignments[a];
+ op_reg = arch_get_irn_register(op);
+ /* virtual or joker registers are ok too */
+ if ((op_reg->type & arch_register_type_joker)
+ || (op_reg->type & arch_register_type_virtual))
+ continue;
+
+ permutation[regn] = a;
+ need_permutation = true;
}
if (need_permutation) {
old_assignments = assignments;
assignments = pred_info->assignments;
permute_values(NULL, be_get_end_of_block_insertion_point(pred),
- permutation);
+ permutation);
assignments = old_assignments;
}
/* change phi nodes to use the copied values */
- node = sched_first(block);
- for ( ; is_Phi(node); node = sched_next(node)) {
+ phi = sched_first(block);
+ for ( ; is_Phi(phi); phi = sched_next(phi)) {
int a;
ir_node *op;
- if (!arch_irn_consider_in_reg_alloc(cls, node))
+ if (!arch_irn_consider_in_reg_alloc(cls, phi))
continue;
- op = get_Phi_pred(node, p);
- /* no need to do anything for Unknown inputs */
- if (!arch_irn_consider_in_reg_alloc(cls, op))
- continue;
+ op = get_Phi_pred(phi, p);
/* we have permuted all values into the correct registers so we can
simply query which value occupies the phis register in the
predecessor */
- a = arch_register_get_index(arch_get_irn_register(node));
+ a = arch_register_get_index(arch_get_irn_register(phi));
op = pred_info->assignments[a];
- set_Phi_pred(node, p, op);
+ set_Phi_pred(phi, p, op);
}
}
*/
static void allocate_coalesce_block(ir_node *block, void *data)
{
- int i;
- ir_nodeset_t live_nodes;
- ir_node *node;
- int n_preds;
- block_info_t *block_info;
- block_info_t **pred_block_infos;
- ir_node **phi_ins;
- unsigned *forbidden_regs; /**< collects registers which must
+ int i;
+ ir_nodeset_t live_nodes;
+ ir_node *node;
+ int n_preds;
+ block_info_t *block_info;
+ block_info_t **pred_block_infos;
+ ir_node **phi_ins;
+ unsigned *forbidden_regs; /**< collects registers which must
not be used for optimistic splits */
(void) data;
ir_nodeset_init(&live_nodes);
/* gather regalloc infos of predecessor blocks */
- n_preds = get_Block_n_cfgpreds(block);
- pred_block_infos = ALLOCAN(block_info_t*, n_preds);
+ n_preds = get_Block_n_cfgpreds(block);
+ pred_block_infos = ALLOCAN(block_info_t*, n_preds);
for (i = 0; i < n_preds; ++i) {
ir_node *pred = get_Block_cfgpred_block(block, i);
block_info_t *pred_info = get_block_info(pred);
/* collect live-in nodes and preassigned values */
be_lv_foreach(lv, block, be_lv_state_in, i) {
- const arch_register_t *reg;
- int p;
- bool need_phi = false;
+ bool need_phi = false;
+ const arch_register_req_t *req;
+ const arch_register_t *reg;
+ int p;
node = be_lv_get_irn(lv, block, i);
- if (!arch_irn_consider_in_reg_alloc(cls, node))
+ req = arch_get_irn_register_req(node);
+ if (req->cls != cls)
+ continue;
+
+ if (req->type & arch_register_req_type_ignore) {
+ allocation_info_t *info = get_allocation_info(node);
+ info->current_value = node;
+
+ reg = arch_get_irn_register(node);
+ assert(reg != NULL); /* ignore values must be preassigned */
+ use_reg(node, reg);
continue;
+ }
/* check all predecessors for this value, if it is not everywhere the
same or unknown then we have to construct a phi
}
if (need_phi) {
- ir_mode *mode = get_irn_mode(node);
- const arch_register_req_t *req = get_default_req_current_cls();
- ir_node *phi;
-
- phi = new_r_Phi(block, n_preds, phi_ins, mode);
- be_set_phi_reg_req(phi, req);
+ ir_mode *mode = get_irn_mode(node);
+ ir_node *phi = be_new_Phi(block, n_preds, phi_ins, mode, cls);
DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node));
#ifdef DEBUG_libfirm
{
- int i;
- for (i = 0; i < n_preds; ++i) {
- DB((dbg, LEVEL_3, " %+F", phi_ins[i]));
+ int pi;
+ for (pi = 0; pi < n_preds; ++pi) {
+ DB((dbg, LEVEL_3, " %+F", phi_ins[pi]));
}
DB((dbg, LEVEL_3, "\n"));
}
/* assign instructions in the block */
sched_foreach(block, node) {
- int i;
int arity;
+ ir_node *value;
/* phis are already assigned */
if (is_Phi(node))
free_last_uses(&live_nodes, node);
/* assign output registers */
- /* TODO: 2 phases: first: pre-assigned ones, 2nd real regs */
- if (get_irn_mode(node) == mode_T) {
- const ir_edge_t *edge;
- foreach_out_edge(node, edge) {
- ir_node *proj = get_edge_src_irn(edge);
- if (!arch_irn_consider_in_reg_alloc(cls, proj))
- continue;
- assign_reg(block, proj, forbidden_regs);
- }
- } else if (arch_irn_consider_in_reg_alloc(cls, node)) {
- assign_reg(block, node, forbidden_regs);
- }
+ be_foreach_definition_(node, cls, value,
+ assign_reg(block, value, forbidden_regs);
+ );
}
ir_nodeset_destroy(&live_nodes);
static int cmp_block_costs(const void *d1, const void *d2)
{
- const ir_node * const *block1 = d1;
- const ir_node * const *block2 = d2;
- const block_costs_t *info1 = get_irn_link(*block1);
- const block_costs_t *info2 = get_irn_link(*block2);
+	/* Comparator over pointers to block pointers; compares the costs stored
+	 * in the block_costs_t linked to each block. The operands are handed to
+	 * QSORT_CMP in swapped order (info2 first) - presumably to sort by
+	 * descending costs; confirm against QSORT_CMP's definition.
+	 * The casts keep every const qualifier of the incoming const void*. */
+	const ir_node * const *block1 = (const ir_node * const *)d1;
+	const ir_node * const *block2 = (const ir_node * const *)d2;
+	const block_costs_t   *info1  = (const block_costs_t*)get_irn_link(*block1);
+	const block_costs_t   *info2  = (const block_costs_t*)get_irn_link(*block2);
return QSORT_CMP(info2->costs, info1->costs);
}
static void determine_block_order(void)
{
- int i;
+ size_t p;
ir_node **blocklist = be_get_cfgpostorder(irg);
- int n_blocks = ARR_LEN(blocklist);
+ size_t n_blocks = ARR_LEN(blocklist);
int dfs_num = 0;
pdeq *worklist = new_pdeq();
ir_node **order = XMALLOCN(ir_node*, n_blocks);
- int order_p = 0;
+ size_t order_p = 0;
/* clear block links... */
- for (i = 0; i < n_blocks; ++i) {
- ir_node *block = blocklist[i];
+ for (p = 0; p < n_blocks; ++p) {
+ ir_node *block = blocklist[p];
set_irn_link(block, NULL);
}
/* walk blocks in reverse postorder, the costs for each block are the
* sum of the costs of its predecessors (excluding the costs on backedges
* which we can't determine) */
- for (i = n_blocks-1; i >= 0; --i) {
+ for (p = n_blocks; p > 0;) {
block_costs_t *cost_info;
- ir_node *block = blocklist[i];
+ ir_node *block = blocklist[--p];
float execfreq = (float)get_block_execfreq(execfreqs, block);
float costs = execfreq;
int n_cfgpreds = get_Block_n_cfgpreds(block);
- int p;
- for (p = 0; p < n_cfgpreds; ++p) {
- ir_node *pred_block = get_Block_cfgpred_block(block, p);
- block_costs_t *pred_costs = get_irn_link(pred_block);
+ int p2;
+ for (p2 = 0; p2 < n_cfgpreds; ++p2) {
+ ir_node *pred_block = get_Block_cfgpred_block(block, p2);
+ block_costs_t *pred_costs = (block_costs_t*)get_irn_link(pred_block);
/* we don't have any info for backedges */
if (pred_costs == NULL)
continue;
ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
inc_irg_block_visited(irg);
- for (i = 0; i < n_blocks; ++i) {
- ir_node *block = blocklist[i];
+ for (p = 0; p < n_blocks; ++p) {
+ ir_node *block = blocklist[p];
if (Block_block_visited(block))
continue;
/* continually add predecessors with highest costs to worklist
* (without using backedges) */
do {
- block_costs_t *info = get_irn_link(block);
+ block_costs_t *info = (block_costs_t*)get_irn_link(block);
ir_node *best_pred = NULL;
float best_costs = -1;
int n_cfgpred = get_Block_n_cfgpreds(block);
mark_Block_block_visited(block);
for (i = 0; i < n_cfgpred; ++i) {
ir_node *pred_block = get_Block_cfgpred_block(block, i);
- block_costs_t *pred_info = get_irn_link(pred_block);
+ block_costs_t *pred_info = (block_costs_t*)get_irn_link(pred_block);
/* ignore backedges */
if (pred_info->dfs_num > info->dfs_num)
/* now put all nodes in the worklist in our final order */
while (!pdeq_empty(worklist)) {
- ir_node *pblock = pdeq_getr(worklist);
+ ir_node *pblock = (ir_node*)pdeq_getr(worklist);
assert(order_p < n_blocks);
order[order_p++] = pblock;
}
*/
static void be_pref_alloc_cls(void)
{
- int i;
+ size_t i;
- lv = be_assure_liveness(irg);
- be_liveness_assure_sets(lv);
+ be_assure_live_sets(irg);
+ lv = be_get_irg_liveness(irg);
ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
be_pre_spill_prepare_constr(irg, cls);
be_timer_pop(T_RA_CONSTR);
- dump(DUMP_RA, irg, "-spillprepare");
+ dump(DUMP_RA, irg, "spillprepare");
/* spill */
be_timer_push(T_RA_SPILL);
check_for_memory_operands(irg);
be_timer_pop(T_RA_SPILL_APPLY);
- dump(DUMP_RA, irg, "-spill");
+ dump(DUMP_RA, irg, "spill");
}
/**
static void be_pref_alloc(ir_graph *new_irg)
{
const arch_env_t *arch_env = be_get_irg_arch_env(new_irg);
- int n_cls = arch_env_get_n_reg_class(arch_env);
+ int n_cls = arch_env->n_register_classes;
int c;
obstack_init(&obst);
determine_block_order();
for (c = 0; c < n_cls; ++c) {
- cls = arch_env_get_reg_class(arch_env, c);
- default_cls_req = NULL;
+ cls = &arch_env->register_classes[c];
if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra)
continue;
n_regs = arch_register_class_n_regs(cls);
normal_regs = rbitset_malloc(n_regs);
- be_abi_set_non_ignore_regs(be_get_irg_abi(irg), cls, normal_regs);
+ be_set_allocatable_regs(irg, cls, normal_regs);
spill();
/* verify schedule and register pressure */
be_timer_push(T_VERIFY);
- if (be_get_irg_options(irg)->vrfy_option == BE_VRFY_WARN) {
+ if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) {
be_verify_schedule(irg);
be_verify_register_pressure(irg, cls);
- } else if (be_get_irg_options(irg)->vrfy_option == BE_VRFY_ASSERT) {
+ } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) {
assert(be_verify_schedule(irg) && "Schedule verification failed");
assert(be_verify_register_pressure(irg, cls)
&& "Register pressure verification failed");
/* we most probably constructed new Phis so liveness info is invalid
* now */
- /* TODO: test liveness_introduce */
- be_liveness_invalidate(lv);
+ be_invalidate_live_sets(irg);
free(normal_regs);
stat_ev_ctx_pop("regcls");
be_timer_pop(T_RA_SPILL_APPLY);
be_timer_push(T_VERIFY);
- if (be_get_irg_options(irg)->vrfy_option == BE_VRFY_WARN) {
+ if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) {
be_verify_register_allocation(irg);
- } else if (be_get_irg_options(irg)->vrfy_option == BE_VRFY_ASSERT) {
+ } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) {
assert(be_verify_register_allocation(irg)
&& "Register allocation invalid");
}
obstack_free(&obst, NULL);
}
-BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc);
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc)
void be_init_pref_alloc(void)
{
static be_ra_t be_ra_pref = {