* the usefulness. (You can roughly think of the value as the negative
* costs needed for copies when the value is in the specific registers...)
*
- * 2. Walk blocks and assigns registers in a greedy fashion. Preferring registers
- * with high preferences. When register constraints are not met, add copies
- * and split live-ranges.
+ * 2. Walk blocks and assign registers in a greedy fashion, preferring
+ * registers with high preferences. When register constraints are not met,
+ * add copies and split live-ranges.
*
* TODO:
- * - output constraints are not ensured. The algorithm fails to copy values
- * away, so the registers for constrained outputs are free.
- * - must_be_different constraint is not respected
- * - No parallel copies at basic block borders are created, no additional phis
- * created after copies have been inserted.
- * - Phi color assignment should give bonus points towards registers already
- * assigned at predecessors.
+ * - make use of free registers in the permute_values code
* - think about a smarter sequence of visiting the blocks. Sorted by
* execfreq might be good, or looptree from inner to outermost loops going
* over blocks in a reverse postorder
+ * - propagate preferences through Phis
*/
#include "config.h"
#include <float.h>
+#include <stdbool.h>
+#include <math.h>
-#include "obst.h"
-#include "irnode_t.h"
-#include "irgraph_t.h"
-#include "iredges_t.h"
+#include "error.h"
+#include "execfreq.h"
#include "ircons.h"
+#include "irdom.h"
+#include "iredges_t.h"
+#include "irgraph_t.h"
#include "irgwalk.h"
-#include "execfreq.h"
+#include "irnode_t.h"
+#include "irprintf.h"
+#include "obst.h"
+#include "raw_bitset.h"
+#include "unionfind.h"
+#include "pdeq.h"
+#include "hungarian.h"
+#include "beabi.h"
+#include "bechordal_t.h"
#include "be.h"
-#include "bera.h"
+#include "beirg.h"
#include "belive_t.h"
#include "bemodule.h"
-#include "bechordal_t.h"
-#include "besched_t.h"
-#include "beirg_t.h"
#include "benode_t.h"
-#include "bespilloptions.h"
+#include "bera.h"
+#include "besched.h"
+#include "bespill.h"
+#include "bespillutil.h"
#include "beverify.h"
+#include "beutil.h"
-#include "bipartite.h"
-#include "hungarian.h"
-
-#define USE_FACTOR 1.0f
-#define DEF_FACTOR 1.0f
-#define NEIGHBOR_FACTOR 0.2f
-#define SHOULD_BE_SAME 1.0f
+#define USE_FACTOR 1.0f
+#define DEF_FACTOR 1.0f
+#define NEIGHBOR_FACTOR 0.2f
+#define AFF_SHOULD_BE_SAME 0.5f
+#define AFF_PHI 1.0f
+#define SPLIT_DELTA 1.0f
+#define MAX_OPTIMISTIC_SPLIT_RECURSION 2
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
static be_irg_t *birg;
static ir_graph *irg;
static const arch_register_class_t *cls;
+static const arch_register_req_t *default_cls_req;
static be_lv_t *lv;
static const ir_exec_freq *execfreqs;
static unsigned n_regs;
-static bitset_t *ignore_regs;
-
-typedef struct assignment_t assignment_t;
-struct assignment_t {
- ir_node *value; /**< currently assigned value */
-};
+static unsigned *normal_regs;
+static int *congruence_classes;
+static ir_node **block_order;
+static int n_block_order;
-static assignment_t *assignments;
+/** currently active assignments (while processing a basic block)
+ * maps registers to values (their current copies) */
+static ir_node **assignments;
-typedef struct allocation_info_t allocation_info_t;
+/**
+ * allocation information: last_uses, register preferences
+ * the information is per firm-node.
+ */
struct allocation_info_t {
- unsigned last_uses; /**< bitset indicating last uses (input pos) */
- assignment_t *current_assignment;
- float prefs[0]; /**< register preferences */
+ unsigned last_uses; /**< bitset indicating last uses (input pos) */
+ ir_node *current_value; /**< copy of the value that should be used */
+ ir_node *original_value; /**< for copies: points to the original value */
+ float prefs[0]; /**< register preferences */
};
+typedef struct allocation_info_t allocation_info_t;
-typedef struct reg_pref_t reg_pref_t;
+/** helper data structure used when sorting register preferences */
struct reg_pref_t {
unsigned num;
float pref;
};
+typedef struct reg_pref_t reg_pref_t;
+
+/** per basic-block information */
+struct block_info_t {
+ bool processed; /**< indicates whether the block has been processed */
+ ir_node *assignments[0]; /**< register assignments at end of block */
+};
+typedef struct block_info_t block_info_t;
/**
* Get the allocation info for a node.
*/
static allocation_info_t *get_allocation_info(ir_node *node)
{
- allocation_info_t *info;
- if (!irn_visited(node)) {
- size_t size = sizeof(info[0]) + n_regs * sizeof(float);
- info = obstack_alloc(&obst, size);
- memset(info, 0, size);
+ allocation_info_t *info = get_irn_link(node);
+ if (info == NULL) {
+ info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
+ info->current_value = node;
+ info->original_value = node;
set_irn_link(node, info);
- mark_irn_visited(node);
- } else {
- info = get_irn_link(node);
}
return info;
}
+/**
+ * Get allocation information for a basic block
+ */
+static block_info_t *get_block_info(ir_node *block)
+{
+ block_info_t *info = get_irn_link(block);
+
+ assert(is_Block(block));
+ if (info == NULL) {
+ info = OALLOCFZ(&obst, block_info_t, assignments, n_regs);
+ set_irn_link(block, info);
+ }
+
+ return info;
+}
+
+/**
+ * Get default register requirement for the current register class
+ */
+static const arch_register_req_t *get_default_req_current_cls(void)
+{
+ if (default_cls_req == NULL) {
+ struct obstack *obst = get_irg_obstack(irg);
+ arch_register_req_t *req = OALLOCZ(obst, arch_register_req_t);
+
+ req->type = arch_register_req_type_normal;
+ req->cls = cls;
+
+ default_cls_req = req;
+ }
+ return default_cls_req;
+}
+
/**
* Link the allocation info of a node to a copy.
* Afterwards, both nodes uses the same allocation info.
* @param copy the node that gets the allocation info assigned
* @param value the original node
*/
-static void link_to(ir_node *copy, ir_node *value)
+static void mark_as_copy_of(ir_node *copy, ir_node *value)
{
- allocation_info_t *info = get_allocation_info(value);
- assert(!irn_visited(copy));
- set_irn_link(copy, info);
- mark_irn_visited(copy);
+ ir_node *original;
+ allocation_info_t *info = get_allocation_info(value);
+ allocation_info_t *copy_info = get_allocation_info(copy);
+
+ /* find original value */
+ original = info->original_value;
+ if (original != value) {
+ info = get_allocation_info(original);
+ }
+
+ assert(info->original_value == original);
+ info->current_value = copy;
+
+ /* the copy should not be linked to something else yet */
+ assert(copy_info->original_value == copy);
+ copy_info->original_value = original;
+
+ /* copy over allocation preferences */
+ memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
}
/**
* Calculate the penalties for every register on a node and its live neighbors.
*
- * @param live_nodes the set of live nodes at the current position, may be NULL
- * @param penalty the penalty to subtract from
- * @param limited a raw bitset containing the limited set for the node
- * @param node the node
+ * @param live_nodes the set of live nodes at the current position, may be NULL
+ * @param penalty the penalty to subtract from
+ * @param limited a raw bitset containing the limited set for the node
+ * @param node the node
*/
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
float penalty, const unsigned* limited,
- ir_node *node)
+ ir_node *node)
{
ir_nodeset_iterator_t iter;
unsigned r;
+ unsigned n_allowed;
allocation_info_t *info = get_allocation_info(node);
ir_node *neighbor;
if (live_nodes == NULL)
return;
- /* TODO: reduce penalty if there are multiple allowed registers... */
- penalty *= NEIGHBOR_FACTOR;
+ penalty *= NEIGHBOR_FACTOR;
+ n_allowed = rbitset_popcnt(limited, n_regs);
+ if (n_allowed > 1) {
+ /* only create a very weak penalty if multiple regs are allowed */
+ penalty = (penalty * 0.8) / n_allowed;
+ }
foreach_ir_nodeset(live_nodes, neighbor, iter) {
allocation_info_t *neighbor_info;
if (neighbor == node)
continue;
- neighbor_info = get_allocation_info(neighbor);
+ neighbor_info = get_allocation_info(neighbor);
for (r = 0; r < n_regs; ++r) {
if (!rbitset_is_set(limited, r))
continue;
float factor = 1.0f / rbitset_popcnt(&req->other_same, arity);
for (i = 0; i < arity; ++i) {
ir_node *op;
- unsigned r;
+ unsigned r;
allocation_info_t *op_info;
if (!rbitset_is_set(&req->other_same, i))
continue;
- op = get_irn_n(insn, i);
+ op = get_irn_n(insn, i);
+
+ /* if the value at the should_be_same input doesn't die at the
+ * node, then it is of no use to propagate the constraints (since a
+ * copy will emerge anyway) */
+ if (ir_nodeset_contains(live_nodes, op))
+ continue;
+
op_info = get_allocation_info(op);
for (r = 0; r < n_regs; ++r) {
- if (bitset_is_set(ignore_regs, r))
- continue;
op_info->prefs[r] += info->prefs[r] * factor;
}
}
sched_foreach_reverse(block, node) {
allocation_info_t *info;
- int i, arity;
+ int i;
+ int arity;
- if (is_Phi(node)) {
- /* TODO: handle constrained phi-nodes */
+ if (is_Phi(node))
break;
- }
- /* TODO give/take penalties for should_be_same/different) */
check_defs(&live_nodes, weight, node);
/* mark last uses */
arity = get_irn_arity(node);
- /* I was lazy, and only allocated 1 unsigned
- => maximum of 32 uses per node (rewrite if necessary) */
- assert(arity <= (int) sizeof(unsigned) * 8);
+
+ /* the allocation info currently only uses a single unsigned value
+ to mark last used inputs, so we will fail for a node with more than
+ 32 inputs. */
+ if (arity >= (int) sizeof(unsigned) * 8) {
+ panic("Node with more than %d inputs not supported yet",
+ (int) sizeof(unsigned) * 8);
+ }
info = get_allocation_info(node);
for (i = 0; i < arity; ++i) {
continue;
req = arch_get_register_req(node, i);
- if ((req->type & arch_register_req_type_limited) == 0)
+ if (!(req->type & arch_register_req_type_limited))
continue;
- /* TODO: give penalties to neighbors for precolored nodes! */
-
limited = req->limited;
give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
op);
ir_nodeset_destroy(&live_nodes);
}
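+/**
+ * Put @p node into the congruence class of a should_be_same operand,
+ * unless the operand's value interferes (is still live) at the definition.
+ */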
+static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node)
+{
+ const arch_register_req_t *req;
+
+ if (get_irn_mode(node) == mode_T) {
+ const ir_edge_t *edge;
+ foreach_out_edge(node, edge) {
+ ir_node *def = get_edge_src_irn(edge);
+ congruence_def(live_nodes, def);
+ }
+ return;
+ }
+
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ return;
+
+ /* should be same constraint? */
+ req = arch_get_register_req_out(node);
+ if (req->type & arch_register_req_type_should_be_same) {
+ ir_node *insn = skip_Proj(node);
+ int arity = get_irn_arity(insn);
+ int i;
+ unsigned node_idx = get_irn_idx(node);
+ node_idx = uf_find(congruence_classes, node_idx);
+
+ for (i = 0; i < arity; ++i) {
+ ir_node *live;
+ ir_node *op;
+ int op_idx;
+ ir_nodeset_iterator_t iter;
+ bool interferes = false;
+
+ if (!rbitset_is_set(&req->other_same, i))
+ continue;
+
+ op = get_irn_n(insn, i);
+ op_idx = get_irn_idx(op);
+ op_idx = uf_find(congruence_classes, op_idx);
+
+ /* do we interfere with the value */
+ foreach_ir_nodeset(live_nodes, live, iter) {
+ int lv_idx = get_irn_idx(live);
+ lv_idx = uf_find(congruence_classes, lv_idx);
+ if (lv_idx == op_idx) {
+ interferes = true;
+ break;
+ }
+ }
+ /* don't put in same affinity class if we interfere */
+ if (interferes)
+ continue;
+
+ node_idx = uf_union(congruence_classes, node_idx, op_idx);
+ DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
+ node, op));
+ /* one should_be_same is enough... */
+ break;
+ }
+ }
+}
+
+static void create_congruence_class(ir_node *block, void *data)
+{
+ ir_nodeset_t live_nodes;
+ ir_node *node;
+
+ (void) data;
+ ir_nodeset_init(&live_nodes);
+ be_liveness_end_of_block(lv, cls, block, &live_nodes);
+
+ /* check should be same constraints */
+ sched_foreach_reverse(block, node) {
+ if (is_Phi(node))
+ break;
+
+ congruence_def(&live_nodes, node);
+ be_liveness_transfer(cls, node, &live_nodes);
+ }
+
+ /* check phi congruence classes */
+ sched_foreach_reverse_from(node, node) {
+ int i;
+ int arity;
+ int node_idx;
+ assert(is_Phi(node));
+
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ continue;
+
+ node_idx = get_irn_idx(node);
+ node_idx = uf_find(congruence_classes, node_idx);
+
+ arity = get_irn_arity(node);
+ for (i = 0; i < arity; ++i) {
+ bool interferes = false;
+ ir_nodeset_iterator_t iter;
+ ir_node *live;
+ ir_node *phi;
+ ir_node *op = get_Phi_pred(node, i);
+ int op_idx = get_irn_idx(op);
+ op_idx = uf_find(congruence_classes, op_idx);
+
+ /* do we interfere with the value */
+ foreach_ir_nodeset(&live_nodes, live, iter) {
+ int lv_idx = get_irn_idx(live);
+ lv_idx = uf_find(congruence_classes, lv_idx);
+ if (lv_idx == op_idx) {
+ interferes = true;
+ break;
+ }
+ }
+ /* don't put in same affinity class if we interfere */
+ if (interferes)
+ continue;
+ /* any other phi has the same input? */
+ sched_foreach(block, phi) {
+ ir_node *oop;
+ int oop_idx;
+ if (!is_Phi(phi))
+ break;
+ if (!arch_irn_consider_in_reg_alloc(cls, phi))
+ continue;
+ oop = get_Phi_pred(phi, i);
+ if (oop == op)
+ continue;
+ oop_idx = get_irn_idx(oop);
+ oop_idx = uf_find(congruence_classes, oop_idx);
+ if (oop_idx == op_idx) {
+ interferes = true;
+ break;
+ }
+ }
+ if (interferes)
+ continue;
+
+ node_idx = uf_union(congruence_classes, node_idx, op_idx);
+ DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
+ node, op));
+ }
+ }
+}
+
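+/**
+ * Add the register preferences of a node to the preferences of the
+ * representative of its congruence class.
+ */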
+static void merge_congruence_prefs(ir_node *node, void *data)
+{
+ allocation_info_t *info;
+ allocation_info_t *head_info;
+ unsigned node_idx = get_irn_idx(node);
+ unsigned node_set = uf_find(congruence_classes, node_idx);
+ unsigned r;
+
+ (void) data;
+
+ /* head of congruence class or not in any class */
+ if (node_set == node_idx)
+ return;
+
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ return;
+
+ head_info = get_allocation_info(get_idx_irn(irg, node_set));
+ info = get_allocation_info(node);
+
+ for (r = 0; r < n_regs; ++r) {
+ head_info->prefs[r] += info->prefs[r];
+ }
+}
+
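+/**
+ * Copy the accumulated preferences of the congruence class representative
+ * back to each member of the class.
+ */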
+static void set_congruence_prefs(ir_node *node, void *data)
+{
+ allocation_info_t *info;
+ allocation_info_t *head_info;
+ unsigned node_idx = get_irn_idx(node);
+ unsigned node_set = uf_find(congruence_classes, node_idx);
+
+ (void) data;
+
+ /* head of congruence class or not in any class */
+ if (node_set == node_idx)
+ return;
+
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ return;
+
+ head_info = get_allocation_info(get_idx_irn(irg, node_set));
+ info = get_allocation_info(node);
+
+ memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0]));
+}
+
+static void combine_congruence_classes(void)
+{
+ size_t n = get_irg_last_idx(irg);
+ congruence_classes = XMALLOCN(int, n);
+ uf_init(congruence_classes, n);
+
+ /* create congruence classes */
+ irg_block_walk_graph(irg, create_congruence_class, NULL, NULL);
+ /* merge preferences */
+ irg_walk_graph(irg, merge_congruence_prefs, NULL, NULL);
+ irg_walk_graph(irg, set_congruence_prefs, NULL, NULL);
+}
+
+
+
+
+
/**
* Assign register reg to the given node.
*
*/
static void use_reg(ir_node *node, const arch_register_t *reg)
{
- unsigned r = arch_register_get_index(reg);
- assignment_t *assignment = &assignments[r];
- allocation_info_t *info;
+ unsigned r = arch_register_get_index(reg);
+ assignments[r] = node;
+ arch_set_irn_register(node, reg);
+}
- assert(assignment->value == NULL);
- assignment->value = node;
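+/** Mark the register currently occupied by @p node as free again. */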
+static void free_reg_of_value(ir_node *node)
+{
+ const arch_register_t *reg;
+ unsigned r;
- info = get_allocation_info(node);
- info->current_assignment = assignment;
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ return;
- arch_set_irn_register(node, reg);
+ reg = arch_get_irn_register(node);
+ r = arch_register_get_index(reg);
+ /* assignments[r] may be NULL if a value is used at 2 inputs,
+ so it gets freed twice. */
+ assert(assignments[r] == node || assignments[r] == NULL);
+ assignments[r] = NULL;
}
/**
for (r = 0; r < n_regs; ++r) {
float pref = info->prefs[r];
- if (bitset_is_set(ignore_regs, r)) {
- pref = -10000;
- }
regprefs[r].num = r;
regprefs[r].pref = pref;
}
qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
}
-static void assign_reg(const ir_node *block, ir_node *node)
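+/**
+ * Try to move the value @p to_split into a better free register by inserting
+ * a copy before @p before. The split is only done when the expected gain
+ * (@p pref_delta) outweighs the cost of the inserted copy; it may recurse to
+ * free up a register first. Returns true if a split copy was inserted.
+ */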
+static bool try_optimistic_split(ir_node *to_split, ir_node *before,
+ float pref, float pref_delta,
+ unsigned *forbidden_regs, int recursion)
+{
+ const arch_register_t *from_reg;
+ const arch_register_t *reg;
+ ir_node *original_insn;
+ ir_node *block;
+ ir_node *copy;
+ unsigned r;
+ unsigned from_r;
+ unsigned i;
+ allocation_info_t *info = get_allocation_info(to_split);
+ reg_pref_t *prefs;
+ float delta;
+ float split_threshold;
+
+ (void) pref;
+
+ /* stupid hack: don't optimistically split dont_spill nodes...
+ * (so we don't split away the values produced because of
+ * must_be_different constraints) */
+ original_insn = skip_Proj(info->original_value);
+ if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
+ return false;
+
+ from_reg = arch_get_irn_register(to_split);
+ from_r = arch_register_get_index(from_reg);
+ block = get_nodes_block(before);
+ split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA;
+
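+ /* the split inserts a copy whose cost is roughly one execution of this
+ * block, so don't even consider splitting unless the expected gain is at
+ * least half of that */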
+ if (pref_delta < split_threshold*0.5)
+ return false;
+
+ /* find the best free position where we could move to */
+ prefs = ALLOCAN(reg_pref_t, n_regs);
+ fill_sort_candidates(prefs, info);
+ for (i = 0; i < n_regs; ++i) {
+ float apref;
+ float apref_delta;
+ bool res;
+ bool old_source_state;
+
+ /* we need a normal register which is not an output register
+ and is different from the current register of to_split */
+ r = prefs[i].num;
+ if (!rbitset_is_set(normal_regs, r))
+ continue;
+ if (rbitset_is_set(forbidden_regs, r))
+ continue;
+ if (r == from_r)
+ continue;
+
+ /* is the split worth it? */
+ delta = pref_delta + prefs[i].pref;
+ if (delta < split_threshold) {
+ DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n",
+ to_split, recursion, delta));
+ return false;
+ }
+
+ /* if the register is free then we can do the split */
+ if (assignments[r] == NULL)
+ break;
+
+ /* otherwise we might try recursively calling optimistic_split */
+ if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION)
+ continue;
+
+ apref = prefs[i].pref;
+ apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0;
+ apref_delta += pref_delta - split_threshold;
+
+ /* our source register isn't a useful destination for recursive
+ splits */
+ old_source_state = rbitset_is_set(forbidden_regs, from_r);
+ rbitset_set(forbidden_regs, from_r);
+ /* try recursive split */
+ res = try_optimistic_split(assignments[r], before, apref,
+ apref_delta, forbidden_regs, recursion+1);
+ /* restore the forbidden state of our source register */
+ if (old_source_state) {
+ rbitset_set(forbidden_regs, from_r);
+ } else {
+ rbitset_clear(forbidden_regs, from_r);
+ }
+
+ if (res)
+ break;
+ }
+ if (i >= n_regs)
+ return false;
+
+ reg = arch_register_for_index(cls, r);
+ copy = be_new_Copy(cls, block, to_split);
+ mark_as_copy_of(copy, to_split);
+ /* hacky, but correct here */
+ if (assignments[arch_register_get_index(from_reg)] == to_split)
+ free_reg_of_value(to_split);
+ use_reg(copy, reg);
+ sched_add_before(before, copy);
+
+ DB((dbg, LEVEL_3,
+ "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n",
+ copy, to_split, from_reg->name, reg->name, before, delta, recursion));
+ return true;
+}
+
+/**
+ * Determine and assign a register for node @p node
+ */
+static void assign_reg(const ir_node *block, ir_node *node,
+ unsigned *forbidden_regs)
{
const arch_register_t *reg;
allocation_info_t *info;
const arch_register_req_t *req;
reg_pref_t *reg_prefs;
ir_node *in_node;
- unsigned i;
+ unsigned i;
+ const unsigned *allowed_regs;
+ unsigned r;
+ assert(!is_Phi(node));
assert(arch_irn_consider_in_reg_alloc(cls, node));
/* preassigned register? */
reg = arch_get_irn_register(in);
assert(reg != NULL);
r = arch_register_get_index(reg);
- if (bitset_is_set(ignore_regs, r))
+
+ /* if the value didn't die here then we should not propagate the
+ * should_be_same info */
+ if (assignments[r] == in)
continue;
- info->prefs[r] += weight * SHOULD_BE_SAME;
+
+ info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
}
}
- /* TODO: handle must_be_different */
-
- /* */
+ /* create list of register candidates and sort by their preference */
DB((dbg, LEVEL_2, "Candidates for %+F:", node));
reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
fill_sort_candidates(reg_prefs, info);
for (i = 0; i < n_regs; ++i) {
- unsigned num = reg_prefs[i].num;
- const arch_register_t *reg = arch_register_for_index(cls, num);
+ unsigned num = reg_prefs[i].num;
+ const arch_register_t *reg;
+
+ if (!rbitset_is_set(normal_regs, num))
+ continue;
+
+ reg = arch_register_for_index(cls, num);
DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
}
DB((dbg, LEVEL_2, "\n"));
+ allowed_regs = normal_regs;
+ if (req->type & arch_register_req_type_limited) {
+ allowed_regs = req->limited;
+ }
+
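+ /* try registers in order of decreasing preference: take the first allowed
+ * one that is free; if the better candidates are occupied, try to move the
+ * current occupant away with an optimistic live-range split */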
for (i = 0; i < n_regs; ++i) {
- unsigned r = reg_prefs[i].num;
- /* ignores should be last and we should have a non-ignore left */
- assert(!bitset_is_set(ignore_regs, r));
- /* already used? TODO: It might be better to copy the value occupying the register around here, find out when... */
- if (assignments[r].value != NULL)
+ r = reg_prefs[i].num;
+ if (!rbitset_is_set(allowed_regs, r))
continue;
- reg = arch_register_for_index(cls, r);
- DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
- use_reg(node, reg);
- break;
+ if (assignments[r] == NULL)
+ break;
+ float pref = reg_prefs[i].pref;
+ float delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
+ ir_node *before = skip_Proj(node);
+ bool res = try_optimistic_split(assignments[r], before,
+ pref, delta, forbidden_regs, 0);
+ if (res)
+ break;
+ }
+ if (i >= n_regs) {
+ panic("No register left for %+F\n", node);
}
-}
-
-static void free_reg_of_value(ir_node *node)
-{
- allocation_info_t *info;
- assignment_t *assignment;
- unsigned r;
-
- if (!arch_irn_consider_in_reg_alloc(cls, node))
- return;
-
- info = get_allocation_info(node);
- assignment = info->current_assignment;
-
- assert(assignment != NULL);
-
- r = assignment - assignments;
- DB((dbg, LEVEL_2, "Value %+F ended, freeing %s\n",
- node, arch_register_for_index(cls, r)->name));
- assignment->value = NULL;
- info->current_assignment = NULL;
-}
-
-/**
- * Return the index of the currently assigned register of a node.
- */
-static unsigned get_current_reg(ir_node *node)
-{
- allocation_info_t *info = get_allocation_info(node);
- assignment_t *assignment = info->current_assignment;
- return assignment - assignments;
-}
-/**
- * Return the currently assigned assignment of a node.
- */
-static assignment_t *get_current_assignment(ir_node *node)
-{
- allocation_info_t *info = get_allocation_info(node);
- return info->current_assignment;
+ reg = arch_register_for_index(cls, r);
+ DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
+ use_reg(node, reg);
}
/**
* Add an permutation in front of a node and change the assignments
* due to this permutation.
*
+ * To understand this imagine a permutation like this:
+ *
+ * 1 -> 2
+ * 2 -> 3
+ * 3 -> 1, 5
+ * 4 -> 6
+ * 5
+ * 6
+ * 7 -> 7
+ *
+ * First we count how many destinations a single value has. At the same time
+ * we can be sure that each destination register has at most 1 source register
+ * (it can have 0 which means we don't care what value is in it).
+ * We ignore all fulfilled permutations (like 7->7).
+ * In a first pass we create as many copy instructions as possible, as they
+ * are generally cheaper than exchanges. We do this by counting into how many
+ * destinations a register has to be copied (in the example it's 2 for register
+ * 3, or 1 for the registers 1,2,4 and 7).
+ * We can then create a copy into every destination register when the usecount
+ * of that register is 0 (= no one else needs the value in the register).
+ *
+ * After this step only cycles should be left. We implement a cyclic permutation
+ * of n registers with n-1 transpositions.
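+ * In the example above, the first pass emits the copies 3->5 and 4->6 (the
+ * destination registers 5 and 6 are not read by anyone); the remaining cycle
+ * 1->2->3->1 is then resolved with two transpositions.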
+ *
* @param live_nodes the set of live nodes, updated due to live range split
* @param before the node before we add the permutation
- * @param permutation the permutation array (map indexes to indexes)
+ * @param permutation the permutation array indices are the destination
+ * registers, the values in the array are the source
+ * registers.
*/
-static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before,
+static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
unsigned *permutation)
{
- ir_node *block;
- ir_node **srcs = ALLOCANZ(ir_node*, n_regs);
- unsigned *n_used = ALLOCANZ(unsigned, n_regs);
- unsigned r;
+ unsigned *n_used = ALLOCANZ(unsigned, n_regs);
+ ir_node *block;
+ unsigned r;
- /* create a list of values which really need to be "permed" */
+ /* determine how often each source register needs to be read */
for (r = 0; r < n_regs; ++r) {
- unsigned new_reg = permutation[r];
- assignment_t *assignment;
- ir_node *value;
-
- if (new_reg == r)
- continue;
+ unsigned old_reg = permutation[r];
+ ir_node *value;
- assignment = &assignments[r];
- value = assignment->value;
+ value = assignments[old_reg];
if (value == NULL) {
- /* nothing to do here, reg is not live */
+ /* nothing to do here, reg is not live. Mark it as fixpoint
+ * so we ignore it in the next steps */
permutation[r] = r;
continue;
}
- assert(srcs[new_reg] == NULL);
- srcs[new_reg] = value;
- n_used[r]++;
-
- free_reg_of_value(value);
- ir_nodeset_remove(live_nodes, value);
+ ++n_used[old_reg];
}
block = get_nodes_block(before);
/* step1: create copies where immediately possible */
for (r = 0; r < n_regs; /* empty */) {
ir_node *copy;
- ir_node *src = srcs[r];
- unsigned old_r;
+ ir_node *src;
const arch_register_t *reg;
+ unsigned old_r = permutation[r];
- if (src == NULL || n_used[r] > 0) {
+ /* - no need to do anything for fixed points.
+ - we can't copy if the value in the dest reg is still needed */
+ if (old_r == r || n_used[r] > 0) {
++r;
continue;
}
/* create a copy */
+ src = assignments[old_r];
copy = be_new_Copy(cls, block, src);
+ sched_add_before(before, copy);
reg = arch_register_for_index(cls, r);
- DB((dbg, LEVEL_2, "Copy %+F (from %+F) -> %s\n", copy, src, reg->name));
- link_to(copy, src);
+ DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
+ copy, src, before, reg->name));
+ mark_as_copy_of(copy, src);
use_reg(copy, reg);
- sched_add_before(before, copy);
- /* old register has 1 user less */
- reg = arch_get_irn_register(src);
- old_r = arch_register_get_index(reg);
+ if (live_nodes != NULL) {
+ ir_nodeset_insert(live_nodes, copy);
+ }
+
+ /* old register has 1 user less, permutation is resolved */
+ assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
+ permutation[r] = r;
+
assert(n_used[old_r] > 0);
--n_used[old_r];
- srcs[r] = NULL;
+ if (n_used[old_r] == 0) {
+ if (live_nodes != NULL) {
+ ir_nodeset_remove(live_nodes, src);
+ }
+ free_reg_of_value(src);
+ }
- /* advance */
- if (old_r < r)
+ /* advance or jump back (if this copy enabled another copy) */
+ if (old_r < r && n_used[old_r] == 0) {
r = old_r;
- else
+ } else {
++r;
+ }
}
+ /* at this point we only have "cycles" left which we have to resolve with
+ * perm instructions
+ * TODO: if we have free registers left, then we should really use copy
+ * instructions for any cycle longer than 2 registers...
+ * (this is probably architecture dependent, there might be archs where
+ * copies are preferable even for 2-cycles) */
+
/* create perms with the rest */
- for (r = 0; r < n_regs; ++r) {
- if (srcs[r] != NULL) {
- assert (false && "perm creation not implemented yet");
+ for (r = 0; r < n_regs; /* empty */) {
+ const arch_register_t *reg;
+ unsigned old_r = permutation[r];
+ unsigned r2;
+ ir_node *in[2];
+ ir_node *perm;
+ ir_node *proj0;
+ ir_node *proj1;
+
+ if (old_r == r) {
+ ++r;
+ continue;
+ }
+
+ /* we shouldn't have copies from 1 value to multiple destinations left */
+ assert(n_used[old_r] == 1);
+
+ /* exchange old_r and r2; after that old_r is a fixed point */
+ r2 = permutation[old_r];
+
+ in[0] = assignments[r2];
+ in[1] = assignments[old_r];
+ perm = be_new_Perm(cls, block, 2, in);
+ sched_add_before(before, perm);
+ DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
+ perm, in[0], in[1], before));
+
+ proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0);
+ mark_as_copy_of(proj0, in[0]);
+ reg = arch_register_for_index(cls, old_r);
+ use_reg(proj0, reg);
+
+ proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1);
+ mark_as_copy_of(proj1, in[1]);
+ reg = arch_register_for_index(cls, r2);
+ use_reg(proj1, reg);
+
+ /* 1 value is now in the correct register */
+ permutation[old_r] = old_r;
+ /* the source of r changed to r2 */
+ permutation[r] = r2;
+
+ /* if we have reached a fixpoint update data structures */
+ if (live_nodes != NULL) {
+ ir_nodeset_remove(live_nodes, in[0]);
+ ir_nodeset_remove(live_nodes, in[1]);
+ ir_nodeset_remove(live_nodes, proj0);
+ ir_nodeset_insert(live_nodes, proj1);
}
}
+
+#ifdef DEBUG_libfirm
+ /* now we should only have fixpoints left */
+ for (r = 0; r < n_regs; ++r) {
+ assert(permutation[r] == r);
+ }
+#endif
}
/**
*/
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
- allocation_info_t *info = get_allocation_info(node);
- int arity = get_irn_arity(node);
- int i;
+ allocation_info_t *info = get_allocation_info(node);
+ const unsigned *last_uses = &info->last_uses;
+ int arity = get_irn_arity(node);
+ int i;
+
for (i = 0; i < arity; ++i) {
ir_node *op;
/* check if one operand is the last use */
- if (!rbitset_is_set(&info->last_uses, i))
+ if (!rbitset_is_set(last_uses, i))
continue;
op = get_irn_n(node, i);
}
/**
- * Enforce constraints at a node by live range splits.
- *
- * @param live_nodes the set of live nodes, might be changed
- * @param node the current node
+ * change inputs of a node to the current value (copies/perms)
+ */
+static void rewire_inputs(ir_node *node)
+{
+ int i;
+ int arity = get_irn_arity(node);
+
+ for (i = 0; i < arity; ++i) {
+ ir_node *op = get_irn_n(node, i);
+ allocation_info_t *info;
+
+ if (!arch_irn_consider_in_reg_alloc(cls, op))
+ continue;
+
+ info = get_allocation_info(op);
+ info = get_allocation_info(info->original_value);
+ if (info->current_value != op) {
+ set_irn_n(node, i, info->current_value);
+ }
+ }
+}
+
+/**
+ * Create a bitset of registers occupied by values living through an
+ * instruction
+ */
+static void determine_live_through_regs(unsigned *bitset, ir_node *node)
+{
+ const allocation_info_t *info = get_allocation_info(node);
+ unsigned r;
+ int i;
+ int arity;
+
+ /* mark all used registers as potentially live-through */
+ for (r = 0; r < n_regs; ++r) {
+ if (assignments[r] == NULL)
+ continue;
+ if (!rbitset_is_set(normal_regs, r))
+ continue;
+
+ rbitset_set(bitset, r);
+ }
+
+ /* remove registers of values dying at the instruction */
+ arity = get_irn_arity(node);
+ for (i = 0; i < arity; ++i) {
+ ir_node *op;
+ const arch_register_t *reg;
+
+ if (!rbitset_is_set(&info->last_uses, i))
+ continue;
+
+ op = get_irn_n(node, i);
+ reg = arch_get_irn_register(op);
+ rbitset_clear(bitset, arch_register_get_index(reg));
+ }
+}
+
+/**
+ * Enforce constraints at a node by live range splits.
+ *
+ * @param live_nodes the set of live nodes, might be changed
+ * @param node the current node
*/
-static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
+static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
+ unsigned *forbidden_regs)
{
int arity = get_irn_arity(node);
- int i, dummy, res;
+ int i, res;
hungarian_problem_t *bp;
- unsigned l, r, p;
+ unsigned l, r;
unsigned *assignment;
+ /* construct a list of registers occupied by live-through values */
+ unsigned *live_through_regs = NULL;
+
/* see if any use constraints are not met */
bool good = true;
for (i = 0; i < arity; ++i) {
ir_node *op = get_irn_n(node, i);
+ const arch_register_t *reg;
const arch_register_req_t *req;
const unsigned *limited;
unsigned r;
/* are there any limitations for the i'th operand? */
req = arch_get_register_req(node, i);
- if ((req->type & arch_register_req_type_limited) == 0)
+ if (!(req->type & arch_register_req_type_limited))
continue;
limited = req->limited;
- r = get_current_reg(op);
+ reg = arch_get_irn_register(op);
+ r = arch_register_get_index(reg);
if (!rbitset_is_set(limited, r)) {
- /* found an assignement outside the limited set */
+ /* found an assignment outside the limited set */
good = false;
break;
}
}
+ /* is any of the live-throughs using a constrained output register? */
+ if (get_irn_mode(node) == mode_T) {
+ const ir_edge_t *edge;
+
+ foreach_out_edge(node, edge) {
+ ir_node *proj = get_edge_src_irn(edge);
+ const arch_register_req_t *req;
+
+ if (!arch_irn_consider_in_reg_alloc(cls, proj))
+ continue;
+
+ req = arch_get_register_req_out(proj);
+ if (!(req->type & arch_register_req_type_limited))
+ continue;
+
+ if (live_through_regs == NULL) {
+ rbitset_alloca(live_through_regs, n_regs);
+ determine_live_through_regs(live_through_regs, node);
+ }
+
+ rbitset_or(forbidden_regs, req->limited, n_regs);
+ if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
+ good = false;
+ }
+ }
+ } else {
+ if (arch_irn_consider_in_reg_alloc(cls, node)) {
+ const arch_register_req_t *req = arch_get_register_req_out(node);
+ if (req->type & arch_register_req_type_limited) {
+ rbitset_alloca(live_through_regs, n_regs);
+ determine_live_through_regs(live_through_regs, node);
+ if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
+ good = false;
+ rbitset_or(forbidden_regs, req->limited, n_regs);
+ }
+ }
+ }
+ }
+
if (good)
return;
- /* swap values around */
+ /* create these arrays if we haven't yet */
+ if (live_through_regs == NULL) {
+ rbitset_alloca(live_through_regs, n_regs);
+ }
+
+ /* at this point we have to construct a bipartite matching problem to see
+ * which values should go to which registers
+ * Note: We're building the matrix in "reverse" - source registers are
+ * right, destinations left because this will produce the solution
+ * in the format required for permute_values.
+ */
bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT);
/* add all combinations, then remove not allowed ones */
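+ /* weights: 9 for keeping a value in its current register, 8 for moving it,
+ * so identity assignments are slightly preferred; non-allocatable registers
+ * are pinned to themselves with a single dummy entry */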
for (l = 0; l < n_regs; ++l) {
- if (bitset_is_set(ignore_regs, l)) {
- hungarian_add(bp, l, l, 90);
+ if (!rbitset_is_set(normal_regs, l)) {
+ hungarian_add(bp, l, l, 1);
continue;
}
for (r = 0; r < n_regs; ++r) {
- if (bitset_is_set(ignore_regs, r))
+ if (!rbitset_is_set(normal_regs, r))
+ continue;
+ /* live-through values may not use constrained output registers */
+ if (rbitset_is_set(live_through_regs, l)
+ && rbitset_is_set(forbidden_regs, r))
continue;
- hungarian_add(bp, l, r, l == r ? 90 : 89);
+ hungarian_add(bp, r, l, l == r ? 9 : 8);
}
}
for (i = 0; i < arity; ++i) {
ir_node *op = get_irn_n(node, i);
+ const arch_register_t *reg;
const arch_register_req_t *req;
const unsigned *limited;
- unsigned current_reg;
+ unsigned current_reg;
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
req = arch_get_register_req(node, i);
- if ((req->type & arch_register_req_type_limited) == 0)
+ if (!(req->type & arch_register_req_type_limited))
continue;
limited = req->limited;
- current_reg = get_current_reg(op);
+ reg = arch_get_irn_register(op);
+ current_reg = arch_register_get_index(reg);
for (r = 0; r < n_regs; ++r) {
if (rbitset_is_set(limited, r))
continue;
- hungarian_remv(bp, current_reg, r);
+ hungarian_remv(bp, r, current_reg);
}
}
- hungarian_print_costmatrix(bp, 1);
+ //hungarian_print_cost_matrix(bp, 1);
hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
assignment = ALLOCAN(unsigned, n_regs);
- res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
+ res = hungarian_solve(bp, (int*) assignment, NULL, 0);
assert(res == 0);
- printf("Swap result:");
- for (p = 0; p < n_regs; ++p) {
- printf(" %d", assignment[p]);
+#if 0
+ fprintf(stderr, "Swap result:");
+ for (i = 0; i < (int) n_regs; ++i) {
+ fprintf(stderr, " %d", assignment[i]);
}
- printf("\n");
+ fprintf(stderr, "\n");
+#endif
hungarian_free(bp);
- permutate_values(live_nodes, node, assignment);
+ permute_values(live_nodes, node, assignment);
+}
+
+/** test whether @p test_value is a copy of the value of @p value */
+static bool is_copy_of(ir_node *value, ir_node *test_value)
+{
+ allocation_info_t *test_info;
+ allocation_info_t *info;
+
+ if (value == test_value)
+ return true;
+
+ info = get_allocation_info(value);
+ test_info = get_allocation_info(test_value);
+ return test_info->original_value == info->original_value;
}
/**
- * Walker: assign registers to all nodes of a block that
- * needs registers from the currently considered register class.
+ * find a value in the end-assignment of a basic block
+ * @returns the index into the assignment array if found
+ * -1 if not found
*/
-static void allocate_coalesce_block(ir_node *block, void *data)
+static int find_value_in_block_info(block_info_t *info, ir_node *value)
{
- int i;
- ir_nodeset_t live_nodes;
- ir_nodeset_iterator_t iter;
- ir_node *node, *start;
+ unsigned r;
+ ir_node **assignments = info->assignments;
+ for (r = 0; r < n_regs; ++r) {
+ ir_node *a_value = assignments[r];
- DB((dbg, LEVEL_2, "Allocating in block %+F\n",
- block));
+ if (a_value == NULL)
+ continue;
+ if (is_copy_of(a_value, value))
+ return (int) r;
+ }
- /* clear assignments */
- memset(assignments, 0, n_regs * sizeof(assignments[0]));
- ir_nodeset_init(&live_nodes);
+ return -1;
+}
- (void) data;
+/**
+ * Create the necessary permutations at the end of a basic block to fulfill
+ * the register assignment for phi-nodes in the next block
+ */
+static void add_phi_permutations(ir_node *block, int p)
+{
+ unsigned r;
+ unsigned *permutation;
+ ir_node **old_assignments;
+ bool need_permutation;
+ ir_node *node;
+ ir_node *pred = get_Block_cfgpred_block(block, p);
- /* collect live-in nodes and preassigned values */
- be_lv_foreach(lv, block, be_lv_state_in, i) {
+ block_info_t *pred_info = get_block_info(pred);
+
+ /* predecessor not processed yet? nothing to do */
+ if (!pred_info->processed)
+ return;
+
+ permutation = ALLOCAN(unsigned, n_regs);
+ for (r = 0; r < n_regs; ++r) {
+ permutation[r] = r;
+ }
+
+ /* check phi nodes */
+ need_permutation = false;
+ node = sched_first(block);
+ for ( ; is_Phi(node); node = sched_next(node)) {
const arch_register_t *reg;
+ int regn;
+ int a;
+ ir_node *op;
- node = be_lv_get_irn(lv, block, i);
if (!arch_irn_consider_in_reg_alloc(cls, node))
continue;
- /* remember that this node is live at the beginning of the block */
- ir_nodeset_insert(&live_nodes, node);
+ op = get_Phi_pred(node, p);
+ if (!arch_irn_consider_in_reg_alloc(cls, op))
+ continue;
- /* if the node already has a register assigned use it */
- /* TODO: the value could already be copied away at this point and be in
- another register */
- reg = arch_get_irn_register(node);
- if (reg != NULL) {
- use_reg(node, reg);
+ a = find_value_in_block_info(pred_info, op);
+ assert(a >= 0);
+
+ reg = arch_get_irn_register(node);
+ regn = arch_register_get_index(reg);
+ if (regn != a) {
+ permutation[regn] = a;
+ need_permutation = true;
}
}
- /* handle phis... */
+ if (need_permutation) {
+ /* permute values at end of predecessor */
+ old_assignments = assignments;
+ assignments = pred_info->assignments;
+ permute_values(NULL, be_get_end_of_block_insertion_point(pred),
+ permutation);
+ assignments = old_assignments;
+ }
+
+ /* change phi nodes to use the copied values */
node = sched_first(block);
for ( ; is_Phi(node); node = sched_next(node)) {
+ int a;
+ ir_node *op;
+
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ continue;
+
+ op = get_Phi_pred(node, p);
+ /* no need to do anything for Unknown inputs */
+ if (!arch_irn_consider_in_reg_alloc(cls, op))
+ continue;
+
+ /* we have permuted all values into the correct registers so we can
+ simply query which value occupies the phi's register in the
+ predecessor */
+ a = arch_register_get_index(arch_get_irn_register(node));
+ op = pred_info->assignments[a];
+ set_Phi_pred(node, p, op);
+ }
+}
+
+/**
+ * Set preferences for a phi's register based on the registers used at the
+ * phi inputs.
+ */
+static void adapt_phi_prefs(ir_node *phi)
+{
+ int i;
+ int arity = get_irn_arity(phi);
+ ir_node *block = get_nodes_block(phi);
+ allocation_info_t *info = get_allocation_info(phi);
+
+ for (i = 0; i < arity; ++i) {
+ ir_node *op = get_irn_n(phi, i);
+ const arch_register_t *reg = arch_get_irn_register(op);
+ ir_node *pred_block;
+ block_info_t *pred_block_info;
+ float weight;
+ unsigned r;
+
+ if (reg == NULL)
+ continue;
+ /* we only give the bonus if the predecessor already has registers
+ * assigned, otherwise we only see a dummy value
+ * and any conclusions about its register are useless */
+ pred_block = get_Block_cfgpred_block(block, i);
+ pred_block_info = get_block_info(pred_block);
+ if (!pred_block_info->processed)
+ continue;
+
+ /* give bonus for already assigned register */
+ weight = get_block_execfreq(execfreqs, pred_block);
+ r = arch_register_get_index(reg);
+ info->prefs[r] += weight * AFF_PHI;
+ }
+}
+
+/**
+ * After a phi has been assigned a register, propagate that register
+ * preference to the phi inputs.
+ */
+static void propagate_phi_register(ir_node *phi, unsigned assigned_r)
+{
+ int i;
+ ir_node *block = get_nodes_block(phi);
+ int arity = get_irn_arity(phi);
+
+ for (i = 0; i < arity; ++i) {
+ ir_node *op = get_Phi_pred(phi, i);
+ allocation_info_t *info = get_allocation_info(op);
+ ir_node *pred_block = get_Block_cfgpred_block(block, i);
+ unsigned r;
+ float weight
+ = get_block_execfreq(execfreqs, pred_block) * AFF_PHI;
+
+ if (info->prefs[assigned_r] >= weight)
+ continue;
+
+ /* promote the preferred register */
+ for (r = 0; r < n_regs; ++r) {
+ if (r == assigned_r) {
+ info->prefs[r] = AFF_PHI * weight;
+ } else {
+ info->prefs[r] -= AFF_PHI * weight;
+ }
+ }
+
+ if (is_Phi(op))
+ propagate_phi_register(op, assigned_r);
+ }
+}
+
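+/**
+ * Assign registers to all phi nodes at the beginning of @p block at once,
+ * using a bipartite matching between the phis and the available registers.
+ */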
+static void assign_phi_registers(ir_node *block)
+{
+ int n_phis = 0;
+ int n;
+ int res;
+ int *assignment;
+ ir_node *node;
+ hungarian_problem_t *bp;
+
+ /* count phi nodes */
+ sched_foreach(block, node) {
+ if (!is_Phi(node))
+ break;
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ continue;
+ ++n_phis;
+ }
+
+ if (n_phis == 0)
+ return;
+
+ /* build a bipartite matching problem for all phi nodes */
+ bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT);
+ n = 0;
+ sched_foreach(block, node) {
+ unsigned r;
+
+ allocation_info_t *info;
+ if (!is_Phi(node))
+ break;
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ continue;
+
+ /* give boni for predecessor colorings */
+ adapt_phi_prefs(node);
+ /* add stuff to bipartite problem */
+ info = get_allocation_info(node);
+ DB((dbg, LEVEL_3, "Prefs for %+F: ", node));
+ for (r = 0; r < n_regs; ++r) {
+ float costs;
+
+ if (!rbitset_is_set(normal_regs, r))
+ continue;
+
+ costs = info->prefs[r];
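+ /* compress the preference logarithmically and shift it into a positive
+ * range; the matching maximizes utility, so larger values mean more
+ * preferred (the scaling is a heuristic) */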
+ costs = costs < 0 ? -logf(-costs+1) : logf(costs+1);
+ costs *= 100;
+ costs += 10000;
+ hungarian_add(bp, n, r, costs);
+ DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name,
+ info->prefs[r]));
+ }
+ DB((dbg, LEVEL_3, "\n"));
+ ++n;
+ }
+
+ //hungarian_print_cost_matrix(bp, 7);
+ hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
+
+ assignment = ALLOCAN(int, n_regs);
+ res = hungarian_solve(bp, assignment, NULL, 0);
+ assert(res == 0);
+
+ /* apply results */
+ n = 0;
+ sched_foreach(block, node) {
+ unsigned r;
+ const arch_register_t *reg;
+
+ if (!is_Phi(node))
+ break;
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ continue;
+
+ r = assignment[n++];
+ assert(rbitset_is_set(normal_regs, r));
+ reg = arch_register_for_index(cls, r);
+ DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
+ use_reg(node, reg);
+
+ /* adapt preferences for phi inputs */
+ propagate_phi_register(node, r);
+ }
+}
+
+/**
+ * Walker: assign registers to all nodes of a block that
+ * need registers from the currently considered register class.
+ */
+static void allocate_coalesce_block(ir_node *block, void *data)
+{
+ int i;
+ ir_nodeset_t live_nodes;
+ ir_nodeset_iterator_t iter;
+ ir_node *node;
+ int n_preds;
+ block_info_t *block_info;
+ block_info_t **pred_block_infos;
+ ir_node **phi_ins;
+ unsigned *forbidden_regs; /**< collects registers which must
+ not be used for optimistic splits */
+
+ (void) data;
+ DB((dbg, LEVEL_2, "* Block %+F\n", block));
+
+ /* clear assignments */
+ block_info = get_block_info(block);
+ assignments = block_info->assignments;
+
+ ir_nodeset_init(&live_nodes);
+
+ /* gather regalloc infos of predecessor blocks */
+ n_preds = get_Block_n_cfgpreds(block);
+ pred_block_infos = ALLOCAN(block_info_t*, n_preds);
+ for (i = 0; i < n_preds; ++i) {
+ ir_node *pred = get_Block_cfgpred_block(block, i);
+ block_info_t *pred_info = get_block_info(pred);
+ pred_block_infos[i] = pred_info;
+ }
+
+ phi_ins = ALLOCAN(ir_node*, n_preds);
+
+ /* collect live-in nodes and preassigned values */
+ be_lv_foreach(lv, block, be_lv_state_in, i) {
const arch_register_t *reg;
+ int p;
+ bool need_phi = false;
+ node = be_lv_get_irn(lv, block, i);
if (!arch_irn_consider_in_reg_alloc(cls, node))
continue;
- /* fill in regs already assigned */
+ /* check all predecessors for this value; if it is not the same
+ everywhere (or a predecessor is not processed yet), we have to
+ construct a phi (we collect the potential phi inputs here) */
+ for (p = 0; p < n_preds; ++p) {
+ block_info_t *pred_info = pred_block_infos[p];
+
+ if (!pred_info->processed) {
+ /* use node for now, it will get fixed later */
+ phi_ins[p] = node;
+ need_phi = true;
+ } else {
+ int a = find_value_in_block_info(pred_info, node);
+
+ /* must live out of predecessor */
+ assert(a >= 0);
+ phi_ins[p] = pred_info->assignments[a];
+ /* different value from last time? then we need a phi */
+ if (p > 0 && phi_ins[p-1] != phi_ins[p]) {
+ need_phi = true;
+ }
+ }
+ }
+
+ if (need_phi) {
+ ir_mode *mode = get_irn_mode(node);
+ const arch_register_req_t *req = get_default_req_current_cls();
+ ir_node *phi;
+ int i;
+
+ phi = new_r_Phi(block, n_preds, phi_ins, mode);
+ be_set_phi_reg_req(phi, req);
+
+ DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node));
+#ifdef DEBUG_libfirm
+ for (i = 0; i < n_preds; ++i) {
+ DB((dbg, LEVEL_3, " %+F", phi_ins[i]));
+ }
+ DB((dbg, LEVEL_3, "\n"));
+#endif
+ mark_as_copy_of(phi, node);
+ sched_add_after(block, phi);
+
+ node = phi;
+ } else {
+ allocation_info_t *info = get_allocation_info(node);
+ info->current_value = phi_ins[0];
+
+ /* Grab 1 of the inputs we constructed (might not be the same as
+ * "node" as we could see the same copy of the value in all
+ * predecessors) */
+ node = phi_ins[0];
+ }
+
+ /* if the node already has a register assigned use it */
reg = arch_get_irn_register(node);
if (reg != NULL) {
use_reg(node, reg);
- } else {
- /* TODO: give boni for registers already assigned at the
- predecessors */
}
+
+ /* remember that this node is live at the beginning of the block */
+ ir_nodeset_insert(&live_nodes, node);
}
- start = node;
- /* assign regs for live-in values */
- foreach_ir_nodeset(&live_nodes, node, iter) {
- const arch_register_t *reg;
- reg = arch_get_irn_register(node);
- if (reg != NULL)
- continue;
+ rbitset_alloca(forbidden_regs, n_regs);
- assign_reg(block, node);
+ /* handle phis... */
+ assign_phi_registers(block);
+
+ /* all live-ins must have a register */
+#ifdef DEBUG_libfirm
+ foreach_ir_nodeset(&live_nodes, node, iter) {
+ const arch_register_t *reg = arch_get_irn_register(node);
+ assert(reg != NULL);
}
+#endif
/* assign instructions in the block */
- for (node = start; !sched_is_end(node); node = sched_next(node)) {
- int arity = get_irn_arity(node);
+ sched_foreach(block, node) {
int i;
+ int arity;
+
+ /* phis are already assigned */
+ if (is_Phi(node))
+ continue;
+
+ rewire_inputs(node);
/* enforce use constraints */
- enforce_constraints(&live_nodes, node);
+ rbitset_clear_all(forbidden_regs, n_regs);
+ enforce_constraints(&live_nodes, node, forbidden_regs);
- /* exchange values to copied values where needed */
- for (i = 0; i < arity; ++i) {
- ir_node *op = get_irn_n(node, i);
- assignment_t *assignment;
+ rewire_inputs(node);
+ /* we may not use registers used for inputs for optimistic splits */
+ arity = get_irn_arity(node);
+ for (i = 0; i < arity; ++i) {
+ ir_node *op = get_irn_n(node, i);
+ const arch_register_t *reg;
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
- assignment = get_current_assignment(op);
- assert(assignment != NULL);
- if (op != assignment->value) {
- set_irn_n(node, i, assignment->value);
- }
+
+ reg = arch_get_irn_register(op);
+ rbitset_set(forbidden_regs, arch_register_get_index(reg));
}
+ /* free registers of values last used at this instruction */
free_last_uses(&live_nodes, node);
/* assign output registers */
ir_node *proj = get_edge_src_irn(edge);
if (!arch_irn_consider_in_reg_alloc(cls, proj))
continue;
- assign_reg(block, proj);
+ assign_reg(block, proj, forbidden_regs);
}
} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
- assign_reg(block, node);
+ assign_reg(block, node, forbidden_regs);
}
}
- foreach_ir_nodeset(&live_nodes, node, iter) {
- free_reg_of_value(node);
+ ir_nodeset_destroy(&live_nodes);
+ assignments = NULL;
+
+ block_info->processed = true;
+
+ /* permute values at end of predecessor blocks in case of phi-nodes */
+ if (n_preds > 1) {
+ int p;
+ for (p = 0; p < n_preds; ++p) {
+ add_phi_permutations(block, p);
+ }
}
- ir_nodeset_destroy(&live_nodes);
+ /* if we have exactly 1 successor then we might be able to produce phi
+ copies now */
+ if (get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) == 1) {
+ const ir_edge_t *edge
+ = get_irn_out_edge_first_kind(block, EDGE_KIND_BLOCK);
+ ir_node *succ = get_edge_src_irn(edge);
+ int p = get_edge_src_pos(edge);
+ block_info_t *succ_info = get_block_info(succ);
+
+ if (succ_info->processed) {
+ add_phi_permutations(succ, p);
+ }
+ }
+}
+
+typedef struct block_costs_t block_costs_t;
+struct block_costs_t {
+ float costs; /**< costs of the block */
+ int dfs_num; /**< depth first search number (to detect backedges) */
+};
+
+static int cmp_block_costs(const void *d1, const void *d2)
+{
+ const ir_node * const *block1 = d1;
+ const ir_node * const *block2 = d2;
+ const block_costs_t *info1 = get_irn_link(*block1);
+ const block_costs_t *info2 = get_irn_link(*block2);
+ return QSORT_CMP(info2->costs, info1->costs);
+}
+
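+/**
+ * Determine a good order for allocating the blocks: blocks are weighted by
+ * accumulated execution frequency; starting from the most expensive blocks
+ * we walk up the chain of most expensive (non-backedge) predecessors, so
+ * that predecessors tend to be processed before their successors.
+ */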
+static void determine_block_order(void)
+{
+ int i;
+ ir_node **blocklist = be_get_cfgpostorder(irg);
+ int n_blocks = ARR_LEN(blocklist);
+ int dfs_num = 0;
+ pdeq *worklist = new_pdeq();
+ ir_node **order = XMALLOCN(ir_node*, n_blocks);
+ int order_p = 0;
+
+ /* clear block links... */
+ for (i = 0; i < n_blocks; ++i) {
+ ir_node *block = blocklist[i];
+ set_irn_link(block, NULL);
+ }
+
+ /* walk blocks in reverse postorder; the costs for each block are its
+ * execution frequency plus the sum of the costs of its predecessors
+ * (excluding the costs on backedges, which we can't determine) */
+ for (i = n_blocks-1; i >= 0; --i) {
+ block_costs_t *cost_info;
+ ir_node *block = blocklist[i];
+
+ float execfreq = get_block_execfreq(execfreqs, block);
+ float costs = execfreq;
+ int n_cfgpreds = get_Block_n_cfgpreds(block);
+ int p;
+ for (p = 0; p < n_cfgpreds; ++p) {
+ ir_node *pred_block = get_Block_cfgpred_block(block, p);
+ block_costs_t *pred_costs = get_irn_link(pred_block);
+ /* we don't have any info for backedges */
+ if (pred_costs == NULL)
+ continue;
+ costs += pred_costs->costs;
+ }
+
+ cost_info = OALLOCZ(&obst, block_costs_t);
+ cost_info->costs = costs;
+ cost_info->dfs_num = dfs_num++;
+ set_irn_link(block, cost_info);
+ }
+
+ /* sort array by block costs */
+ qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs);
+
+ ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+ inc_irg_block_visited(irg);
+
+ for (i = 0; i < n_blocks; ++i) {
+ ir_node *block = blocklist[i];
+ if (Block_block_visited(block))
+ continue;
+
+ /* continually add predecessors with highest costs to worklist
+ * (without using backedges) */
+ do {
+ block_costs_t *info = get_irn_link(block);
+ ir_node *best_pred = NULL;
+ float best_costs = -1;
+ int n_cfgpred = get_Block_n_cfgpreds(block);
+ int i;
+
+ pdeq_putr(worklist, block);
+ mark_Block_block_visited(block);
+ for (i = 0; i < n_cfgpred; ++i) {
+ ir_node *pred_block = get_Block_cfgpred_block(block, i);
+ block_costs_t *pred_info = get_irn_link(pred_block);
+
+ /* ignore backedges */
+ if (pred_info->dfs_num > info->dfs_num)
+ continue;
+
+ if (pred_info->costs > best_costs) {
+ best_costs = pred_info->costs;
+ best_pred = pred_block;
+ }
+ }
+ block = best_pred;
+ } while(block != NULL && !Block_block_visited(block));
+
+ /* now put all nodes in the worklist in our final order */
+ while (!pdeq_empty(worklist)) {
+ ir_node *pblock = pdeq_getr(worklist);
+ assert(order_p < n_blocks);
+ order[order_p++] = pblock;
+ }
+ }
+ assert(order_p == n_blocks);
+ del_pdeq(worklist);
+
+ ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+
+ DEL_ARR_F(blocklist);
+
+ obstack_free(&obst, NULL);
+ obstack_init(&obst);
+
+ block_order = order;
+ n_block_order = n_blocks;
}
/**
*/
static void be_straight_alloc_cls(void)
{
- n_regs = arch_register_class_n_regs(cls);
- lv = be_assure_liveness(birg);
+ int i;
+
+ lv = be_assure_liveness(birg);
be_liveness_assure_sets(lv);
be_liveness_assure_chk(lv);
- assignments = obstack_alloc(&obst, n_regs * sizeof(assignments[0]));
-
- ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_IRN_VISITED);
- inc_irg_visited(irg);
+ ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
- DB((dbg, LEVEL_2, "=== Registers in %s ===\n", cls->name));
+ DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name));
+ be_clear_links(irg);
irg_block_walk_graph(irg, NULL, analyze_block, NULL);
- irg_block_walk_graph(irg, NULL, allocate_coalesce_block, NULL);
+ combine_congruence_classes();
+
+ for (i = 0; i < n_block_order; ++i) {
+ ir_node *block = block_order[i];
+ allocate_coalesce_block(block, NULL);
+ }
- ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_IRN_VISITED);
+ ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
+}
+
+static void dump(int mask, ir_graph *irg, const char *suffix,
+ void (*dumper)(ir_graph *, const char *))
+{
+ if (birg->main_env->options->dump_flags & mask)
+ be_dump(irg, suffix, dumper);
}
/**
be_pre_spill_prepare_constr(birg, cls);
BE_TIMER_POP(t_ra_constr);
+ dump(DUMP_RA, irg, "-spillprepare", dump_ir_block_graph_sched);
+
/* spill */
BE_TIMER_PUSH(t_ra_spill);
be_do_spill(birg, cls);
BE_TIMER_PUSH(t_ra_spill_apply);
check_for_memory_operands(irg);
BE_TIMER_POP(t_ra_spill_apply);
+
+ dump(DUMP_RA, irg, "-spill", dump_ir_block_graph_sched);
}
/**
irg = be_get_birg_irg(birg);
execfreqs = birg->exec_freq;
- /* TODO: extract some of the stuff from bechordal allocator, like
- * statistics, time measurements, etc. and use them here too */
+ /* determine a good coloring order */
+ determine_block_order();
for (c = 0; c < n_cls; ++c) {
- cls = arch_env_get_reg_class(arch_env, c);
+ cls = arch_env_get_reg_class(arch_env, c);
+ default_cls_req = NULL;
if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra)
continue;
- stat_ev_ctx_push_str("bestraight_cls", cls->name);
+ stat_ev_ctx_push_str("regcls", cls->name);
- n_regs = cls->n_regs;
- ignore_regs = bitset_malloc(n_regs);
- be_put_ignore_regs(birg, cls, ignore_regs);
+ n_regs = arch_register_class_n_regs(cls);
+ normal_regs = rbitset_malloc(n_regs);
+ be_abi_set_non_ignore_regs(birg->abi, cls, normal_regs);
spill();
/* verify schedule and register pressure */
BE_TIMER_PUSH(t_verify);
- if (birg->main_env->options->vrfy_option == BE_CH_VRFY_WARN) {
+ if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) {
be_verify_schedule(birg);
be_verify_register_pressure(birg, cls, irg);
- } else if (birg->main_env->options->vrfy_option == BE_CH_VRFY_ASSERT) {
+ } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) {
assert(be_verify_schedule(birg) && "Schedule verification failed");
assert(be_verify_register_pressure(birg, cls, irg)
&& "Register pressure verification failed");
be_straight_alloc_cls();
BE_TIMER_POP(t_ra_color);
- bitset_free(ignore_regs);
+ /* we most probably constructed new Phis so liveness info is invalid
+ * now */
+ /* TODO: test liveness_introduce */
+ be_liveness_invalidate(lv);
+ free(normal_regs);
+
+ stat_ev_ctx_pop("regcls");
+ }
- /* TODO: dump intermediate results */
+ BE_TIMER_PUSH(t_ra_spill_apply);
+ be_abi_fix_stack_nodes(birg->abi);
+ BE_TIMER_POP(t_ra_spill_apply);
- stat_ev_ctx_pop("bestraight_cls");
+ BE_TIMER_PUSH(t_verify);
+ if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) {
+ be_verify_register_allocation(birg);
+ } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) {
+ assert(be_verify_register_allocation(birg)
+ && "Register allocation invalid");
}
+ BE_TIMER_POP(t_verify);
obstack_free(&obst, NULL);
}
-static be_ra_t be_ra_straight = {
- be_straight_alloc,
-};
-
/**
* Initializes this module.
*/
void be_init_straight_alloc(void)
{
+ static be_ra_t be_ra_straight = {
+ be_straight_alloc,
+ };
+
FIRM_DBG_REGISTER(dbg, "firm.be.straightalloc");
be_register_allocator("straight", &be_ra_straight);