#include <float.h>
#include <stdbool.h>
+#include <math.h>
#include "error.h"
#include "execfreq.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irnode_t.h"
+#include "irprintf.h"
#include "obst.h"
#include "raw_bitset.h"
#include "unionfind.h"
+#include "pdeq.h"
+#include "hungarian.h"
#include "beabi.h"
#include "bechordal_t.h"
#include "bespill.h"
#include "bespillutil.h"
#include "beverify.h"
+#include "beutil.h"
-#include "hungarian.h"
-
-#define USE_FACTOR 1.0f
-#define DEF_FACTOR 1.0f
-#define NEIGHBOR_FACTOR 0.2f
-#define AFF_SHOULD_BE_SAME 0.5f
-#define AFF_PHI 1.0f
-#define SPLIT_DELTA 1.0f
+#define USE_FACTOR 1.0f
+#define DEF_FACTOR 1.0f
+#define NEIGHBOR_FACTOR 0.2f
+#define AFF_SHOULD_BE_SAME 0.5f
+#define AFF_PHI 1.0f
+#define SPLIT_DELTA 1.0f
+#define MAX_OPTIMISTIC_SPLIT_RECURSION 0
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
static unsigned n_regs;
static unsigned *normal_regs;
static int *congruence_classes;
+static ir_node **block_order;
+static int n_block_order;
+static int create_preferences = true;
+static int create_congruence_classes = true;
+static int propagate_phi_registers = true;
+
+static const lc_opt_table_entry_t options[] = {
+ LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences),
+ LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes),
+ LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers),
+ LC_OPT_LAST
+};
/** currently active assignments (while processing a basic block)
* maps registers to values(their current copies) */
unsigned last_uses; /**< bitset indicating last uses (input pos) */
ir_node *current_value; /**< copy of the value that should be used */
ir_node *original_value; /**< for copies point to original value */
- unsigned char should_be_same[2];
float prefs[0]; /**< register preferences */
};
typedef struct allocation_info_t allocation_info_t;
return info;
}
+/**
+ * Return the allocation info attached to @p node via its link field, or
+ * NULL if none has been attached yet.  (Unlike get_allocation_info(),
+ * which — judging by the name — creates the info on demand; confirm.)
+ */
+static allocation_info_t *try_get_allocation_info(const ir_node *node)
+{
+	return (allocation_info_t*) get_irn_link(node);
+}
+
/**
* Get allocation information for a basic block
*/
{
ir_nodeset_iterator_t iter;
unsigned r;
+ unsigned n_allowed;
allocation_info_t *info = get_allocation_info(node);
ir_node *neighbor;
if (live_nodes == NULL)
return;
- /* TODO: reduce penalty if there are multiple allowed registers... */
- penalty *= NEIGHBOR_FACTOR;
+ penalty *= NEIGHBOR_FACTOR;
+ n_allowed = rbitset_popcnt(limited, n_regs);
+ if (n_allowed > 1) {
+ /* only create a very weak penalty if multiple regs are allowed */
+ penalty = (penalty * 0.8f) / n_allowed;
+ }
foreach_ir_nodeset(live_nodes, neighbor, iter) {
allocation_info_t *neighbor_info;
if (is_Phi(node))
break;
- check_defs(&live_nodes, weight, node);
+ if (create_preferences)
+ check_defs(&live_nodes, weight, node);
/* mark last uses */
arity = get_irn_arity(node);
be_liveness_transfer(cls, node, &live_nodes);
- /* update weights based on usage constraints */
- for (i = 0; i < arity; ++i) {
- const arch_register_req_t *req;
- const unsigned *limited;
- ir_node *op = get_irn_n(node, i);
+ if (create_preferences) {
+ /* update weights based on usage constraints */
+ for (i = 0; i < arity; ++i) {
+ const arch_register_req_t *req;
+ const unsigned *limited;
+ ir_node *op = get_irn_n(node, i);
- if (!arch_irn_consider_in_reg_alloc(cls, op))
- continue;
+ if (!arch_irn_consider_in_reg_alloc(cls, op))
+ continue;
- req = arch_get_register_req(node, i);
- if (!(req->type & arch_register_req_type_limited))
- continue;
+ req = arch_get_register_req(node, i);
+ if (!(req->type & arch_register_req_type_limited))
+ continue;
- limited = req->limited;
- give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
- op);
+ limited = req->limited;
+ give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
+ op);
+ }
}
}
ir_nodeset_destroy(&live_nodes);
}
-static void create_congurence_class(ir_node *node, void *data)
+/**
+ * Union the congruence class of a defined value with the class of an
+ * operand named by its should_be_same constraint — but only if the two
+ * values do not interfere (i.e. no member of the operand's class is in
+ * @p live_nodes at this definition).  mode_T results are handled by
+ * recursing into each Proj.
+ */
+static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node)
{
-	(void) data;
-	if (is_Phi(node)) {
-		int i;
-		int arity = get_irn_arity(node);
-		unsigned phi_idx = get_irn_idx(node);
-		phi_idx = uf_find(congruence_classes, phi_idx);
-		for (i = 0; i < arity; ++i) {
-			ir_node *op = get_Phi_pred(node, i);
-			int op_idx = get_irn_idx(op);
-			op_idx = uf_find(congruence_classes, op_idx);
-			phi_idx = uf_union(congruence_classes, phi_idx, op_idx);
+	const arch_register_req_t *req;
+
+	/* multi-result instruction: treat each Proj as its own definition */
+	if (get_irn_mode(node) == mode_T) {
+		const ir_edge_t *edge;
+		foreach_out_edge(node, edge) {
+			ir_node *def = get_edge_src_irn(edge);
+			congruence_def(live_nodes, def);
		}
		return;
	}
+
+	if (!arch_irn_consider_in_reg_alloc(cls, node))
+		return;
+
	/* should be same constraint? */
-	if (is_Proj(node)) {
-		const arch_register_req_t *req = arch_get_register_req_out(node);
-		if (req->type & arch_register_req_type_should_be_same) {
-			ir_node *pred = get_Proj_pred(node);
-			int arity = get_irn_arity(pred);
-			int i;
-			unsigned node_idx = get_irn_idx(node);
-			node_idx = uf_find(congruence_classes, node_idx);
+	req = arch_get_register_req_out(node);
+	if (req->type & arch_register_req_type_should_be_same) {
+		ir_node *insn = skip_Proj(node);
+		int arity = get_irn_arity(insn);
+		int i;
+		unsigned node_idx = get_irn_idx(node);
+		node_idx = uf_find(congruence_classes, node_idx);
-			for (i = 0; i < arity; ++i) {
-				ir_node *op;
-				unsigned op_idx;
+		for (i = 0; i < arity; ++i) {
+			ir_node *live;
+			ir_node *op;
+			int op_idx;
+			ir_nodeset_iterator_t iter;
+			bool interferes = false;
-				if (!rbitset_is_set(&req->other_same, i))
-					continue;
+			if (!rbitset_is_set(&req->other_same, i))
+				continue;
-				op = get_irn_n(pred, i);
-				op_idx = get_irn_idx(op);
-				op_idx = uf_find(congruence_classes, op_idx);
-				node_idx = uf_union(congruence_classes, node_idx, op_idx);
+			op = get_irn_n(insn, i);
+			op_idx = get_irn_idx(op);
+			op_idx = uf_find(congruence_classes, op_idx);
+
+			/* do we interfere with the value */
+			foreach_ir_nodeset(live_nodes, live, iter) {
+				int lv_idx = get_irn_idx(live);
+				lv_idx = uf_find(congruence_classes, lv_idx);
+				if (lv_idx == op_idx) {
+					interferes = true;
+					break;
+				}
+			}
			}
+			/* don't put in same affinity class if we interfere */
+			if (interferes)
+				continue;
+
+			node_idx = uf_union(congruence_classes, node_idx, op_idx);
+			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
+			    node, op));
+			/* one should_be_same is enough... */
+			break;
+		}
+	}
+}
+
+/**
+ * Block walker: build congruence classes for one block.  First walks the
+ * schedule bottom-up, unioning should_be_same defs with their operands
+ * (via congruence_def) while maintaining liveness; then, for the phis
+ * the walk stopped at, unions each phi with its non-interfering inputs,
+ * additionally refusing inputs whose class is already used by another
+ * phi of the same block at the same predecessor position.
+ */
+static void create_congurence_class(ir_node *block, void *data)
+{
+	ir_nodeset_t live_nodes;
+	ir_node *node;
+
+	(void) data;
+	ir_nodeset_init(&live_nodes);
+	be_liveness_end_of_block(lv, cls, block, &live_nodes);
+
+	/* check should be same constraints */
+	sched_foreach_reverse(block, node) {
+		if (is_Phi(node))
+			break;
+
+		congruence_def(&live_nodes, node);
+		be_liveness_transfer(cls, node, &live_nodes);
+	}
+
+	/* check phi congruence classes */
+	sched_foreach_reverse_from(node, node) {
+		int i;
+		int arity;
+		int node_idx;
+		assert(is_Phi(node));
+
+		if (!arch_irn_consider_in_reg_alloc(cls, node))
+			continue;
+
+		node_idx = get_irn_idx(node);
+		node_idx = uf_find(congruence_classes, node_idx);
+
+		arity = get_irn_arity(node);
+		for (i = 0; i < arity; ++i) {
+			bool interferes = false;
+			ir_nodeset_iterator_t iter;
+			ir_node *live;
+			ir_node *phi;
+			ir_node *op = get_Phi_pred(node, i);
+			int op_idx = get_irn_idx(op);
+			op_idx = uf_find(congruence_classes, op_idx);
+
+			/* do we interfere with the value */
+			foreach_ir_nodeset(&live_nodes, live, iter) {
+				int lv_idx = get_irn_idx(live);
+				lv_idx = uf_find(congruence_classes, lv_idx);
+				if (lv_idx == op_idx) {
+					interferes = true;
+					break;
+				}
+			}
+			/* don't put in same affinity class if we interfere */
+			if (interferes)
+				continue;
+			/* any other phi has the same input? */
+			sched_foreach(block, phi) {
+				ir_node *oop;
+				int oop_idx;
+				if (!is_Phi(phi))
+					break;
+				if (!arch_irn_consider_in_reg_alloc(cls, phi))
+					continue;
+				oop = get_Phi_pred(phi, i);
+				if (oop == op)
+					continue;
+				oop_idx = get_irn_idx(oop);
+				oop_idx = uf_find(congruence_classes, oop_idx);
+				if (oop_idx == op_idx) {
+					interferes = true;
+					break;
+				}
+			}
+			if (interferes)
+				continue;
+
+			node_idx = uf_union(congruence_classes, node_idx, op_idx);
+			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
+			    node, op));
		}
-		return;
	}
}
uf_init(congruence_classes, n);
/* create congruence classes */
- irg_walk_graph(irg, create_congurence_class, NULL, NULL);
+ irg_block_walk_graph(irg, create_congurence_class, NULL, NULL);
/* merge preferences */
irg_walk_graph(irg, merge_congruence_prefs, NULL, NULL);
irg_walk_graph(irg, set_congruence_prefs, NULL, NULL);
+ free(congruence_classes);
}
static bool try_optimistic_split(ir_node *to_split, ir_node *before,
                                 float pref, float pref_delta,
-                                 unsigned *output_regs)
+                                 unsigned *forbidden_regs, int recursion)
{
+	const arch_register_t *from_reg;
	const arch_register_t *reg;
-	ir_node *insn;
+	ir_node *original_insn;
	ir_node *block;
	ir_node *copy;
	unsigned r;
+	unsigned from_r;
	unsigned i;
	allocation_info_t *info = get_allocation_info(to_split);
	reg_pref_t *prefs;
	float delta;
+	float split_threshold;
	(void) pref;
	/* stupid hack: don't optimisticallt split don't spill nodes...
	 * (so we don't split away the values produced because of
	 * must_be_different constraints) */
-	insn = skip_Proj(to_split);
-	if (arch_irn_get_flags(insn) & arch_irn_flags_dont_spill)
+	original_insn = skip_Proj(info->original_value);
+	if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
+		return false;
+
+	from_reg = arch_get_irn_register(to_split);
+	from_r = arch_register_get_index(from_reg);
+	block = get_nodes_block(before);
+	split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA;
+
+	/* quick reject: even a perfect target can't reach the threshold */
+	if (pref_delta < split_threshold*0.5)
		return false;
	/* find the best free position where we could move to */
	prefs = ALLOCAN(reg_pref_t, n_regs);
	fill_sort_candidates(prefs, info);
	for (i = 0; i < n_regs; ++i) {
+		float apref;
+		float apref_delta;
+		bool res;
+		bool old_source_state;
+
+		/* we need a normal register which is not an output register
+		   and different from the current register of to_split */
		r = prefs[i].num;
		if (!rbitset_is_set(normal_regs, r))
			continue;
-		if (rbitset_is_set(output_regs, r))
+		if (rbitset_is_set(forbidden_regs, r))
+			continue;
+		if (r == from_r)
			continue;
+
+		/* is the split worth it? */
+		delta = pref_delta + prefs[i].pref;
+		if (delta < split_threshold) {
+			DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n",
+			    to_split, recursion, delta));
+			return false;
+		}
+
+		/* if the register is free then we can do the split */
		if (assignments[r] == NULL)
			break;
+
+		/* otherwise we might try recursively calling optimistic_split */
+		if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION)
+			continue;
+
+		apref = prefs[i].pref;
+		apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0;
+		apref_delta += pref_delta - split_threshold;
+
+		/* our source register isn't a useful destination for recursive
+		   splits */
+		old_source_state = rbitset_is_set(forbidden_regs, from_r);
+		rbitset_set(forbidden_regs, from_r);
+		/* try recursive split */
+		res = try_optimistic_split(assignments[r], before, apref,
+		                           apref_delta, forbidden_regs, recursion+1);
+		/* restore the original forbidden state of our source register */
+		if (old_source_state) {
+			rbitset_set(forbidden_regs, from_r);
+		} else {
+			rbitset_clear(forbidden_regs, from_r);
+		}
+
+		if (res)
+			break;
	}
-	if (i >= n_regs) {
-		return false;
-	}
-	/* TODO: use execfreq somehow... */
-	delta = pref_delta + prefs[i].pref;
-	if (delta < SPLIT_DELTA) {
-		DB((dbg, LEVEL_3, "Not doing optimistical split, win %f too low\n",
-		    delta));
+	if (i >= n_regs)
		return false;
-	}
-	reg = arch_register_for_index(cls, r);
-	block = get_nodes_block(before);
+	reg = arch_register_for_index(cls, r);
	copy = be_new_Copy(cls, block, to_split);
	mark_as_copy_of(copy, to_split);
-	free_reg_of_value(to_split);
+	/* hacky, but correct here: only free from_reg if to_split still
+	   occupies it (presumably it may have been displaced by a recursive
+	   split above — TODO confirm) */
+	if (assignments[arch_register_get_index(from_reg)] == to_split)
+		free_reg_of_value(to_split);
	use_reg(copy, reg);
	sched_add_before(before, copy);
	DB((dbg, LEVEL_3,
-	    "Optimistic live-range split %+F move %+F -> %s before %+F (win %f)\n",
-	    copy, to_split, reg->name, before, delta));
+	    "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n",
+	    copy, to_split, from_reg->name, reg->name, before, delta, recursion));
	return true;
}
* Determine and assign a register for node @p node
*/
static void assign_reg(const ir_node *block, ir_node *node,
- unsigned *output_regs)
+ unsigned *forbidden_regs)
{
const arch_register_t *reg;
allocation_info_t *info;
const unsigned *allowed_regs;
unsigned r;
+ assert(!is_Phi(node));
assert(arch_irn_consider_in_reg_alloc(cls, node));
/* preassigned register? */
}
for (i = 0; i < n_regs; ++i) {
+ float pref, delta;
+ ir_node *before;
+ bool res;
+
r = reg_prefs[i].num;
if (!rbitset_is_set(allowed_regs, r))
continue;
if (assignments[r] == NULL)
break;
- if (!is_Phi(node)) {
- float pref = reg_prefs[i].pref;
- float delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
- ir_node *before = skip_Proj(node);
- bool res = try_optimistic_split(assignments[r], before,
- pref, delta,
- output_regs);
- if (res)
- break;
- }
+ pref = reg_prefs[i].pref;
+ delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
+ before = skip_Proj(node);
+ res = try_optimistic_split(assignments[r], before,
+ pref, delta, forbidden_regs, 0);
+ if (res)
+ break;
}
if (i >= n_regs) {
panic("No register left for %+F\n", node);
for (i = 0; i < arity; ++i) {
ir_node *op = get_irn_n(node, i);
- allocation_info_t *info;
+ allocation_info_t *info = try_get_allocation_info(op);
- if (!arch_irn_consider_in_reg_alloc(cls, op))
+ if (info == NULL)
continue;
- info = get_allocation_info(op);
info = get_allocation_info(info->original_value);
if (info->current_value != op) {
set_irn_n(node, i, info->current_value);
* @param node the current node
*/
static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
- unsigned *output_regs)
+ unsigned *forbidden_regs)
{
int arity = get_irn_arity(node);
- int i, dummy, res;
+ int i, res;
hungarian_problem_t *bp;
unsigned l, r;
unsigned *assignment;
determine_live_through_regs(live_through_regs, node);
}
- rbitset_or(output_regs, req->limited, n_regs);
+ rbitset_or(forbidden_regs, req->limited, n_regs);
if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
good = false;
}
determine_live_through_regs(live_through_regs, node);
if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
good = false;
- rbitset_or(output_regs, req->limited, n_regs);
+ rbitset_or(forbidden_regs, req->limited, n_regs);
}
}
}
continue;
/* livethrough values may not use constrainted output registers */
if (rbitset_is_set(live_through_regs, l)
- && rbitset_is_set(output_regs, r))
+ && rbitset_is_set(forbidden_regs, r))
continue;
hungarian_add(bp, r, l, l == r ? 9 : 8);
}
}
- //hungarian_print_costmatrix(bp, 1);
+ //hungarian_print_cost_matrix(bp, 1);
hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
assignment = ALLOCAN(unsigned, n_regs);
- res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
+ res = hungarian_solve(bp, (int*) assignment, NULL, 0);
assert(res == 0);
#if 0
if (!arch_irn_consider_in_reg_alloc(cls, op))
continue;
- a = find_value_in_block_info(pred_info, op);
+ a = find_value_in_block_info(pred_info, op);
assert(a >= 0);
reg = arch_get_irn_register(node);
* After a phi has been assigned a register propagate preference inputs
* to the phi inputs.
*/
-static void propagate_phi_register(ir_node *phi, unsigned r)
+static void propagate_phi_register(ir_node *phi, unsigned assigned_r)
{
-	int i;
-	ir_node *block = get_nodes_block(phi);
-	int arity = get_irn_arity(phi);
+	int      i;
+	ir_node *block = get_nodes_block(phi);
+	int      arity = get_irn_arity(phi);
	for (i = 0; i < arity; ++i) {
		ir_node *op = get_Phi_pred(phi, i);
		allocation_info_t *info = get_allocation_info(op);
		ir_node *pred_block = get_Block_cfgpred_block(block, i);
+		unsigned r;
		float weight
			= get_block_execfreq(execfreqs, pred_block) * AFF_PHI;
-		if (info->prefs[r] >= weight)
+		if (info->prefs[assigned_r] >= weight)
			continue;
		/* promote the prefered register */
-		info->prefs[r] = AFF_PHI * weight;
+		/* cap every other register's preference at -weight, then raise
+		 * assigned_r to +weight, so the input's strongest preference is
+		 * the register the phi actually got */
+		for (r = 0; r < n_regs; ++r) {
+			if (info->prefs[r] > -weight) {
+				info->prefs[r] = -weight;
+			}
+		}
+		info->prefs[assigned_r] = weight;
+
		if (is_Phi(op))
-			propagate_phi_register(op, r);
+			propagate_phi_register(op, assigned_r);
+	}
+}
+
+/**
+ * Assign registers to all phi nodes of @p block at once by building a
+ * bipartite matching problem (phis x registers) from the accumulated
+ * preferences and solving it with the hungarian algorithm.  Preferences
+ * are log-compressed and offset so all matrix entries are positive.
+ */
+static void assign_phi_registers(ir_node *block)
+{
+	int n_phis = 0;
+	int n;
+	int res;
+	int *assignment;
+	ir_node *node;
+	hungarian_problem_t *bp;
+
+	/* count phi nodes */
+	sched_foreach(block, node) {
+		if (!is_Phi(node))
+			break;
+		if (!arch_irn_consider_in_reg_alloc(cls, node))
+			continue;
+		++n_phis;
+	}
+
+	if (n_phis == 0)
+		return;
+
+	/* build a bipartite matching problem for all phi nodes */
+	bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT);
+	n = 0;
+	sched_foreach(block, node) {
+		unsigned r;
+
+		allocation_info_t *info;
+		if (!is_Phi(node))
+			break;
+		if (!arch_irn_consider_in_reg_alloc(cls, node))
+			continue;
+
+		/* give bonuses for predecessor colorings */
+		adapt_phi_prefs(node);
+		/* add stuff to bipartite problem */
+		info = get_allocation_info(node);
+		DB((dbg, LEVEL_3, "Prefs for %+F: ", node));
+		for (r = 0; r < n_regs; ++r) {
+			float costs;
+
+			if (!rbitset_is_set(normal_regs, r))
+				continue;
+
+			/* compress the preference range and shift it positive */
+			costs = info->prefs[r];
+			costs = costs < 0 ? -logf(-costs+1) : logf(costs+1);
+			costs *= 100;
+			costs += 10000;
+			hungarian_add(bp, n, r, costs);
+			DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name,
+			    info->prefs[r]));
+		}
+		DB((dbg, LEVEL_3, "\n"));
+		++n;
+	}
+
+	//hungarian_print_cost_matrix(bp, 7);
+	hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
+
+	assignment = ALLOCAN(int, n_regs);
+	res = hungarian_solve(bp, assignment, NULL, 0);
+	assert(res == 0);
+
+	/* apply results */
+	n = 0;
+	sched_foreach(block, node) {
+		unsigned r;
+		const arch_register_t *reg;
+
+		if (!is_Phi(node))
+			break;
+		if (!arch_irn_consider_in_reg_alloc(cls, node))
+			continue;
+
+		r = assignment[n++];
+		assert(rbitset_is_set(normal_regs, r));
+		reg = arch_register_for_index(cls, r);
+		DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
+		use_reg(node, reg);
+
+		/* adapt preferences for phi inputs */
+		if (propagate_phi_registers)
+			propagate_phi_register(node, r);
	}
}
{
int i;
ir_nodeset_t live_nodes;
- ir_nodeset_iterator_t iter;
- ir_node *node, *start;
+ ir_node *node;
int n_preds;
block_info_t *block_info;
block_info_t **pred_block_infos;
ir_node **phi_ins;
- unsigned *output_regs; /**< collects registers which must not
- be used for optimistic splits */
+ unsigned *forbidden_regs; /**< collects registers which must
+ not be used for optimistic splits */
(void) data;
DB((dbg, LEVEL_2, "* Block %+F\n", block));
ir_mode *mode = get_irn_mode(node);
const arch_register_req_t *req = get_default_req_current_cls();
ir_node *phi;
- int i;
phi = new_r_Phi(block, n_preds, phi_ins, mode);
be_set_phi_reg_req(phi, req);
DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node));
#ifdef DEBUG_libfirm
- for (i = 0; i < n_preds; ++i) {
- DB((dbg, LEVEL_3, " %+F", phi_ins[i]));
+ {
+ int i;
+ for (i = 0; i < n_preds; ++i) {
+ DB((dbg, LEVEL_3, " %+F", phi_ins[i]));
+ }
+ DB((dbg, LEVEL_3, "\n"));
}
- DB((dbg, LEVEL_3, "\n"));
#endif
mark_as_copy_of(phi, node);
sched_add_after(block, phi);
/* if the node already has a register assigned use it */
reg = arch_get_irn_register(node);
if (reg != NULL) {
- /* TODO: consult pred-block infos here. The value could be copied
- away in some/all predecessor blocks. We need to construct
- phi-nodes in this case.
- We even need to construct some Phi_0 like constructs in cases
- where the predecessor allocation is not determined yet. */
use_reg(node, reg);
}
ir_nodeset_insert(&live_nodes, node);
}
- rbitset_alloca(output_regs, n_regs);
+ rbitset_alloca(forbidden_regs, n_regs);
/* handle phis... */
- node = sched_first(block);
- for ( ; is_Phi(node); node = sched_next(node)) {
- const arch_register_t *reg;
-
- if (!arch_irn_consider_in_reg_alloc(cls, node))
- continue;
-
- /* fill in regs already assigned */
- reg = arch_get_irn_register(node);
- if (reg != NULL) {
- use_reg(node, reg);
- } else {
- adapt_phi_prefs(node);
- assign_reg(block, node, output_regs);
+ assign_phi_registers(block);
- reg = arch_get_irn_register(node);
- propagate_phi_register(node, arch_register_get_index(reg));
+ /* all live-ins must have a register */
+#ifdef DEBUG_libfirm
+ {
+ ir_nodeset_iterator_t iter;
+ foreach_ir_nodeset(&live_nodes, node, iter) {
+ const arch_register_t *reg = arch_get_irn_register(node);
+ assert(reg != NULL);
}
}
- start = node;
-
- /* assign regs for live-in values */
- foreach_ir_nodeset(&live_nodes, node, iter) {
- const arch_register_t *reg = arch_get_irn_register(node);
- if (reg != NULL)
- continue;
-
- assign_reg(block, node, output_regs);
- /* shouldn't happen if we color in dominance order */
- assert (!is_Phi(node));
- }
+#endif
/* assign instructions in the block */
- for (node = start; !sched_is_end(node); node = sched_next(node)) {
- unsigned r;
+ sched_foreach(block, node) {
+ int i;
+ int arity;
+
+ /* phis are already assigned */
+ if (is_Phi(node))
+ continue;
rewire_inputs(node);
/* enforce use constraints */
- rbitset_clear_all(output_regs, n_regs);
- enforce_constraints(&live_nodes, node, output_regs);
- /* we may not use registers occupied here for optimistic splits */
- for (r = 0; r < n_regs; ++r) {
- if (assignments[r] != NULL)
- rbitset_set(output_regs, r);
- }
+ rbitset_clear_all(forbidden_regs, n_regs);
+ enforce_constraints(&live_nodes, node, forbidden_regs);
rewire_inputs(node);
+ /* we may not use registers used for inputs for optimistic splits */
+ arity = get_irn_arity(node);
+ for (i = 0; i < arity; ++i) {
+ ir_node *op = get_irn_n(node, i);
+ const arch_register_t *reg;
+ if (!arch_irn_consider_in_reg_alloc(cls, op))
+ continue;
+
+ reg = arch_get_irn_register(op);
+ rbitset_set(forbidden_regs, arch_register_get_index(reg));
+ }
+
/* free registers of values last used at this instruction */
free_last_uses(&live_nodes, node);
ir_node *proj = get_edge_src_irn(edge);
if (!arch_irn_consider_in_reg_alloc(cls, proj))
continue;
- assign_reg(block, proj, output_regs);
+ assign_reg(block, proj, forbidden_regs);
}
} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
- assign_reg(block, node, output_regs);
+ assign_reg(block, node, forbidden_regs);
}
}
}
}
+typedef struct block_costs_t block_costs_t;
+struct block_costs_t {
+	float costs;   /**< costs of the block */
+	int   dfs_num; /**< depth first search number (to detect backedges) */
+};
+
+/** qsort comparator: orders blocks by DESCENDING costs (arguments to
+ *  QSORT_CMP are swapped), reading the block_costs_t from the link. */
+static int cmp_block_costs(const void *d1, const void *d2)
+{
+	const ir_node * const *block1 = d1;
+	const ir_node * const *block2 = d2;
+	const block_costs_t *info1 = get_irn_link(*block1);
+	const block_costs_t *info2 = get_irn_link(*block2);
+	return QSORT_CMP(info2->costs, info1->costs);
+}
+
+/**
+ * Compute a processing order for the basic blocks and store it in the
+ * globals block_order / n_block_order.
+ *
+ * Each block's cost is its execution frequency plus the accumulated
+ * costs of its non-backedge predecessors.  Starting from the most
+ * expensive unvisited block we repeatedly walk to the most expensive
+ * unvisited predecessor; because the chain is pushed/popped LIFO on the
+ * worklist, predecessors end up before their successors in the order.
+ */
+static void determine_block_order(void)
+{
+	int i;
+	ir_node **blocklist = be_get_cfgpostorder(irg);
+	int n_blocks = ARR_LEN(blocklist);
+	int dfs_num = 0;
+	pdeq *worklist = new_pdeq();
+	ir_node **order = XMALLOCN(ir_node*, n_blocks);
+	int order_p = 0;
+
+	/* clear block links... */
+	for (i = 0; i < n_blocks; ++i) {
+		ir_node *block = blocklist[i];
+		set_irn_link(block, NULL);
+	}
+
+	/* walk blocks in reverse postorder, the costs for each block are the
+	 * sum of the costs of its predecessors (excluding the costs on backedges
+	 * which we can't determine) */
+	for (i = n_blocks-1; i >= 0; --i) {
+		block_costs_t *cost_info;
+		ir_node *block = blocklist[i];
+
+		float execfreq = get_block_execfreq(execfreqs, block);
+		float costs = execfreq;
+		int n_cfgpreds = get_Block_n_cfgpreds(block);
+		int p;
+		for (p = 0; p < n_cfgpreds; ++p) {
+			ir_node *pred_block = get_Block_cfgpred_block(block, p);
+			block_costs_t *pred_costs = get_irn_link(pred_block);
+			/* we don't have any info for backedges */
+			if (pred_costs == NULL)
+				continue;
+			costs += pred_costs->costs;
+		}
+
+		cost_info = OALLOCZ(&obst, block_costs_t);
+		cost_info->costs = costs;
+		cost_info->dfs_num = dfs_num++;
+		set_irn_link(block, cost_info);
+	}
+
+	/* sort array by block costs */
+	qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs);
+
+	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+	inc_irg_block_visited(irg);
+
+	for (i = 0; i < n_blocks; ++i) {
+		ir_node *block = blocklist[i];
+		if (Block_block_visited(block))
+			continue;
+
+		/* continually add predecessors with highest costs to worklist
+		 * (without using backedges) */
+		do {
+			block_costs_t *info = get_irn_link(block);
+			ir_node *best_pred = NULL;
+			float best_costs = -1;
+			int n_cfgpred = get_Block_n_cfgpreds(block);
+			int p;
+
+			pdeq_putr(worklist, block);
+			mark_Block_block_visited(block);
+			for (p = 0; p < n_cfgpred; ++p) {
+				ir_node *pred_block = get_Block_cfgpred_block(block, p);
+				block_costs_t *pred_info = get_irn_link(pred_block);
+
+				/* ignore backedges */
+				if (pred_info->dfs_num > info->dfs_num)
+					continue;
+
+				/* bugfix: compare the PREDECESSOR's costs; the previous
+				 * code compared info->costs (loop-invariant), so the
+				 * first non-backedge predecessor always won */
+				if (pred_info->costs > best_costs) {
+					best_costs = pred_info->costs;
+					best_pred = pred_block;
+				}
+			}
+			block = best_pred;
+		} while (block != NULL && !Block_block_visited(block));
+
+		/* now put all nodes in the worklist in our final order */
+		while (!pdeq_empty(worklist)) {
+			ir_node *pblock = pdeq_getr(worklist);
+			assert(order_p < n_blocks);
+			order[order_p++] = pblock;
+		}
+	}
+	assert(order_p == n_blocks);
+	del_pdeq(worklist);
+
+	ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+
+	DEL_ARR_F(blocklist);
+
+	/* cost infos live on the obstack; flush it (and re-init for later
+	 * users) now that they are no longer needed */
+	obstack_free(&obst, NULL);
+	obstack_init(&obst);
+
+	block_order = order;
+	n_block_order = n_blocks;
+}
+
/**
 * Run the register allocator for the current register class.
 */
static void be_straight_alloc_cls(void)
{
+	int i;
+
	lv = be_assure_liveness(birg);
	be_liveness_assure_sets(lv);
-	be_liveness_assure_chk(lv);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
	DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name));
	be_clear_links(irg);
+
	irg_block_walk_graph(irg, NULL, analyze_block, NULL);
-	combine_congruence_classes();
-	/* we need some dominance pre-order walk to ensure we see all
-	 * definitions/create copies before we encounter their users */
-	dom_tree_walk_irg(irg, allocate_coalesce_block, NULL, NULL);
+	if (create_congruence_classes)
+		combine_congruence_classes();
+
+	/* color blocks in the precomputed cost-based order (block_order,
+	 * filled by determine_block_order) instead of a dominance walk */
+	for (i = 0; i < n_block_order; ++i) {
+		ir_node *block = block_order[i];
+		allocate_coalesce_block(block, NULL);
+	}
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
}
irg = be_get_birg_irg(birg);
execfreqs = birg->exec_freq;
- /* TODO: extract some of the stuff from bechordal allocator, like
- * statistics, time measurements, etc. and use them here too */
+ /* determine a good coloring order */
+ determine_block_order();
for (c = 0; c < n_cls; ++c) {
cls = arch_env_get_reg_class(arch_env, c);
static be_ra_t be_ra_straight = {
be_straight_alloc,
};
-
- FIRM_DBG_REGISTER(dbg, "firm.be.straightalloc");
+ lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
+ lc_opt_entry_t *straightalloc_group = lc_opt_get_grp(be_grp, "straightalloc");
+ lc_opt_add_table(straightalloc_group, options);
be_register_allocator("straight", &be_ra_straight);
+ FIRM_DBG_REGISTER(dbg, "firm.be.straightalloc");
}
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_straight_alloc);