* @file
* @brief Belady's spill algorithm version 2.
* @author Daniel Grund, Matthias Braun, Sebastian Hack
- * @date 20.09.2005
+ * @date 01.08.2007
* @version $Id$
+ *
+ * The main differences to the original Belady are:
+ * - The workset is empty at the start of a block
+ * There is no attempt to fill it with variables which
+ * are not used in the block.
+ * - There is a global pass which tries to use the remaining
+ * capacity of the blocks to let global variables live through
+ * them.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
+#include <limits.h>
#include "obst.h"
#include "irnodeset.h"
+#include "irbitset.h"
#include "irprintf_t.h"
#include "irgraph.h"
#include "irnode.h"
#include "irgwalk.h"
#include "irloop.h"
#include "iredges_t.h"
+#include "irphase_t.h"
#include "ircons_t.h"
#include "irprintf.h"
#include "execfreq.h"
#define DBG_TRACE 64
#define DBG_WORKSET 128
#define DBG_GLOBAL 256
-DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
+#define DEAD UINT_MAX
+DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/**
* An association between a node and a point in time.
typedef struct _belady_env_t {
struct obstack ob;
+ ir_graph *irg;
const arch_env_t *arch;
const arch_register_class_t *cls;
be_lv_t *lv;
} belady_env_t;
-static int loc_compare_idx(const void *a, const void *b)
-{
- const loc_t *p = a;
- const loc_t *q = b;
- return (int) get_irn_idx(p->irn) - (int) get_irn_idx(q->irn);
-}
static int loc_compare(const void *a, const void *b)
{
const loc_t *p = a;
const loc_t *q = b;
- int diff = (int) p->time - (int) q->time;
- return diff != 0 ? diff : loc_compare_idx(a, b);
+ return (int) p->time - (int) q->time;
}
static INLINE void workset_print(const workset_t *w)
#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
#define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
-typedef struct _block_use_t {
- struct _block_use_t *next;
- ir_node *insn;
- int pos, tick;
-} block_use_t;
-
typedef struct _block_info_t {
belady_env_t *bel;
const ir_node *bl;
ir_node *first_non_in; /**< First node in block which is not a phi. */
workset_t *ws_start, *ws_end;
+ ir_phase next_uses;
workset_t *entrance_reg; /**< That set will contain all values
transported into the block which
bring them into the block in a register
or reload them at the entry of the block. */
- workset_t * entrance_mem; /**< That set will contain all variables
- (transported into the block)
- which are in the memory upon entering
- the block:
- entrance_reg U entrance_mem = live-in + local phis. */
-
int pressure; /**< The amount of registers which remain free
in this block. This capacity can be used to let
global variables, transported into other blocks,
double exec_freq; /**< The execution frequency of this block. */
} block_info_t;
-
static INLINE void *new_block_info(belady_env_t *bel, ir_node *bl) {
block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
memset(res, 0, sizeof(res[0]));
#define get_block_info(block) ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info) set_irn_link(block, info)
-/**
- * @return The distance to the next use or 0 if irn has dont_spill flag set
- */
-static INLINE unsigned get_distance(belady_env_t *env, ir_node *from, unsigned from_step, const ir_node *def, int skip_from_uses)
+typedef struct _next_use_t {
+ ir_node *user;
+ int step;
+ struct _next_use_t *next;
+} next_use_t;
+
+static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)
{
- be_next_use_t use;
- int flags = arch_irn_get_flags(env->arch, def);
+ (void) phase;
+ (void) irn;
+ (void) old;
+ return NULL;
+}
+
+static void build_next_uses(block_info_t *bi)
+{
+ ir_node *irn;
- assert(! (flags & arch_irn_flags_ignore));
+ phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
+ sched_foreach_reverse(bi->bl, irn) {
+ int i, step = sched_get_time_step(irn);
- use = be_get_next_use(env->uses, from, from_step, def, skip_from_uses);
- if(USES_IS_INFINITE(use.time))
- return USES_INFINITY;
+ if (is_Phi(irn))
+ break;
- /* We have to keep nonspillable nodes in the workingset */
- if(flags & arch_irn_flags_dont_spill)
- return 0;
+ for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
+ ir_node *op = get_irn_n(irn, i);
+ next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
+ next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
- return use.time;
+ assert(step >= 0);
+ use->user = irn;
+ use->step = step;
+ use->next = curr;
+ phase_set_irn_data(&bi->next_uses, op, use);
+ }
+ }
}
/**
/* Only make more free room if we do not have enough */
if (len > max_allowed) {
+ int curr_step = sched_get_time_step(env->instr);
/* get current next-use distance */
for (i = 0; i < ws->len; ++i) {
+ ir_node *val = workset_get_val(ws, i);
+ next_use_t *use = phase_get_irn_data(&bi->next_uses, val);
+ assert(use == NULL || use->step >= curr_step);
+ workset_set_time(ws, i, use ? (unsigned) (use->step - curr_step) : DEAD);
+
+#if 0
unsigned dist = get_distance(env, env->instr, env->instr_nr, workset_get_val(ws, i), !is_usage);
workset_set_time(ws, i, dist);
+#endif
}
/* sort entries by increasing nextuse-distance*/
int iter;
/* process the block from start to end */
- DBG((dbg, DBG_WSETS, "Processing...\n"));
+ DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl));
env->instr_nr = 0;
new_vals = new_workset(env, &env->ob);
block_info->first_non_in = NULL;
+ build_next_uses(block_info);
+ workset_clear(env->ws);
sched_foreach(block, irn) {
int i, arity;
assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
}
displace(block_info, new_vals, 1);
+ block_info->pressure = MAX(block_info->pressure, workset_get_length(env->ws));
+
+ /*
+ * set all used variables to the next use in their next_use_t list
+ * Also, kill all dead variables from the workset. They are only
+ * augmenting the pressure. Note, that a variable is dead
+ * if it has no further use in this block and is *not* live end
+ */
+ for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
+ ir_node *op = get_irn_n(irn, i);
+ next_use_t *use = phase_get_irn_data(&block_info->next_uses, op);
+
+ assert(use);
+ if (!use->next && !be_is_live_end(env->lv, block, op))
+ workset_remove(env->ws, op);
+
+ phase_set_irn_data(&block_info->next_uses, op, use->next);
+ }
+
/* allocate all values _defined_ by this instruction */
workset_clear(new_vals);
if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
}
displace(block_info, new_vals, 0);
- block_info->pressure = MAX(block_info->pressure, workset_get_length(env->ws));
env->instr_nr++;
}
+ phase_free(&block_info->next_uses);
+
/* Remember end-workset for this block */
block_info->ws_end = workset_clone(env, &env->ob, env->ws);
DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
typedef struct _global_end_state_t {
belady_env_t *env;
+ bitset_t *succ_phis;
struct obstack obst;
block_end_state_t *end_info;
unsigned gauge;
if (slot > 0) {
int gauge = ges->gauge;
- double reload_here = get_block_execfreq(bi->bel->ef, bl);
+ double reload_here = bi->exec_freq;
double bring_in = bi->pressure < n_regs ? can_bring_in(ges, bl, irn, level + 1) : HUGE_VAL;
DBG((dbg, DBG_GLOBAL, "\t%{firm:indent}there is a free slot. capacity=%d, reload here=%f, bring in=%f\n",
bes->live_through = 1;
bes->costs = bring_in;
}
+
+ end->vals[slot].irn = irn;
+ end->vals[slot].version = ges->version;
}
}
/* insert the reload if the val was reloaded at the block's end */
if (bes->reload_at_end) {
- be_add_reload(env->senv, bes->irn, bes->bl, env->cls, 1);
+ be_add_reload_at_end(env->senv, bes->irn, bes->bl, env->cls, 1);
DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", bes->irn, bes->bl));
}
* if the variable is live through the block,
* update the pressure indicator.
*/
- bi->pressure += bes->live_through;
+ bi->pressure = MAX(bi->pressure + bes->live_through, workset_get_length(bes->end_state));
idx = workset_get_index(bes->end_state, bes->irn);
/* process all variables which shall be in a reg at
* the beginning of the block in the order of the next use. */
workset_foreach(bi->entrance_reg, irn, i) {
+ int is_entrance_phi = is_Phi(irn) && get_nodes_block(irn) == block;
double bring_in_costs;
/* reset the gauge and begin the search */
/* we were not able to let the value arrive
* in a register at the entrance of the block
* so we have to do the reload locally */
- if (bring_in_costs >= HUGE_VAL) {
- if (is_Phi(irn) && get_nodes_block(irn) == block)
- be_spill_phi(env->senv, irn);
- else
- be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1);
+ if (bring_in_costs > bi->exec_freq) {
+ DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f -> doing reload at beginning\n",
+ bring_in_costs, bi->exec_freq));
+
+ be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1);
}
- else
+ else {
+ /*
+ * Mark this phi as succeeded.
+ * It was not replaced by a reload at the block's entrance
+ * and thus is not phi_spilled.
+ */
+ if (is_entrance_phi)
+ bitset_add_irn(ges->succ_phis, irn);
+
materialize_and_commit_end_state(ges);
+ }
}
}
int i;
obstack_init(&ges.obst);
- ges.gauge = 0;
- ges.env = env;
- ges.version = -1;
- ges.end_info = NEW_ARR_F(block_end_state_t, env->n_blocks);
+ ges.gauge = 0;
+ ges.env = env;
+ ges.version = -1;
+ ges.end_info = NEW_ARR_F(block_end_state_t, env->n_blocks);
+ ges.succ_phis = bitset_irg_obstack_alloc(&env->ob, env->irg);
/*
* sort the blocks according to execution frequency.
for (i = 0; i < env->n_blocks; ++i)
fix_block_borders(&ges, env->blocks[i]);
+
+ /*
+ * Now we spill phis which cannot be kept since they were replaced
+ * by reloads at the block entrances.
+ */
+ for (i = 0; i < env->n_blocks; ++i) {
+ ir_node *bl = env->blocks[i];
+ ir_node *irn;
+
+ sched_foreach(bl, irn) {
+ if (!is_Phi(irn))
+ break;
+
+ if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
+ && !bitset_contains_irn(ges.succ_phis, irn))
+ be_spill_phi(env->senv, irn);
+ }
+ }
+
}
static void collect_blocks(ir_node *bl, void *data)
/* init belady env */
obstack_init(&env.ob);
+ env.irg = irg;
env.arch = birg->main_env->arch_env;
env.cls = cls;
env.lv = be_get_birg_liveness(birg);