/*
- * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
+ * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
*
* This file is part of libFirm.
*
* @date 20.09.2005
* @version $Id$
*/
-#ifdef HAVE_CONFIG_H
#include "config.h"
-#endif
+
+#include <stdbool.h>
#include "obst.h"
#include "irprintf_t.h"
#include "ircons_t.h"
#include "irprintf.h"
#include "irnodeset.h"
-#include "xmalloc.h"
-#include "pdeq.h"
#include "beutil.h"
-#include "bearch_t.h"
+#include "bearch.h"
#include "beuses.h"
-#include "besched_t.h"
+#include "besched.h"
#include "beirgmod.h"
#include "belive_t.h"
-#include "benode_t.h"
+#include "benode.h"
#include "bechordal_t.h"
-#include "bespilloptions.h"
-#include "beloopana.h"
-#include "beirg_t.h"
#include "bespill.h"
+#include "beloopana.h"
+#include "beirg.h"
+#include "bespillutil.h"
#include "bemodule.h"
#define DBG_SPILL 1
#define DBG_WORKSET 128
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
-/* factor to weight the different costs of reloading/rematerializing a node
- (see bespill.h be_get_reload_costs_no_weight) */
-#define RELOAD_COST_FACTOR 10
-
-typedef enum {
- value_not_reloaded, /* the value has not been reloaded */
- value_partially_reloaded, /* the value has been reloaded on some paths */
- value_reloaded /* the value has been reloaded on all paths */
-} reloaded_state_t;
+#define TIME_UNDEFINED 6666
/**
* An association between a node and a point in time.
typedef struct loc_t {
ir_node *node;
unsigned time; /**< A use time (see beuses.h). */
- reloaded_state_t reloaded; /**< the value is a reloaded value */
+ bool spilled; /**< value was already spilled on this path */
} loc_t;
-typedef struct _workset_t {
- int len; /**< current length */
- loc_t vals[0]; /**< inlined array of the values/distances in this working set */
+typedef struct workset_t {
+ unsigned len; /**< current length */
+ loc_t vals[0]; /**< array of the values/distances in this working set */
} workset_t;
static struct obstack obst;
-static const arch_env_t *arch_env;
static const arch_register_class_t *cls;
static const be_lv_t *lv;
static be_loopana_t *loop_ana;
-static int n_regs;
+static unsigned n_regs;
static workset_t *ws; /**< the main workset used while
processing a block. */
static be_uses_t *uses; /**< env for the next-use magic */
static ir_node *instr; /**< current instruction */
-static unsigned instr_nr; /**< current instruction number
- (relative to block start) */
-static ir_nodeset_t used;
static spill_env_t *senv; /**< see bespill.h */
-static pdeq *worklist;
-
-static int loc_compare(const void *a, const void *b)
-{
- const loc_t *p = a;
- const loc_t *q = b;
- return p->time - q->time;
-}
+static ir_node **blocklist;
-void workset_print(const workset_t *w)
-{
- int i;
+static int move_spills = true;
+static int respectloopdepth = true;
+static int improve_known_preds = true;
+/* factor to weight the different costs of reloading/rematerializing a node
+ (see bespill.h be_get_reload_costs_no_weight) */
+static int remat_bonus = 10;
- for(i = 0; i < w->len; ++i) {
- ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
- }
-}
+static const lc_opt_table_entry_t options[] = { /* each entry binds an option name and help text to one of the static tunables declared above */
+ LC_OPT_ENT_BOOL ("movespills", "try to move spills out of loops", &move_spills),
+ LC_OPT_ENT_BOOL ("respectloopdepth", "outermost loop cutting", &respectloopdepth),
+ LC_OPT_ENT_BOOL ("improveknownpreds", "known preds cutting", &improve_known_preds),
+ LC_OPT_ENT_INT ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
+ LC_OPT_LAST /* presumably the end-of-table marker -- confirm against the lc_opt headers */
+};
/**
* Alloc a new workset on obstack @p ob with maximum size @p max
*/
static workset_t *new_workset(void)
{
- workset_t *res;
- size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
-
- res = obstack_alloc(&obst, size);
- memset(res, 0, size);
- return res;
+ return OALLOCFZ(&obst, workset_t, vals, n_regs); /* replaces the removed obstack_alloc + memset pair: a workset with n_regs trailing vals[] slots, presumably zero-filled as before */
}
/**
*/
static workset_t *workset_clone(workset_t *workset)
{
- workset_t *res;
- size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
- res = obstack_alloc(&obst, size);
- memcpy(res, workset, size);
+ workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs); /* allocated on the file-wide obstack, like every other workset */
+ memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0])); /* copies the header plus all n_regs slots, not only the first len */
return res;
}
* Inserts the value @p val into the workset, iff it is not
* already contained. The workset must not be full.
*/
-static void workset_insert(workset_t *workset, ir_node *val, int reloaded)
+static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
{
- loc_t *loc;
- int i;
+ loc_t *loc;
+ unsigned i;
/* check for current regclass */
- assert(arch_irn_consider_in_reg_alloc(arch_env, cls, val));
+ assert(arch_irn_consider_in_reg_alloc(cls, val));
/* check if val is already contained */
for (i = 0; i < workset->len; ++i) {
loc = &workset->vals[i];
if (loc->node == val) {
- if(!loc->reloaded) {
- loc->reloaded = reloaded;
+ if (spilled) {
+ loc->spilled = true;
}
return;
}
assert(workset->len < n_regs && "Workset already full!");
loc = &workset->vals[workset->len];
loc->node = val;
- loc->reloaded = reloaded;
- loc->time = 6666; /* undefined yet */
+ loc->spilled = spilled;
+ loc->time = TIME_UNDEFINED;
workset->len++;
}
/**
* Removes the value @p val from the workset if present.
*/
-static INLINE void workset_remove(workset_t *workset, ir_node *val)
+static void workset_remove(workset_t *workset, ir_node *val)
{
- int i;
- for(i = 0; i < workset->len; ++i) {
+ unsigned i;
+ for (i = 0; i < workset->len; ++i) {
if (workset->vals[i].node == val) {
workset->vals[i] = workset->vals[--workset->len];
return;
}
}
-static INLINE int workset_contains(const workset_t *ws, const ir_node *val)
+static const loc_t *workset_contains(const workset_t *ws, const ir_node *val)
{
- int i;
-
- for(i=0; i<ws->len; ++i) {
+ unsigned i;
+ for (i = 0; i < ws->len; ++i) { /* linear scan over the len occupied slots */
if (ws->vals[i].node == val)
- return 1;
+ return &ws->vals[i]; /* hand out the slot itself so callers can also read its spilled flag */
}
- return 0;
+ return NULL; /* val is not held in this workset */
+}
+
+/**
+ * qsort() comparator that orders loc_t entries by ascending use time.
+ *
+ * The times are unsigned, so they are compared explicitly rather than
+ * subtracted: an unsigned difference converted to int has the wrong sign
+ * as soon as the two operands are more than INT_MAX apart.
+ *
+ * @param a  pointer to the first loc_t
+ * @param b  pointer to the second loc_t
+ * @return   -1, 0 or 1 if @p a sorts before, equal to or after @p b
+ */
+static int loc_compare(const void *a, const void *b)
+{
+	const loc_t *p = (const loc_t*)a;
+	const loc_t *q = (const loc_t*)b;
+
+	return (p->time > q->time) - (p->time < q->time);
+}
+
+static void workset_sort(workset_t *workset)
+{
+ qsort(workset->vals, workset->len, sizeof(workset->vals[0]), loc_compare); /* only the len occupied slots are reordered; order is whatever loc_compare defines on the time field */
+}
+
+static inline unsigned workset_get_time(const workset_t *workset, unsigned idx)
+{
+ return workset->vals[idx].time; /* time stored in slot idx; idx is not range-checked here */
+}
+
+static inline void workset_set_time(workset_t *workset, unsigned idx,
+ unsigned time)
+{
+ workset->vals[idx].time = time; /* overwrites the time of slot idx; idx is not range-checked here */
+}
+
+static inline unsigned workset_get_length(const workset_t *workset)
+{
+ return workset->len; /* number of occupied slots */
+}
+
+static inline void workset_set_length(workset_t *workset, unsigned len)
+{
+ workset->len = len; /* only the counter changes; the slots themselves are left untouched */
+}
+
+static inline ir_node *workset_get_val(const workset_t *workset, unsigned idx)
+{
+ return workset->vals[idx].node; /* node stored in slot idx; idx is not range-checked here */
}
/**
* @p v A variable to put the current value in
* @p i An integer for internal use
*/
-#define workset_foreach(ws, v, i) for(i=0; \
- v=(i < ws->len) ? ws->vals[i].node : NULL, i < ws->len; \
- ++i)
-
-#define workset_set_time(ws, i, t) (ws)->vals[i].time=t
-#define workset_get_time(ws, i) (ws)->vals[i].time
-#define workset_set_length(ws, length) (ws)->len = length
-#define workset_get_length(ws) ((ws)->len)
-#define workset_get_val(ws, i) ((ws)->vals[i].node)
-#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
-
-typedef struct _block_info_t
-{
+/* All macro arguments are parenthesised so that arbitrary expressions may be
+ * passed for ws, v and i. When the loop runs to completion v is NULL and i
+ * equals the workset length. */
+#define workset_foreach(ws, v, i) \
+	for ((i) = 0; \
+	     (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; \
+	     ++(i))
+
+typedef struct block_info_t {
workset_t *start_workset;
workset_t *end_workset;
} block_info_t;
-
-static void *new_block_info(void)
+static block_info_t *new_block_info(void)
{
- block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
- memset(res, 0, sizeof(res[0]));
+ return OALLOCZ(&obst, block_info_t); /* replaces the removed obstack_alloc + memset pair; presumably still zero-filled, so both workset pointers start out NULL */
+}
- return res;
+static inline block_info_t *get_block_info(const ir_node *block)
+{
+ return (block_info_t*)get_irn_link(block); /* the block's link field carries its block_info_t; callers in this file test the result against NULL */
}
-#define get_block_info(block) ((block_info_t *)get_irn_link(block))
-#define set_block_info(block, info) set_irn_link(block, info)
+static inline void set_block_info(ir_node *block, block_info_t *info)
+{
+ set_irn_link(block, info); /* stores info in the block's link field, where get_block_info reads it back */
+}
/**
* @return The distance to the next use or 0 if irn has dont_spill flag set
*/
-static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
- const ir_node *def, int skip_from_uses)
+static unsigned get_distance(ir_node *from, const ir_node *def, int skip_from_uses)
{
be_next_use_t use;
- int flags = arch_irn_get_flags(arch_env, def);
unsigned costs;
unsigned time;
- assert(! (flags & arch_irn_flags_ignore));
+ assert(!arch_irn_is_ignore(def));
- use = be_get_next_use(uses, from, from_step, def, skip_from_uses);
- if(USES_IS_INFINITE(use.time))
+ use = be_get_next_use(uses, from, def, skip_from_uses);
+ time = use.time; /* start from the raw next-use time; the remat adjustment below is added on top */
+ if (USES_IS_INFINITE(time))
return USES_INFINITY;
/* We have to keep nonspillable nodes in the workingset */
- if(flags & arch_irn_flags_dont_spill)
+ if (arch_irn_get_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill) /* checked only after the infinite test, so a value without further use still reports USES_INFINITY */
return 0;
- costs = be_get_reload_costs_no_weight(senv, def, use.before);
- assert(costs * RELOAD_COST_FACTOR < 1000);
- time = use.time + 1000 - (costs * RELOAD_COST_FACTOR);
+ /* give some bonus to rematerialisable nodes */
+ if (remat_bonus > 0) { /* a bonus of zero or less switches the adjustment off entirely */
+ costs = be_get_reload_costs_no_weight(senv, def, use.before);
+ assert(costs * remat_bonus < 1000); /* keeps the subtraction below from wrapping around */
+ time += 1000 - (costs * remat_bonus); /* the cheaper the reload, the larger the reported distance */
+ }
return time;
}
*/
static void displace(workset_t *new_vals, int is_usage)
{
- ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));
+ ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
+ bool *spilled = ALLOCAN(bool, n_regs);
ir_node *val;
int i;
int len;
int spills_needed;
int demand;
- int iter;
+ unsigned iter;
/* 1. Identify the number of needed slots and the values to reload */
demand = 0;
workset_foreach(new_vals, val, iter) {
- /* mark value as used */
- if (is_usage)
- ir_nodeset_insert(&used, val);
+ bool reloaded = false;
if (! workset_contains(ws, val)) {
DB((dbg, DBG_DECIDE, " insert %+F\n", val));
if (is_usage) {
DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
be_add_reload(senv, val, instr, cls, 1);
+ reloaded = true;
}
} else {
DB((dbg, DBG_DECIDE, " %+F already in workset\n", val));
* spilled */
workset_remove(ws, val);
}
- to_insert[demand++] = val;
+ spilled[demand] = reloaded;
+ to_insert[demand] = val;
+ ++demand;
}
/* 2. Make room for at least 'demand' slots */
/* Only make more free room if we do not have enough */
if (spills_needed > 0) {
- ir_node *curr_bb = get_nodes_block(instr);
- workset_t *ws_start = get_block_info(curr_bb)->start_workset;
-
DB((dbg, DBG_DECIDE, " disposing %d values\n", spills_needed));
/* calculate current next-use distance for live values */
for (i = 0; i < len; ++i) {
ir_node *val = workset_get_val(ws, i);
- unsigned dist = get_distance(instr, instr_nr, val, !is_usage);
+ unsigned dist = get_distance(instr, val, !is_usage);
workset_set_time(ws, i, dist);
}
/* sort entries by increasing nextuse-distance*/
workset_sort(ws);
- /* Logic for not needed live-ins: If a value is disposed
- * before its first usage, remove it from start workset
- * We don't do this for phis though */
for (i = len - spills_needed; i < len; ++i) {
ir_node *val = ws->vals[i].node;
DB((dbg, DBG_DECIDE, " disposing node %+F (%u)\n", val,
workset_get_time(ws, i)));
- if(!USES_IS_INFINITE(ws->vals[i].time)
- && !ws->vals[i].reloaded) {
- //be_add_spill(senv, val, instr);
- }
-
- if (!is_Phi(val) && ! ir_nodeset_contains(&used, val)) {
- workset_remove(ws_start, val);
- DB((dbg, DBG_DECIDE, " (and removing %+F from start workset)\n", val));
+ if (move_spills) {
+ if (!USES_IS_INFINITE(ws->vals[i].time)
+ && !ws->vals[i].spilled) {
+ ir_node *after_pos = sched_prev(instr);
+ DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
+ after_pos));
+ be_add_spill(senv, val, after_pos);
+ }
}
}
for (i = 0; i < demand; ++i) {
ir_node *val = to_insert[i];
- workset_insert(ws, val, 1);
+ workset_insert(ws, val, spilled[i]);
+ }
+}
+
+enum {
+ AVAILABLE_EVERYWHERE, /**< the value was found in the workset of every predecessor */
+ AVAILABLE_NOWHERE, /**< the value was found in no predecessor workset */
+ AVAILABLE_PARTLY, /**< the value was found in some, but not all, predecessor worksets */
+ AVAILABLE_UNKNOWN /**< used by decide_start_workset when at least one predecessor has no block info yet */
+};
+
+/**
+ * Classifies in how many of the predecessor worksets @p value is held.
+ *
+ * @param pred_worksets    one workset per predecessor; every entry is
+ *                         dereferenced, so none may be NULL
+ * @param n_pred_worksets  number of entries in @p pred_worksets, must be > 0
+ * @param value            the value to look for
+ * @param is_local_phi     if true, @p value must be a Phi and for
+ *                         predecessor i its i-th operand is looked up
+ *                         instead of the Phi itself
+ * @return AVAILABLE_EVERYWHERE, AVAILABLE_NOWHERE or AVAILABLE_PARTLY
+ */
+static unsigned available_in_all_preds(workset_t* const* pred_worksets,
+                                       size_t n_pred_worksets,
+                                       const ir_node *value, bool is_local_phi)
+{
+	size_t i;
+	bool   avail_everywhere = true;
+	bool   avail_nowhere    = true;
+
+	assert(n_pred_worksets > 0);
+	assert(!is_local_phi || is_Phi(value));
+
+	for (i = 0; i < n_pred_worksets; ++i) {
+		/* for a local Phi, look up the operand belonging to predecessor i */
+		const ir_node *l_value
+			= is_local_phi ? get_irn_n(value, i) : value;
+
+		/* use the shared lookup helper instead of a second hand-written
+		 * scan; this also avoids mixing int with the unsigned length */
+		if (workset_contains(pred_worksets[i], l_value) != NULL) {
+			avail_nowhere = false;
+		} else {
+			avail_everywhere = false;
+		}
+	}
+
+	if (avail_everywhere) {
+		/* with at least one predecessor, at least one lookup succeeded */
+		assert(!avail_nowhere);
+		return AVAILABLE_EVERYWHERE;
+	}
+	if (avail_nowhere) {
+		return AVAILABLE_NOWHERE;
+	}
+	return AVAILABLE_PARTLY;
+}
* @param loop the loop of the node
*/
static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
- ir_loop *loop)
+ ir_loop *loop, unsigned available)
{
be_next_use_t next_use;
loc_t loc;
- loc.time = USES_INFINITY;
- loc.node = node;
- //loc.reloaded = rand() % 2; /* provoke a bug... */
- loc.reloaded = 0;
+ loc.time = USES_INFINITY;
+ loc.node = node;
+ loc.spilled = false;
- if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
+ if (!arch_irn_consider_in_reg_alloc(cls, node)) {
loc.time = USES_INFINITY;
return loc;
}
/* We have to keep nonspillable nodes in the workingset */
- if(arch_irn_get_flags(arch_env, node) & arch_irn_flags_dont_spill) {
+ if (arch_irn_get_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
loc.time = 0;
DB((dbg, DBG_START, " %+F taken (dontspill node)\n", node, loc.time));
return loc;
}
- next_use = be_get_next_use(uses, first, 0, node, 0);
- if(USES_IS_INFINITE(next_use.time)) {
- // the nodes marked as live in shouldn't be dead, so it must be a phi
+ next_use = be_get_next_use(uses, first, node, 0);
+ if (USES_IS_INFINITE(next_use.time)) {
+ /* the nodes marked as live in shouldn't be dead, so it must be a phi */
assert(is_Phi(node));
loc.time = USES_INFINITY;
DB((dbg, DBG_START, " %+F not taken (dead)\n", node));
- if(is_Phi(node)) {
- be_spill_phi(senv, node);
- }
return loc;
}
loc.time = next_use.time;
- if(next_use.outermost_loop >= get_loop_depth(loop)) {
+ if (improve_known_preds) {
+ if (available == AVAILABLE_EVERYWHERE) {
+ DB((dbg, DBG_START, " %+F taken (%u, live in all preds)\n",
+ node, loc.time));
+ return loc;
+ } else if (available == AVAILABLE_NOWHERE) {
+ DB((dbg, DBG_START, " %+F not taken (%u, live in no pred)\n",
+ node, loc.time));
+ loc.time = USES_INFINITY;
+ return loc;
+ }
+ }
+
+ if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
DB((dbg, DBG_START, " %+F taken (%u, loop %d)\n", node, loc.time,
next_use.outermost_loop));
} else {
DB((dbg, DBG_START, " %+F delayed (outerdepth %d < loopdepth %d)\n",
node, next_use.outermost_loop, get_loop_depth(loop)));
}
+
return loc;
}
* beginning of a loop. We try to reload as much values as possible now so they
* don't get reloaded inside the loop.
*/
-static void compute_live_ins(const ir_node *block)
+static void decide_start_workset(const ir_node *block)
{
ir_loop *loop = get_irn_loop(block);
ir_node *first;
loc_t loc;
loc_t *starters;
loc_t *delayed;
- int i, len, ws_count;
- int free_slots, free_pressure_slots;
+ unsigned len;
+ unsigned i;
+ int in;
+ unsigned ws_count;
+ int free_slots, free_pressure_slots;
unsigned pressure;
- //int arity;
- //int n_pred_worksets;
- //workset_t **pred_worksets;
+ int arity;
+ workset_t **pred_worksets;
+ bool all_preds_known;
+
+ /* check predecessors */
+ arity = get_irn_arity(block);
+ pred_worksets = ALLOCAN(workset_t*, arity);
+ all_preds_known = true;
+ for (in = 0; in < arity; ++in) {
+ ir_node *pred_block = get_Block_cfgpred_block(block, in);
+ block_info_t *pred_info = get_block_info(pred_block);
+
+ if (pred_info == NULL) {
+ pred_worksets[in] = NULL;
+ all_preds_known = false;
+ } else {
+ pred_worksets[in] = pred_info->end_workset;
+ }
+ }
/* Collect all values living at start of block */
starters = NEW_ARR_F(loc_t, 0);
/* check all Phis first */
sched_foreach(block, node) {
+ unsigned available;
+
if (! is_Phi(node))
break;
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
+ continue;
+
+ if (all_preds_known) {
+ available = available_in_all_preds(pred_worksets, arity, node, true);
+ } else {
+ available = AVAILABLE_UNKNOWN;
+ }
- loc = to_take_or_not_to_take(first, node, loop);
+ loc = to_take_or_not_to_take(first, node, loop, available);
if (! USES_IS_INFINITE(loc.time)) {
if (USES_IS_PENDING(loc.time))
ARR_APP1(loc_t, delayed, loc);
else
ARR_APP1(loc_t, starters, loc);
+ } else {
+ be_spill_phi(senv, node);
}
}
/* check all Live-Ins */
- be_lv_foreach(lv, block, be_lv_state_in, i) {
- ir_node *node = be_lv_get_irn(lv, block, i);
+ be_lv_foreach(lv, block, be_lv_state_in, in) {
+ ir_node *node = be_lv_get_irn(lv, block, in);
+ unsigned available;
- loc = to_take_or_not_to_take(first, node, loop);
+ if (all_preds_known) {
+ available = available_in_all_preds(pred_worksets, arity, node, false);
+ } else {
+ available = AVAILABLE_UNKNOWN;
+ }
+
+ loc = to_take_or_not_to_take(first, node, loop, available);
if (! USES_IS_INFINITE(loc.time)) {
if (USES_IS_PENDING(loc.time))
}
pressure = be_get_loop_pressure(loop_ana, cls, loop);
- assert(ARR_LEN(delayed) <= (signed)pressure);
+ assert(ARR_LEN(delayed) <= pressure);
free_slots = n_regs - ARR_LEN(starters);
free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
free_slots = MIN(free_slots, free_pressure_slots);
/* so far we only put nodes into the starters list that are used inside
* the loop. If register pressure in the loop is low then we can take some
* values and let them live through the loop */
+ DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
+ pressure, free_slots));
if (free_slots > 0) {
+ size_t i;
+
qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
- for (i = 0; i < ARR_LEN(delayed) && i < free_slots; ++i) {
+ for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
int p, arity;
loc_t *loc = & delayed[i];
- /* don't use values which are dead in a known predecessors
- * to not induce unnecessary reloads */
- arity = get_irn_arity(block);
- for (p = 0; p < arity; ++p) {
- ir_node *pred_block = get_Block_cfgpred_block(block, p);
- block_info_t *pred_info = get_block_info(pred_block);
-
- if (pred_info == NULL)
- continue;
-
- if (!workset_contains(pred_info->end_workset, loc->node)) {
- DB((dbg, DBG_START,
- " delayed %+F not live at pred %+F\n", loc->node,
- pred_block));
- goto skip_delayed;
+ if (!is_Phi(loc->node)) {
+ /* don't use values which are dead in a known predecessors
+ * to not induce unnecessary reloads */
+ arity = get_irn_arity(block);
+ for (p = 0; p < arity; ++p) {
+ ir_node *pred_block = get_Block_cfgpred_block(block, p);
+ block_info_t *pred_info = get_block_info(pred_block);
+
+ if (pred_info == NULL)
+ continue;
+
+ if (!workset_contains(pred_info->end_workset, loc->node)) {
+ DB((dbg, DBG_START,
+ " delayed %+F not live at pred %+F\n", loc->node,
+ pred_block));
+ goto skip_delayed;
+ }
}
}
DB((dbg, DBG_START, " delayed %+F taken\n", loc->node));
ARR_APP1(loc_t, starters, *loc);
loc->node = NULL;
+ --free_slots;
skip_delayed:
;
}
/* spill phis (the actual phis not just their values) that are in this block
* but not in the start workset */
- for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
+ len = ARR_LEN(delayed);
+ for (i = 0; i < len; ++i) {
ir_node *node = delayed[i].node;
- if(node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
+ if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
continue;
DB((dbg, DBG_START, " spilling delayed phi %+F\n", node));
qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);
/* Copy the best ones from starters to start workset */
- ws_count = MIN(ARR_LEN(starters), n_regs);
+ ws_count = MIN((unsigned) ARR_LEN(starters), n_regs);
workset_clear(ws);
workset_bulk_fill(ws, ws_count, starters);
DEL_ARR_F(starters);
-#if 0
- /* determine reloaded status of the values: If there's 1 pred block (which
- * is no backedge) where the value is reloaded then we must set it to
- * reloaded here. We place spills in all pred where the value was not yet
- * reloaded to be sure we have a spill on each path */
- n_pred_worksets = 0;
- arity = get_irn_arity(block);
- pred_worksets = alloca(sizeof(pred_worksets[0]) * arity);
- for(i = 0; i < arity; ++i) {
- ir_node *pred_block = get_Block_cfgpred_block(block, i);
- block_info_t *pred_info = get_block_info(pred_block);
- if(pred_info == NULL)
- continue;
-
- pred_worksets[n_pred_worksets] = pred_info->end_workset;
- ++n_pred_worksets;
- }
-
- for(i = 0; i < ws_count; ++i) {
- loc_t *loc = &ws->vals[i];
- ir_node *value = loc->node;
- int reloaded;
+ /* determine spill status of the values: If there's 1 pred block (which
+ * is no backedge) where the value is spilled then we must set it to
+ * spilled here. */
+ for (i = 0; i < ws_count; ++i) {
+ loc_t *loc = &ws->vals[i];
+ ir_node *value = loc->node;
+ bool spilled;
int n;
- /* phis from this block aren't reloaded */
- if(get_nodes_block(value) == block) {
+ /* phis from this block aren't spilled */
+ if (get_nodes_block(value) == block) {
assert(is_Phi(value));
- loc->reloaded = value_not_reloaded;
+ loc->spilled = false;
continue;
}
- /* was the value reloaded on any of the other inputs */
- reloaded = 0;
- arity = get_Block_n_cfgpreds(block);
- for(n = 0; n < n_pred_worksets; ++n) {
+ /* determine if value was spilled on any predecessor */
+ spilled = false;
+ for (n = 0; n < arity; ++n) {
workset_t *pred_workset = pred_worksets[n];
- int p_len = workset_get_length(pred_workset);
+ int p_len;
int p;
- for(p = 0; p < p_len; ++p) {
+ if (pred_workset == NULL)
+ continue;
+
+ p_len = workset_get_length(pred_workset);
+ for (p = 0; p < p_len; ++p) {
loc_t *l = &pred_workset->vals[p];
- if(l->node == value) {
- if(l->reloaded) {
- reloaded = 1;
- }
- break;
+
+ if (l->node != value)
+ continue;
+
+ if (l->spilled) {
+ spilled = true;
}
- }
- if(p >= p_len) {
- reloaded = 1;
break;
}
}
+
+ loc->spilled = spilled;
}
-#endif
}
/**
* whether it is used from a register or is reloaded
* before the use.
*/
-static void belady(ir_node *block)
+static void process_block(ir_node *block)
{
- workset_t *new_vals;
- ir_node *irn;
- int iter;
- block_info_t *block_info;
- int i, arity;
- int has_backedges = 0;
- //int first = 0;
- const ir_edge_t *edge;
+ workset_t *new_vals;
+ ir_node *irn;
+ unsigned iter;
+ block_info_t *block_info;
+ int arity;
/* no need to process a block twice */
- if(get_block_info(block) != NULL) {
- return;
- }
+ assert(get_block_info(block) == NULL);
- /* check if all predecessor blocks are processed yet (though for backedges
- * we have to make an exception as we can't process them first) */
+ /* construct start workset */
arity = get_Block_n_cfgpreds(block);
- for(i = 0; i < arity; ++i) {
- ir_node *pred_block = get_Block_cfgpred_block(block, i);
- block_info_t *pred_info = get_block_info(pred_block);
-
- if(pred_info == NULL) {
- /* process predecessor first (it will be in the queue already) */
- if(!is_backedge(block, i)) {
- return;
- }
- has_backedges = 1;
- }
- }
- (void) has_backedges;
- if(arity == 0) {
+ if (arity == 0) {
+ /* no predecessor -> empty set */
workset_clear(ws);
- } else if(arity == 1) {
+ } else if (arity == 1) {
+ /* one predecessor, copy its end workset */
ir_node *pred_block = get_Block_cfgpred_block(block, 0);
block_info_t *pred_info = get_block_info(pred_block);
assert(pred_info != NULL);
workset_copy(ws, pred_info->end_workset);
} else {
- /* we need 2 heuristics here, for the case when all predecessor blocks
- * are known and when some are backedges (and therefore can't be known
- * yet) */
- compute_live_ins(block);
+ /* multiple predecessors, do more advanced magic :) */
+ decide_start_workset(block);
}
DB((dbg, DBG_DECIDE, "\n"));
/* process the block from start to end */
DB((dbg, DBG_WSETS, "Processing...\n"));
- ir_nodeset_init(&used);
- instr_nr = 0;
/* TODO: this leaks (into the obstack)... */
new_vals = new_workset();
sched_foreach(block, irn) {
int i, arity;
+ ir_node *value;
assert(workset_get_length(ws) <= n_regs);
/* Phis are no real instr (see insert_starters()) */
/* allocate all values _used_ by this instruction */
workset_clear(new_vals);
- for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
+ for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
ir_node *in = get_irn_n(irn, i);
- if (!arch_irn_consider_in_reg_alloc(arch_env, cls, in))
+ if (!arch_irn_consider_in_reg_alloc(cls, in))
continue;
- /* (note that reloaded_value is irrelevant here) */
- workset_insert(new_vals, in, 0);
+ /* (note that "spilled" is irrelevant here) */
+ workset_insert(new_vals, in, false);
}
displace(new_vals, 1);
/* allocate all values _defined_ by this instruction */
workset_clear(new_vals);
- if (get_irn_mode(irn) == mode_T) {
- const ir_edge_t *edge;
-
- foreach_out_edge(irn, edge) {
- ir_node *proj = get_edge_src_irn(edge);
- if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
- continue;
- workset_insert(new_vals, proj, 0);
- }
- } else {
- if (!arch_irn_consider_in_reg_alloc(arch_env, cls, irn))
- continue;
- workset_insert(new_vals, irn, 0);
- }
+ be_foreach_definition(irn, cls, value,
+ assert(req_->width == 1);
+ workset_insert(new_vals, value, false);
+ );
displace(new_vals, 0);
-
- instr_nr++;
}
- ir_nodeset_destroy(&used);
/* Remember end-workset for this block */
block_info->end_workset = workset_clone(ws);
DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
workset_foreach(ws, irn, iter)
- DB((dbg, DBG_WSETS, " %+F (%u)\n", irn,
- workset_get_time(ws, iter)));
-
- /* add successor blocks into worklist */
- foreach_block_succ(block, edge) {
- ir_node *succ = get_edge_src_irn(edge);
- pdeq_putr(worklist, succ);
- }
+ DB((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(ws, iter)));
}
/**
*/
static void fix_block_borders(ir_node *block, void *data)
{
- workset_t *start_workset;
- int arity;
- int i;
- int iter;
+ workset_t *start_workset;
+ int arity;
+ int i;
+ unsigned iter;
(void) data;
DB((dbg, DBG_FIX, "\n"));
DB((dbg, DBG_FIX, "Fixing %+F\n", block));
+ arity = get_irn_arity(block);
+ /* can happen for endless loops */
+ if (arity == 0)
+ return;
+
start_workset = get_block_info(block)->start_workset;
/* process all pred blocks */
- arity = get_irn_arity(block);
for (i = 0; i < arity; ++i) {
ir_node *pred = get_Block_cfgpred_block(block, i);
workset_t *pred_end_workset = get_block_info(pred)->end_workset;
/* spill all values not used anymore */
workset_foreach(pred_end_workset, node, iter) {
ir_node *n2;
- int iter2;
- int found = 0;
+ unsigned iter2;
+ bool found = false;
workset_foreach(start_workset, n2, iter2) {
- if(n2 == node) {
- found = 1;
+ if (n2 == node) {
+ found = true;
break;
}
/* note that we do not look at phi inputs, becuase the values
* workset */
}
-#if 0
- if(!found && be_is_live_out(lv, pred, node)
- && !pred_end_workset->vals[iter].reloaded) {
- ir_node *insert_point
- = be_get_end_of_block_insertion_point(pred);
- DB((dbg, DBG_SPILL, "Spill %+F before %+F\n", node,
+ if (found)
+ continue;
+
+ if (move_spills && be_is_live_in(lv, block, node)
+ && !pred_end_workset->vals[iter].spilled) {
+ ir_node *insert_point;
+ if (arity > 1) {
+ insert_point = be_get_end_of_block_insertion_point(pred);
+ insert_point = sched_prev(insert_point);
+ } else {
+ insert_point = block;
+ }
+ DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
insert_point));
be_add_spill(senv, node, insert_point);
}
-#endif
}
- /* reload missing values in predecessors */
+ /* reload missing values in predecessors, add missing spills */
workset_foreach(start_workset, node, iter) {
+ const loc_t *l = &start_workset->vals[iter];
+ const loc_t *pred_loc;
+
/* if node is a phi of the current block we reload
* the corresponding argument, else node itself */
- if(is_Phi(node) && block == get_nodes_block(node)) {
+ if (is_Phi(node) && get_nodes_block(node) == block) {
node = get_irn_n(node, i);
+ assert(!l->spilled);
/* we might have unknowns as argument for the phi */
- if(!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
+ if (!arch_irn_consider_in_reg_alloc(cls, node))
continue;
}
/* check if node is in a register at end of pred */
- if(workset_contains(pred_end_workset, node))
- continue;
-
- /* node is not in memory at the end of pred -> reload it */
- DB((dbg, DBG_FIX, " reload %+F\n", node));
- DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
- be_add_reload_on_edge(senv, node, block, i, cls, 1);
+ pred_loc = workset_contains(pred_end_workset, node);
+ if (pred_loc != NULL) {
+ /* we might have to spill value on this path */
+ if (move_spills && !pred_loc->spilled && l->spilled) {
+ ir_node *insert_point
+ = be_get_end_of_block_insertion_point(pred);
+ insert_point = sched_prev(insert_point);
+ DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
+ insert_point));
+ be_add_spill(senv, node, insert_point);
+ }
+ } else {
+ /* node is not in register at the end of pred -> reload it */
+ DB((dbg, DBG_FIX, " reload %+F\n", node));
+ DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
+ be_add_reload_on_edge(senv, node, block, i, cls, 1);
+ }
}
}
}
-static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
+static void be_spill_belady(ir_graph *irg, const arch_register_class_t *rcls)
{
- ir_graph *irg = be_get_birg_irg(birg);
+ int i;
- be_liveness_assure_sets(be_assure_liveness(birg));
+ be_liveness_assure_sets(be_assure_liveness(irg));
+ stat_ev_tim_push();
/* construct control flow loop tree */
- if(! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
+ if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
construct_cf_backedges(irg);
}
+ stat_ev_tim_pop("belady_time_backedges");
+ stat_ev_tim_push();
be_clear_links(irg);
+ stat_ev_tim_pop("belady_time_clear_links");
+
+ ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
/* init belady env */
+ stat_ev_tim_push();
obstack_init(&obst);
- arch_env = birg->main_env->arch_env;
- cls = rcls;
- lv = be_get_birg_liveness(birg);
- n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
- ws = new_workset();
- uses = be_begin_uses(irg, lv);
- loop_ana = be_new_loop_pressure(birg);
- senv = be_new_spill_env(birg);
- worklist = new_pdeq();
-
- pdeq_putr(worklist, get_irg_start_block(irg));
-
- while(!pdeq_empty(worklist)) {
- ir_node *block = pdeq_getl(worklist);
- belady(block);
+ cls = rcls;
+ lv = be_get_irg_liveness(irg);
+ n_regs = be_get_n_allocatable_regs(irg, cls);
+ ws = new_workset();
+ uses = be_begin_uses(irg, lv);
+ loop_ana = be_new_loop_pressure(irg, cls);
+ senv = be_new_spill_env(irg);
+ blocklist = be_get_cfgpostorder(irg);
+ stat_ev_tim_pop("belady_time_init");
+
+ stat_ev_tim_push();
+ /* walk blocks in reverse postorder */
+ for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
+ process_block(blocklist[i]);
}
- /* end block might not be reachable in endless loops */
- belady(get_irg_end_block(irg));
-
- del_pdeq(worklist);
+ DEL_ARR_F(blocklist);
+ stat_ev_tim_pop("belady_time_belady");
+ stat_ev_tim_push();
/* belady was block-local, fix the global flow by adding reloads on the
* edges */
irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
+ stat_ev_tim_pop("belady_time_fix_borders");
+
+ ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
/* Insert spill/reload nodes into the graph and fix usages */
be_insert_spills_reloads(senv);
obstack_free(&obst, NULL);
}
+BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady)
void be_init_spillbelady(void)
{
static be_spiller_t belady_spiller = {
be_spill_belady
};
+ lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); /* "be" group looked up on the option root */
+ lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady"); /* "belady" group nested inside "be" */
+ lc_opt_add_table(belady_group, options); /* attaches the tunables of the options table to that group */
be_register_spiller("belady", &belady_spiller);
FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
}
-
-BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);