X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbespillbelady2.c;h=4c8a641170fed0f8db5825b80758ddb1660cbb7e;hb=dbe6718705a162ea25d8035b95595f0cf167637c;hp=1ae5f185f95966bf90ca5b3db58f9765e953e5d9;hpb=3c5a368e59c4f38cc1ecc3ac029003db99f9a4a9;p=libfirm diff --git a/ir/be/bespillbelady2.c b/ir/be/bespillbelady2.c index 1ae5f185f..4c8a64117 100644 --- a/ir/be/bespillbelady2.c +++ b/ir/be/bespillbelady2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -20,7 +20,7 @@ /** * @file * @brief Beladys spillalgorithm version 2. - * @author Daniel Grund, Matthias Braun, Sebastian Hack + * @author Sebastian Hack, Matthias Braun, Daniel Grund * @date 01.08.2007 * @version $Id$ * @@ -32,9 +32,7 @@ * capacity of the blocks to let global variables live through * them. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include #include @@ -53,20 +51,23 @@ #include "ircons_t.h" #include "irprintf.h" #include "execfreq.h" -#include "xmalloc.h" +#include "dfs_t.h" #include "beutil.h" -#include "bearch_t.h" -#include "bespillbelady.h" -#include "besched_t.h" +#include "bearch.h" +#include "besched.h" #include "beirgmod.h" #include "belive_t.h" -#include "benode_t.h" +#include "benode.h" #include "bechordal_t.h" -#include "bespilloptions.h" +#include "bespill.h" #include "beloopana.h" -#include "beirg_t.h" +#include "beirg.h" #include "bemodule.h" +#include "bespillutil.h" + +#include "lc_opts.h" +#include "lc_opts_enum.h" #define DBG_SPILL 1 #define DBG_WSETS 2 @@ -78,8 +79,22 @@ #define DBG_WORKSET 128 #define DBG_GLOBAL 256 -#define DEAD UINT_MAX -#define LIVE_END (DEAD-1) +#define ALREADY_SPILLED_FACTOR 2 + +#define DEAD UINT_MAX +#define LIVE_END (DEAD-1) +#define REMAT_DIST (DEAD-2) + +static int already_spilled_factor = 2; +static int remat_live_range_ext = 1; +static int global_pass_enabled = 1; + +static const lc_opt_table_entry_t options[] = { + LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor), + LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext), + LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled), + LC_OPT_LAST +}; DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) @@ -88,10 +103,11 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) */ typedef struct _loc_t { ir_node *irn; /**< A node. */ - unsigned time; /**< A use time (see beuses.h). */ - unsigned version; /**< That is used in the global pass below. - For usage see the comments below. - In the local belady pass, this is not important. */ + unsigned time; /**< A use time. + In the global pass this is used + as the version number and not as a time. + Only to save space... + */ } loc_t; typedef struct _workset_t { @@ -102,19 +118,22 @@ typedef struct _workset_t { typedef struct _belady_env_t { struct obstack ob; ir_graph *irg; + const dfs_t *dfs; const arch_env_t *arch; const arch_register_class_t *cls; be_lv_t *lv; ir_exec_freq *ef; - ir_node **blocks; /**< Array of all blocks. */ - int n_blocks; /**< Number of blocks in the graph. */ - int n_regs; /**< number of regs in this reg-class */ - workset_t *ws; /**< the main workset used while processing a block. ob-allocated */ - ir_node *instr; /**< current instruction */ - unsigned instr_nr; /**< current instruction number (relative to block start) */ + ir_node **blocks; /**< Array of all blocks. */ + int n_blocks; /**< Number of blocks in the graph. */ + int n_regs; /**< number of regs in this reg-class */ + workset_t *ws; /**< the main workset used while processing a block. ob-allocated */ + ir_node *instr; /**< current instruction */ + int instr_nr; /**< current instruction number (relative to block start) */ - spill_env_t *senv; /**< see bespill.h */ + spill_env_t *senv; /**< see bespill.h */ + bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */ + ir_nodeset_t *extra_spilled; /** All nodes for which a special spill location has been computed. */ } belady_env_t; @@ -122,14 +141,14 @@ static int loc_compare(const void *a, const void *b) { const loc_t *p = a; const loc_t *q = b; - return (int) p->time - (int) q->time; + return (p->time > q->time) - (p->time < q->time); } -static INLINE void workset_print(const workset_t *w) +static inline void workset_print(const workset_t *w) { int i; - for(i = 0; i < w->len; ++i) { + for (i = 0; i < w->len; ++i) { ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time); } } @@ -137,22 +156,18 @@ static INLINE void workset_print(const workset_t *w) /** * Alloc a new workset on obstack @p ob with maximum size @p max */ -static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) { - workset_t *res; - size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]); - res = obstack_alloc(ob, size); - memset(res, 0, size); - return res; +static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob) +{ + return OALLOCFZ(ob, workset_t, vals, env->n_regs); } /** * Alloc a new instance on obstack and make it equal to @param ws */ -static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) { - workset_t *res; - size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]); - res = obstack_alloc(ob, size); - memcpy(res, ws, size); +static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) +{ + workset_t *res = OALLOCF(ob, workset_t, vals, env->n_regs); + memcpy(res, ws, sizeof(*res) + (env->n_regs)*sizeof(res->vals[0])); return res; } @@ -160,7 +175,8 @@ static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, wo * Do NOT alloc anything. Make @param tgt equal to @param src. * returns @param tgt for convenience */ -static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) { +static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) +{ size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]); memcpy(tgt, src, size); return tgt; @@ -171,7 +187,8 @@ static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset * @param count locations given at memory @param locs. * Set the length of @param ws to count. */ -static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) { +static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) +{ workset->len = count; memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0])); } @@ -180,16 +197,17 @@ static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t * Inserts the value @p val into the workset, iff it is not * already contained. The workset must not be full. */ -static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) { +static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) +{ int i; /* check for current regclass */ - if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) { - DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val)); + if (!arch_irn_consider_in_reg_alloc(env->cls, val)) { + // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val)); return; } /* check if val is already contained */ - for(i=0; ilen; ++i) + for (i=0; ilen; ++i) if (ws->vals[i].irn == val) return; @@ -201,16 +219,18 @@ static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val /** * Removes all entries from this workset */ -static INLINE void workset_clear(workset_t *ws) { +static inline void workset_clear(workset_t *ws) +{ ws->len = 0; } /** * Removes the value @p val from the workset if present. */ -static INLINE void workset_remove(workset_t *ws, ir_node *val) { +static inline void workset_remove(workset_t *ws, ir_node *val) +{ int i; - for(i=0; ilen; ++i) { + for (i=0; ilen; ++i) { if (ws->vals[i].irn == val) { ws->vals[i] = ws->vals[--ws->len]; return; @@ -218,9 +238,10 @@ static INLINE void workset_remove(workset_t *ws, ir_node *val) { } } -static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) { +static inline int workset_get_index(const workset_t *ws, const ir_node *val) +{ int i; - for(i=0; ilen; ++i) { + for (i=0; ilen; ++i) { if (ws->vals[i].irn == val) return i; } @@ -234,7 +255,7 @@ static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) { * @p v A variable to put the current value in * @p i An integer for internal use */ -#define workset_foreach(ws, v, i) for(i=0; \ +#define workset_foreach(ws, v, i) for (i=0; \ v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \ ++i) @@ -246,40 +267,47 @@ static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) { #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare); #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0) +typedef struct _bring_in_t bring_in_t; + typedef struct _block_info_t { belady_env_t *bel; - const ir_node *bl; - workset_t *ws_start, *ws_end; + ir_node *bl; + int id; ir_phase next_uses; + workset_t *ws_end; /**< The end set after the local belady pass. */ + double exec_freq; /**< The execution frequency of this block. */ + double reload_cost; /**< Cost of a reload in this block. */ ir_node *first_non_in; /**< First node in block which is not a phi. */ ir_node *last_ins; /**< The instruction before which end of block reloads will be inserted. */ - workset_t *entrance_reg; /**< That set will contain all values - transported into the block which - are used before they are displaced. - That means, we later have to care to - bring them into the block in a register - or reload them at the entry of the block. */ + int pressure; /**< The amount of registers which remain free + in this block. This capacity can be used to let + global variables, transported into other blocks, + live through this block. */ - int pressure; /**< The amount of registers which remain free - in this block. This capacity can be used to let - global variables, transported into other blocks, - live through this block. */ + int front_pressure; /**< The pressure right before the first + real (non-phi) node. At the beginning + of the global pass, this is 0. */ + struct list_head br_head; /**< List head for all bring_in variables. */ + int free_at_jump; /**< registers free at jump. */ - double exec_freq; /**< The execution frequency of this block. */ } block_info_t; -static INLINE void *new_block_info(belady_env_t *bel, ir_node *bl) { - block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res)); - memset(res, 0, sizeof(res[0])); +static inline void *new_block_info(belady_env_t *bel, int id) +{ + ir_node *bl = bel->blocks[id]; + block_info_t *res = OALLOCZ(&bel->ob, block_info_t); res->first_non_in = NULL; - res->last_ins = NULL; - res->bel = bel; - res->bl = bl; - res->entrance_reg = new_workset(bel, &bel->ob); + res->last_ins = NULL; + res->bel = bel; + res->bl = bl; + res->id = id; res->exec_freq = get_block_execfreq(bel->ef, bl); + res->reload_cost = bel->arch->reload_cost * res->exec_freq; + res->free_at_jump = bel->n_regs; + INIT_LIST_HEAD(&res->br_head); set_irn_link(bl, res); return res; } @@ -287,7 +315,7 @@ static INLINE void *new_block_info(belady_env_t *bel, ir_node *bl) { #define get_block_info(block) ((block_info_t *)get_irn_link(block)) #define set_block_info(block, info) set_irn_link(block, info) -static INLINE ir_node *block_info_get_last_ins(block_info_t *bi) +static inline ir_node *block_info_get_last_ins(block_info_t *bi) { if (!bi->last_ins) bi->last_ins = be_get_end_of_block_insertion_point(bi->bl); @@ -300,26 +328,21 @@ typedef struct _next_use_t { in the block. Needed to identify transport in values for the global pass. */ - int step; /**< The time step of the use. */ + sched_timestep_t step; /**< The time step of the use. */ + ir_node *irn; struct _next_use_t *next; /**< The next use int this block or NULL. */ } next_use_t; -static void *next_use_init(ir_phase *phase, ir_node *irn, void *old) -{ - (void) phase; - (void) irn; - (void) old; - return NULL; -} - static void build_next_uses(block_info_t *bi) { ir_node *irn; - phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL); + sched_renumber(bi->bl); + + phase_init(&bi->next_uses, bi->bel->irg, phase_irn_init_default); sched_foreach_reverse(bi->bl, irn) { - int i, step = sched_get_time_step(irn); + int i; if (is_Phi(irn)) break; @@ -329,13 +352,15 @@ static void build_next_uses(block_info_t *bi) next_use_t *curr = phase_get_irn_data(&bi->next_uses, op); next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0])); - assert(step >= 0); use->is_first_use = 1; - use->step = step; + use->step = sched_get_time_step(irn); use->next = curr; + use->irn = irn; - if (curr) + if (curr) { curr->is_first_use = 0; + assert(curr->step >= use->step); + } phase_set_irn_data(&bi->next_uses, op, use); } @@ -344,7 +369,7 @@ static void build_next_uses(block_info_t *bi) #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn)) -static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn) +static inline void advance_current_use(block_info_t *bi, const ir_node *irn) { next_use_t *use = get_current_use(bi, irn); @@ -352,31 +377,152 @@ static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn) phase_set_irn_data(&bi->next_uses, irn, use->next); } -static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage) +static __attribute__((unused)) int block_freq_gt(const void *a, const void *b) { - belady_env_t *env = bi->bel; - next_use_t *use = get_current_use(bi, irn); - int curr_step = sched_get_time_step(env->instr); - int flags = arch_irn_get_flags(env->arch, irn); + const ir_node * const *p = a; + const ir_node * const *q = b; + block_info_t *pi = get_block_info(*p); + block_info_t *qi = get_block_info(*q); + double diff = qi->exec_freq - pi->exec_freq; + return (diff > 0) - (diff < 0); +} - assert(!(flags & arch_irn_flags_ignore)); +static int block_freq_dfs_gt(const void *a, const void *b) +{ + const ir_node * const *p = a; + const ir_node * const *q = b; + block_info_t *pi = get_block_info(*p); + block_info_t *qi = get_block_info(*q); + double diff; + + if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0) + || (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) { + + const dfs_t *dfs = pi->bel->dfs; + int pp = dfs_get_post_num(dfs, pi->bl); + int pq = dfs_get_post_num(dfs, qi->bl); + return pq - pp; + } - /* We have to keep nonspillable nodes in the workingset */ - if(flags & arch_irn_flags_dont_spill) + diff = qi->exec_freq - pi->exec_freq; + return (diff > 0) - (diff < 0); +} + +/* + ____ _ ___ + | __ ) _ __(_)_ __ __ _ |_ _|_ __ + | _ \| '__| | '_ \ / _` | | || '_ \ + | |_) | | | | | | | (_| | | || | | | + |____/|_| |_|_| |_|\__, | |___|_| |_| + |___/ + + Data structures to represent bring in variables. +*/ + +struct _bring_in_t { + ir_node *irn; /**< The node to bring in. */ + block_info_t *bi; /**< The block to which bring in should happen. */ + int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */ + ir_node *first_use; /**< The first user of irn in bl. */ + sched_timestep_t use_step; /**< Schedule step of the first use. */ + + int is_remat : 1; /**< Is rematerializable. */ + int sect_pressure; /**< Offset to maximum pressure in block. */ + struct list_head list; + struct list_head sect_list; + bring_in_t *sect_head; +}; + +static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use) +{ + bring_in_t *br = OALLOC(&bi->bel->ob, bring_in_t); + br->irn = irn; + br->bi = bi; + br->first_use = use->irn; + br->use_step = use->step; + br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn); + br->pressure_so_far = bi->pressure; + br->sect_pressure = bi->front_pressure; + br->sect_head = br; + + INIT_LIST_HEAD(&br->list); + INIT_LIST_HEAD(&br->sect_list); + list_add_tail(&br->list, &bi->br_head); + return br; +} + +static int bring_in_cmp(const void *a, const void *b) +{ + const bring_in_t *p = *(const bring_in_t * const *) a; + const bring_in_t *q = *(const bring_in_t * const *) b; + double fp, fq; + + /* if one of both is a remat node, it will be done after the other. */ + if (p->is_remat != q->is_remat) + return p->is_remat - q->is_remat; + + /* in the same block, the one further in the front has to be processed first! + * Otherwise the front_pressure 'trick' is not exact. */ + if (p->bi == q->bi) + return p->use_step - q->use_step; + + fp = p->bi->exec_freq; + fq = q->bi->exec_freq; + + /* if both have the same frequency, inspect the frequency of the definition */ + if (fp == fq) { + double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq; + double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq; + + /* if the defs of both have the same freq, we go for reverse dfs post order. */ + if (fdp == fdq) { + const dfs_t *dfs = p->bi->bel->dfs; + int pp = dfs_get_post_num(dfs, p->bi->bl); + int pq = dfs_get_post_num(dfs, q->bi->bl); + return pq - pp; + } + + return (fdq > fdp) - (fdq < fdp); + } + + return (fq > fp) - (fq < fp); +} + +static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage) +{ + belady_env_t *env = bi->bel; + sched_timestep_t curr_step = sched_get_time_step(env->instr); + next_use_t *use = get_current_use(bi, irn); + int flags = arch_irn_get_flags(irn); + + assert(!arch_irn_is_ignore(irn)); + + /* We have to keep non-spillable nodes in the working set */ + if (flags & arch_irn_flags_dont_spill) return 0; if (!is_usage && use && use->step == curr_step) use = use->next; if (use) { + unsigned res = use->step - curr_step; + assert(use->step >= curr_step); - return use->step - curr_step; + + if (res != 0) { + if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn)) + res = REMAT_DIST; + else if (bitset_contains_irn(env->spilled, irn)) + res *= already_spilled_factor; + } + + return res; } return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD; } -static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn) +static inline int is_local_phi(const ir_node *bl, const ir_node *irn) { return is_Phi(irn) && get_nodes_block(irn) == bl; } @@ -389,12 +535,12 @@ static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn) * @param irn The node in question. * @return 1, if node is something transported into @p bl, 0 if not. * @note The function will only give correct answers in the case - * where @p irn is unsed in the block @p bl which is always + * where @p irn is unused in the block @p bl which is always * the case in our usage scenario. */ -static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn) +static inline int is_transport_in(const ir_node *bl, const ir_node *irn) { - return is_local_phi(bl, irn) || get_nodes_block(irn) != bl; + return get_nodes_block(irn) != bl || is_Phi(irn); } /** @@ -406,10 +552,11 @@ static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn) * @p is_usage indicates that the values in new_vals are used (not defined) * In this case reloads must be performed */ -static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { +static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) +{ belady_env_t *env = bi->bel; workset_t *ws = env->ws; - ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0])); + ir_node **to_insert = ALLOCAN(ir_node*, env->n_regs); int i, len, max_allowed, demand, iter; ir_node *val; @@ -422,10 +569,9 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { /* mark value as used */ if (! workset_contains(ws, val)) { - DBG((dbg, DBG_DECIDE, " insert %+F\n", val)); + DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val)); to_insert[demand++] = val; if (is_usage) { - int insert_reload = 1; next_use_t *use = get_current_use(bi, val); /* @@ -435,28 +581,30 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { * entrance to here or not. */ assert(use); - assert(sched_get_time_step(env->instr) == use->step); + assert(sched_get_time_step(env->instr) == (int) use->step); if (is_transport_in(bi->bl, val) && use->is_first_use) { - DBG((dbg, DBG_DECIDE, "entrance node %+F, pressure %d:\n", val, bi->pressure)); - if (bi->pressure < env->n_regs) { - workset_insert(env, bi->entrance_reg, val); - insert_reload = 0; - ++bi->pressure; - DBG((dbg, DBG_DECIDE, "... no reload. must be considered at block start\n")); - } + bring_in_t *bri = new_bring_in(bi, val, use); + bri->first_use = env->instr; + + /* reset the section pressure, since a new section starts. */ + bi->front_pressure = 0; + + DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure)); + DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n")); } - if (insert_reload) { - DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr)); + else { + bitset_add_irn(env->spilled, val); + DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr)); be_add_reload(env->senv, val, env->instr, env->cls, 1); } } } else { - assert(is_usage || "Defined value already in workset?!?"); - DBG((dbg, DBG_DECIDE, " skip %+F\n", val)); + assert(is_usage && "Defined value already in workset?!?"); + DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val)); } } - DBG((dbg, DBG_DECIDE, " demand = %d\n", demand)); + DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand)); /* 2. Make room for at least 'demand' slots @@ -466,9 +614,7 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { /* Only make more free room if we do not have enough */ if (len > max_allowed) { - // int curr_step = sched_get_time_step(env->instr); - - DBG((dbg, DBG_DECIDE, " disposing %d values\n", len - max_allowed)); + DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed)); /* get current next-use distance */ for (i = 0; i < ws->len; ++i) { @@ -493,8 +639,9 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { for (i = 0; i < demand; ++i) workset_insert(env, env->ws, to_insert[i]); - bi->pressure = MAX(bi->pressure, workset_get_length(env->ws)); - + /* TODO: simplify this expression? */ + bi->pressure = MAX(bi->pressure, workset_get_length(env->ws)); + bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws)); } /** @@ -502,16 +649,16 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { * whether it is used from a register or is reloaded * before the use. */ -static void belady(ir_node *block, void *data) { - belady_env_t *env = data; - block_info_t *block_info = new_block_info(env, block); - void *obst_state = obstack_base(&env->ob); +static void belady(belady_env_t *env, int id) +{ + block_info_t *block_info = new_block_info(env, id); + const ir_node *block = block_info->bl; workset_t *new_vals; ir_node *irn; int iter; - DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl)); + DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl)); new_vals = new_workset(env, &env->ob); workset_clear(env->ws); @@ -526,14 +673,12 @@ static void belady(ir_node *block, void *data) { int i, arity; assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!"); - /* projs are handled with the tuple value. + /* Projs are handled with the tuple value. * Phis are no real instr (see insert_starters()) * instr_nr does not increase */ - if (is_Proj(irn) || is_Phi(irn)) { - DBG((dbg, DBG_DECIDE, " ...%+F skipped\n", irn)); + if (is_Proj(irn) || is_Phi(irn)) continue; - } - DBG((dbg, DBG_DECIDE, " ...%+F\n", irn)); + DBG((dbg, DBG_DECIDE, "\t%+F\n", irn)); if (!block_info->first_non_in) block_info->first_non_in = irn; @@ -543,9 +688,10 @@ static void belady(ir_node *block, void *data) { /* allocate all values _used_ by this instruction */ workset_clear(new_vals); - for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) { + for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) { workset_insert(env, new_vals, get_irn_n(irn, i)); } + DBG((dbg, DBG_DECIDE, "\t* uses\n")); displace(block_info, new_vals, 1); /* @@ -554,7 +700,7 @@ static void belady(ir_node *block, void *data) { * augmenting the pressure. Note, that a variable is dead * if it has no further use in this block and is *not* live end */ - for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) { + for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) { ir_node *op = get_irn_n(irn, i); next_use_t *use = get_current_use(block_info, op); @@ -567,7 +713,7 @@ static void belady(ir_node *block, void *data) { /* allocate all values _defined_ by this instruction */ workset_clear(new_vals); - if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */ + if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */ const ir_edge_t *edge; foreach_out_edge(irn, edge) { @@ -577,13 +723,20 @@ static void belady(ir_node *block, void *data) { } else { workset_insert(env, new_vals, irn); } + DBG((dbg, DBG_DECIDE, "\t* defs\n")); displace(block_info, new_vals, 0); + if (is_op_forking(get_irn_op(env->instr))) { + for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) { + ir_node *op = get_irn_n(env->instr, i); + block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op); + } + } + env->instr_nr++; } - phase_free(&block_info->next_uses); - obstack_free(&env->ob, obst_state); + phase_deinit(&block_info->next_uses); /* Remember end-workset for this block */ block_info->ws_end = workset_clone(env, &env->ob, env->ws); @@ -591,6 +744,9 @@ static void belady(ir_node *block, void *data) { workset_foreach(block_info->ws_end, irn, iter) DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter))); DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure)); + + /* now, initialize the front pressure to 0. */ + block_info->front_pressure = 0; } /* @@ -603,94 +759,140 @@ static void belady(ir_node *block, void *data) { */ -static int block_freq_gt(const void *a, const void *b) -{ - const ir_node * const *p = a; - const ir_node * const *q = b; - block_info_t *pi = get_block_info(*p); - block_info_t *qi = get_block_info(*q); - double diff = qi->exec_freq - pi->exec_freq; - return (diff > 0) - (diff < 0); -} +#define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t)) +#define workset_get_version(ws, i) ((ws)->vals[(i)].time) + +#define ver_oldest (0) +#define ver_youngest ((unsigned) -1) +#define ver_make_newer(v) ((v) + 1) +#define ver_is_older(v, w) ((v) < (w)) +#define ver_is_younger(v, w) ((v) > (w)) + +enum { + irn_act_none = 0, + irn_act_reload, + irn_act_live_through +}; + +typedef struct _block_state_t { + struct _block_state_t *next; + struct _block_state_t *next_intern; + block_info_t *bi; + int pressure; + workset_t *end_state; +} block_state_t; -typedef struct _block_end_state_t { - ir_node *bl; +typedef struct _irn_action_t { + struct _irn_action_t *next; ir_node *irn; - double costs; - workset_t *end_state; - unsigned reload_at_end : 1; - unsigned live_through : 1; -} block_end_state_t; + const ir_node *bl; + int act; +} irn_action_t; typedef struct _global_end_state_t { belady_env_t *env; bitset_t *succ_phis; + bitset_t *committed; struct obstack obst; - block_end_state_t *end_info; - unsigned gauge; + void *reset_level; unsigned version; + + unsigned *bs_tops_vers; + block_state_t **bs_tops; + block_state_t *bs_top; + irn_action_t *ia_top; } global_end_state_t; typedef struct { - void *obst_level; - unsigned gauge; + void *obst_level; + block_state_t *bs_top; + irn_action_t *ia_top; } rollback_info_t; -static INLINE rollback_info_t trans_begin(global_end_state_t *ges) +static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi) { - rollback_info_t rb; - rb.obst_level = obstack_base(&ges->obst); - rb.gauge = ges->gauge; - return rb; + int id = bi->id; + assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version)); + return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id]; } -static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb) +static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi) { - ges->gauge = rb->gauge; - obstack_free(&ges->obst, rb->obst_level); + block_state_t *bs = get_block_state(ges, bi); + return bs ? bs->end_state : bi->ws_end; } -static block_end_state_t *get_block_end_state(global_end_state_t *state, ir_node *bl, ir_node *irn) +static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi) { - unsigned i; + block_state_t *bs = get_block_state(ges, bi); + block_state_t *nw = OALLOC(&ges->obst, block_state_t); + + nw->next_intern = bs; + nw->next = ges->bs_top; + nw->bi = bi; - for (i = 0; i < state->gauge; ++i) { - block_end_state_t *bei = &state->end_info[i]; - if (bei->bl == bl && bei->irn == irn) - return bei; + if (bs) { + nw->pressure = bs->pressure; + nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state); + } + else { + nw->pressure = bi->pressure; + nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end); } - { - block_info_t *bi = get_block_info(bl); - block_end_state_t *curr; + ges->bs_top = nw; + ges->bs_tops[bi->id] = nw; + ges->bs_tops_vers[bi->id] = ges->version; + return nw; +} + +static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl) +{ + irn_action_t *ia = OALLOC(&ges->obst, irn_action_t); + + ia->irn = irn; + ia->bl = bl; + ia->act = irn_act_none; + ia->next = ges->ia_top; + ges->ia_top = ia; + return ia; +} - /* make sure we have room in the array */ - ARR_EXTO(block_end_state_t, state->end_info, (int) state->gauge); +static inline rollback_info_t trans_begin(global_end_state_t *ges) +{ + rollback_info_t rb; + rb.obst_level = obstack_base(&ges->obst); + rb.bs_top = ges->bs_top; + rb.ia_top = ges->ia_top; + return rb; +} - curr = &state->end_info[state->gauge]; +static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb) +{ + block_state_t *bs; - memset(curr, 0, sizeof(curr[0])); - curr->bl = bl; - curr->irn = irn; - curr->end_state = workset_clone(state->env, &state->obst, bi->ws_end); - curr->costs = -1.0; - ++state->gauge; - return curr; + /* unwind all the stacks indiced with the block number */ + for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) { + unsigned id = bs->bi->id; + ges->bs_tops[id] = bs->next_intern; } + + ges->ia_top = rb->ia_top; + ges->bs_top = rb->bs_top; + obstack_free(&ges->obst, rb->obst_level); } -static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, int level); -static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, int level) +static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level); + +static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level) { - block_end_state_t *bes = get_block_end_state(ges, bl, irn); - workset_t *end = bes->end_state; - block_info_t *bi = get_block_info(bl); - int n_regs = bi->bel->n_regs; + block_info_t *bi = get_block_info(bl); + const workset_t *end = get_end_state(ges, bi); + double res; int index; - DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F (pressure %d)\n", - level, irn, bl, bi->pressure)); + DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl)); /* * to make the value available at end, @@ -712,32 +914,27 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir * cheaper, do the other thing. If not, reload it here. */ - /* - * we have been here before and already figured out some costs. - * so we can exit safely. - */ - if (bes->costs >= 0.0) { - DBG((dbg, DBG_GLOBAL, "\t%2Dwe\'ve been here before\n", level)); - goto end; - } - /* if the end set contains it already, it is in a reg and it costs nothing * to load it to one. */ index = workset_get_index(end, irn); if (index >= 0) { - unsigned ver = end->vals[index].version; + unsigned ver = workset_get_version(end, index); DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n", - level, ver > ges->version ? "already" : "not yet")); + level, ver_is_older(ver, ges->version) ? "already" : "not yet")); /* * if the version is older, the value is already fixed - * and cannot be removed from the end set. If not, - * we fix it here by giving it our version. + * and cannot be removed from the end set. + * + * If not, we will create a new block state for that block since + * we modify it by giving the end state a new version. */ - if (ver < ges->version) - end->vals[index].version = ges->version; + if (ver_is_younger(ver, ges->version)) { + block_state_t *bs = new_block_state(ges, bi); + workset_set_version(bs->end_state, index, ges->version); + } - bes->costs = 0.0; + res = 0.0; goto end; } @@ -758,26 +955,28 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir */ { - int len = workset_get_length(end); + int n_regs = bi->free_at_jump; + int len = workset_get_length(end); int slot = -1; int i; - bes->costs = HUGE_VAL; + res = HUGE_VAL; /* * look if there is room in the end array * for the variable. Note that this does not - * means that the var is living through the block. + * mean that the var can live through the block. * There is just room at the *end* */ if (len < n_regs) { - DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", - level, n_regs - len)); + DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len)); slot = len; } else { - for (i = 0; i < len; ++i) - if (end->vals[i].version < ges->version) + for (i = 0; i < len; ++i) { + unsigned ver = workset_get_version(end, i); + if (ver_is_younger(ver, ges->version)) break; + } if (i < len) { DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n", @@ -786,65 +985,103 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir } } + /* + * finally there is some room. we can at least reload the value. + * but we will try to let or live through anyhow. + */ if (slot >= 0) { - rollback_info_t rb = trans_begin(ges); + irn_action_t *vs = new_irn_action(ges, irn, bi->bl); + block_state_t *bs = new_block_state(ges, bi); + workset_t *end = bs->end_state; ir_node *ins_before = block_info_get_last_ins(bi); double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before); - double bring_in = bi->pressure < n_regs ? can_bring_in(ges, bl, irn, level + 1) : HUGE_VAL; + int pressure_ok = bs->pressure < n_regs; - DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, bring in=%f\n", - level, n_regs - bi->pressure, reload_here, bring_in)); + if (reload_here < bi->reload_cost) + reload_here = 0.0; /* - * reloading here pays off; bringing the value in from elsewhere - * is too expensive, hence we drop that search by resetting - * the gauge. + * No matter what we do, the value will be in the end set + * if the block from now on (of course only regarding the + * current state). Enter it and set the new length + * appropriately. */ - if (reload_here <= bring_in) { - trans_rollback(ges, &rb); - bes->costs = reload_here; - bes->reload_at_end = 1; - } else { - bes->live_through = 1; - bes->costs = bring_in; + end->vals[slot].irn = irn; + workset_set_version(end, slot, ges->version); + workset_set_length(end, MAX(workset_get_length(end), slot + 1)); + + vs->act = irn_act_reload; + res = reload_here; + + DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n", + level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient")); + + + /* look if we can bring the value in. */ + if (pressure_ok && reload_here > 0.0) { + rollback_info_t rb = trans_begin(ges); + double new_limit = MIN(reload_here, limit); + + vs->act = irn_act_live_through; + bs->pressure += 1; + res = can_bring_in(ges, bl, irn, new_limit, level + 1); + + /* + * if bring in is too expensive re-adjust the pressure + * and roll back the state + */ + if (res >= reload_here) { + bs->pressure -= 1; + vs->act = irn_act_reload; + trans_rollback(ges, &rb); + res = reload_here; + } } - end->vals[slot].irn = irn; - end->vals[slot].version = ges->version; - end->len = MAX(end->len, slot + 1); + + DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level, + vs->act == irn_act_reload ? "reloading" : "bringing in")); } } end: - DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, bes->costs)); - return bes->costs; + DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res)); + return res; } -static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, int level) +static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level) { + belady_env_t *env = ges->env; double glob_costs = HUGE_VAL; - DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in for %+F at block %+F\n", level, irn, bl)); + DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl)); if (is_transport_in(bl, irn)) { int i, n = get_irn_arity(bl); - ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0])); rollback_info_t rb = trans_begin(ges); - glob_costs = 0.0; for (i = 0; i < n; ++i) { ir_node *pr = get_Block_cfgpred_block(bl, i); ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn; - double c = can_make_available_at_end(ges, pr, op, level + 1); + double c; - if (c >= HUGE_VAL) { - trans_rollback(ges, &rb); + /* + * there might by Unknowns as operands of Phis in that case + * we set the costs to zero, since they won't get spilled. + */ + if (arch_irn_consider_in_reg_alloc(env->cls, op)) + c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1); + else + c = 0.0; + + glob_costs += c; + + if (glob_costs >= limit) { glob_costs = HUGE_VAL; + trans_rollback(ges, &rb); goto end; } - - glob_costs += c; } } @@ -856,135 +1093,337 @@ end: static void materialize_and_commit_end_state(global_end_state_t *ges) { belady_env_t *env = ges->env; - unsigned i; + irn_action_t *ia; + block_state_t *bs; DBG((dbg, DBG_GLOBAL, "\tmaterializing\n")); - for (i = 0; i < ges->gauge; ++i) { - block_end_state_t *bes = &ges->end_info[i]; - block_info_t *bi = get_block_info(bes->bl); - int idx, end_pressure; - - DBG((dbg, DBG_GLOBAL, "\t\t%+F in %+F, cost %f through: %d, rel: %d\n", - bes->irn, bes->bl, bes->costs, bes->live_through, bes->reload_at_end)); - - /* insert the reload if the val was reloaded at the block's end */ - if (bes->reload_at_end) { - be_add_reload_at_end(env->senv, bes->irn, bes->bl, env->cls, 1); - DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", bes->irn, bes->bl)); - } - idx = workset_get_index(bes->end_state, bes->irn); + /* + * Perform all the variable actions. + */ + for (ia = ges->ia_top; ia != NULL; ia = ia->next) { + switch (ia->act) { + case irn_act_live_through: + { + block_info_t *bi = get_block_info(ia->bl); + bring_in_t *iter; + + if (is_local_phi(ia->bl, ia->irn)) { + bitset_add_irn(ges->succ_phis, ia->irn); + DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn)); + } - if (is_local_phi(bes->bl, bes->irn) && bes->live_through) - bitset_add_irn(ges->succ_phis, bes->irn); + list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) + ++iter->sect_pressure; + ++bi->front_pressure; + } + break; + case irn_act_reload: + be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1); + DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl)); + break; + default: + DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl)); + } + } - /* - * set the version number in the workset. - * That will mark this value as fixed in the end set - * and will prevent further investigations from removing - * it from there. - * Also "commit" the workset; - * by copying it back to the block's end workset. - */ - if (idx >= 0) { - DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bes->bl, ges->version)); - bes->end_state->vals[idx].version = ges->version; - workset_copy(env, bi->ws_end, bes->end_state); + /* + * Commit the block end states + */ + for (bs = ges->bs_top; bs != NULL; bs = bs->next) { + block_info_t *bi = bs->bi; + + if (!bitset_is_set(ges->committed, bi->id)) { + DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version)); + // bes->bs->end_state->vals[idx].version = ges->version; + workset_copy(env, bi->ws_end, bs->end_state); + DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n", + bi->pressure, bs->pressure, workset_get_length(bs->end_state))); + bi->pressure = bs->pressure; + bitset_set(ges->committed, bi->id); } + } - end_pressure = 0; - for (idx = workset_get_length(bes->end_state) - 1; idx >= 0; --idx) - if (bes->end_state->vals[idx].version >= ges->version) - end_pressure += 1; + /* clear the committed bitset. the next call is expecting it. */ + bitset_clear_all(ges->committed); +} - /* - * if the variable is live through the block, - * update the pressure indicator. - */ - DBG((dbg, DBG_GLOBAL, "\t\told pressure %d, ", bi->pressure)); +static ir_node *better_spilled_here(const bring_in_t *br) +{ + const block_info_t *bi = br->bi; + double spill_ef = get_block_info(get_nodes_block(br->irn))->exec_freq; + + /* + * If the bring in node is a phi in the bring in block, + * we look at all definitions and sum up their execution frequencies, + * since spills will be placed there. + * (except for the case where an operand is also a phi which is spilled :-( ) + * If that cost is higher than spilling the phi in that block, we opt for + * bringing the phi into the block and spill it there. + */ + if (is_local_phi(bi->bl, br->irn)) { + ir_node *bl = bi->bl; + int i; + + spill_ef = 0.0; + for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i) + spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq; + } - bi->pressure = MAX(bi->pressure + bes->live_through, end_pressure); + return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL; +} - DBG((dbg, DBG_GLOBAL, "new pressure: %d, end pressure: %d, end length: %d\n", - bi->pressure, end_pressure, workset_get_length(bes->end_state))); +static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br) +{ + const struct list_head *l; + int res = INT_MIN; + assert(br->bi == bi); + for (l = &br->list; l != &bi->br_head; l = l->prev) { + br = list_entry(l, bring_in_t, list); + res = MAX(res, br->sect_pressure); } + + /* finally consider the front pressure distance and add the reference line (the global block pressure) */ + return MAX(res, bi->front_pressure); +} + +#define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list) + +#if 0 +static int get_block_max_pressure(const block_info_t *bi) +{ + int max = get_max_pressure_so_far(bi, block_last_bring_in(bi)); + return MAX(bi->pressure, max); } +#endif /** - * Examine all irns which shall be in regs at the beginning of the - * block. + * Try to bring a variable into a block. + * @param ges The state of all end sets. + * @param block The block. + * @param irn The variable. */ -static void fix_block_borders(global_end_state_t *ges, ir_node *block) { - block_info_t *bi = get_block_info(block); - belady_env_t *env = ges->env; +static void optimize_variable(global_end_state_t *ges, bring_in_t *br) +{ + block_info_t *bi = br->bi; + ir_node *irn = br->irn; + ir_node *bl = bi->bl; + belady_env_t *env = ges->env; + void *reset_level = obstack_base(&ges->obst); + int k = env->n_regs; + int pressure_upto_use = get_max_pressure_so_far(bi, br); + int front_pressure = bi->front_pressure; + ir_node *better_spill_loc = NULL; - ir_node *irn; - int i; + assert(front_pressure <= k); + assert(pressure_upto_use <= k); - DBG((dbg, DBG_GLOBAL, "fixing block borders at %+F (%f)\n", block, bi->exec_freq)); + DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n", + irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use)); - /* process all variables which shall be in a reg at - * the beginning of the block in the order of the next use. */ - workset_foreach(bi->entrance_reg, irn, i) { - double local_costs = be_get_reload_costs(env->senv, irn, bi->first_non_in); - double bring_in_costs; + // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn)); - /* reset the gauge and create a new version. */ - ges->gauge = 0; - ges->version -= 1; + /* + * if we cannot bring the value to the use, let's see if it would be worthwhile + * to bring the value to the beginning of the block to have a better spill + * location. + * + * better _spilled_here will return a node where the value can be spilled after + * or NULL if this block does not provide a better spill location. + */ +#if 1 + if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn)) + better_spill_loc = better_spilled_here(br); +#endif - DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version)); + /* + * If either we can bring the value to the use or we should try + * to bring it here to do the spill here, let's try to bring it in. + */ + if (better_spill_loc || pressure_upto_use < k) { + block_state_t *bs; + double bring_in_costs, local_costs; + rollback_info_t trans; + int pressure_inc; + + /* process all variables which shall be in a reg at + * the beginning of the block in the order of the next use. */ + local_costs = be_get_reload_costs(env->senv, irn, br->first_use); + + /* reset the lists */ + ges->bs_top = NULL; + ges->ia_top = NULL; + + /* if the variable will live into this block, we must adapt the pressure. + * The new pressure is the MAX of: + * 1) the total block pressure + * 2) the pressure so far + the front pressure increase + 1 + * + * If the second is larger than the first, + * we have to increment the total block pressure and hence + * save the old pressure to restore it in case of failing to + * bring the variable into the block in a register. + */ + trans = trans_begin(ges); + bs = new_block_state(ges, bi); + pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1); + bs->pressure = pressure_inc; - bring_in_costs = can_bring_in(ges, block, irn, 1); - DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs)); + assert(bi->pressure <= k); + DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version)); + bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1); + DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs)); /* - * we were not able to let the value arrive - * in a register at the entrance of the block - * or it is too costly, so we have to do the reload locally + * Following cases can now occur: + * 1) There is room and costs ok + * 2) Cannot bring to use but can spill at begin and costs are ok + * 3) neither of both worked. + * + * following actions can be taken: + * a) commit changes + * b) mark phi as succeeded if node was phi + * c) insert reload at use location + * d) give a spill location hint + * + * this is the case/action matrix + * | abcd + * --+---------- + * 1 | XX + * 2 | XXXX + * 3 | X */ - if (bring_in_costs > local_costs) { - DBG((dbg, DBG_GLOBAL, " -> do local reload\n")); - be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1); - } else { + /* the costs were acceptable... */ + if (bring_in_costs < local_costs) { + int check = 0; + bring_in_t *iter; + /* + * case 1 and first part of case 2: + * commit all the changes done. this manifests the bring-in action. * if the transport-in was a phi (that is actually used in block) - * it will no longer remain and we have to spill it completely. + * mark it in the succ_phis set to *not* phi spill it. */ - if (is_local_phi(block, irn)) + materialize_and_commit_end_state(ges); + if (is_local_phi(bl, irn)) bitset_add_irn(ges->succ_phis, irn); - DBG((dbg, DBG_GLOBAL, " -> do remote reload\n")); - materialize_and_commit_end_state(ges); + DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc)); + + /* second half of case 2 */ + if (pressure_upto_use >= k) { + DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n", + br->first_use, better_spill_loc)); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + be_add_spill(env->senv, irn, better_spill_loc); + ir_nodeset_insert(env->extra_spilled, irn); + } + + /* + * go from the last bring in use to the first and add all the variables + * which additionally live through the block to their pressure. + * at the point were the actually treated use is, we have to increase + * the pressure by one more as the brought in value starts to count. + * Finally, adjust the front pressure as well. + */ + pressure_inc = 0; + list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) { + if (iter == br) + pressure_inc += pressure_upto_use < k; + iter->sect_pressure += pressure_inc; + check = MAX(check, iter->sect_pressure); + DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure)); + } + bi->front_pressure += pressure_inc; + assert(MAX(check, bi->front_pressure) <= bi->pressure); + DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure)); } - DBG((dbg, DBG_GLOBAL, "\n")); + /* case 3: nothing worked. insert normal reload and rollback. */ + else { + DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use)); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + bitset_add_irn(env->spilled, irn); + trans_rollback(ges, &trans); + } + } + + /* there was no opportunity for optimization at all. reload and be sad ... */ + else { + DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use)); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + bitset_add_irn(env->spilled, irn); + } + + DBG((dbg, DBG_GLOBAL, "\n")); + + /* reset the obstack and create a new version. */ + obstack_free(&ges->obst, reset_level); + ges->version = ver_make_newer(ges->version); +} + +static bring_in_t **determine_global_order(belady_env_t *env) +{ + bring_in_t **res; + bring_in_t *elm; + int i, n = 0; + + for (i = env->n_blocks - 1; i >= 0; --i) { + block_info_t *bi = get_block_info(env->blocks[i]); + list_for_each_entry(bring_in_t, elm, &bi->br_head, list) { + obstack_ptr_grow(&env->ob, elm); + n += 1; + } } + + obstack_ptr_grow(&env->ob, NULL); + res = obstack_finish(&env->ob); + qsort(res, n, sizeof(res[0]), bring_in_cmp); + return res; } + + + static void global_assign(belady_env_t *env) { + ir_nodeset_iterator_t iter; global_end_state_t ges; - int i; - - obstack_init(&ges.obst); - ges.gauge = 0; - ges.env = env; - ges.version = -1; - ges.end_info = NEW_ARR_F(block_end_state_t, env->n_blocks); - ges.succ_phis = bitset_irg_obstack_alloc(&env->ob, env->irg); + bring_in_t **br; + ir_node *irn; + int i, j; /* * sort the blocks according to execution frequency. * That's not necessary for belady() but for the global pass later on. */ - qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_gt); + qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt); + + memset(&ges, 0, sizeof(ges)); + obstack_init(&ges.obst); + ges.env = env; + ges.version = ver_make_newer(ver_oldest); + ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg); + ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks); + ges.bs_tops = OALLOCN(&ges.obst, block_state_t*, env->n_blocks); + ges.bs_tops_vers = OALLOCN(&ges.obst, unsigned, env->n_blocks); + + /* invalidate all state stack pointer versions */ + for (i = 0; i < env->n_blocks; ++i) { + block_info_t *bi = get_block_info(env->blocks[i]); + ges.bs_tops_vers[i] = ver_oldest; + + /* Set all block end sets entries to the youngest version */ + for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j) + workset_set_version(bi->ws_end, j, ver_youngest); + } - for (i = 0; i < env->n_blocks; ++i) - fix_block_borders(&ges, env->blocks[i]); + /* determine order and optimize them */ + for (br = determine_global_order(env); *br; ++br) + optimize_variable(&ges, *br); /* * Now we spill phis which cannot be kept since they were replaced @@ -998,13 +1437,15 @@ static void global_assign(belady_env_t *env) if (!is_Phi(irn)) break; - if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn) + if (arch_irn_consider_in_reg_alloc(env->cls, irn) && !bitset_contains_irn(ges.succ_phis, irn)) be_spill_phi(env->senv, irn); } } - DEL_ARR_F(ges.end_info); + /* check dominance for specially spilled nodes. */ + foreach_ir_nodeset (env->extra_spilled, irn, iter) + make_spill_locations_dominate_irn(env->senv, irn); } static void collect_blocks(ir_node *bl, void *data) @@ -1014,72 +1455,88 @@ static void collect_blocks(ir_node *bl, void *data) obstack_ptr_grow(&env->ob, bl); } -void be_spill_belady_spill_env2(be_irg_t *birg, const arch_register_class_t *cls, spill_env_t *spill_env) { - ir_graph *irg = be_get_birg_irg(birg); +/** + * Do spilling for a register class on a graph using the belady heuristic. + * In the transformed graph, the register pressure never exceeds the number + * of available registers. + * + * @param irg The graph + * @param cls The register class to spill + */ +static void be_spill_belady(ir_graph *irg, const arch_register_class_t *cls) +{ belady_env_t env; int i, n_regs; /* some special classes contain only ignore regs, nothing to do then */ - n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL); - if(n_regs == 0) + n_regs = cls->n_regs - be_put_ignore_regs(irg, cls, NULL); + if (n_regs == 0) return; be_clear_links(irg); /* init belady env */ obstack_init(&env.ob); - env.irg = irg; - env.arch = birg->main_env->arch_env; - env.cls = cls; - env.lv = be_get_birg_liveness(birg); - env.n_regs = n_regs; - env.ws = new_workset(&env, &env.ob); - env.senv = spill_env ? spill_env : be_new_spill_env(birg); - env.ef = be_get_birg_exec_freq(birg); - env.n_blocks = 0; + env.irg = irg; + env.arch = be_get_irg_arch_env(irg); + env.cls = cls; + env.lv = be_get_irg_liveness(irg); + env.dfs = env.lv->dfs; + env.n_regs = n_regs; + env.ws = new_workset(&env, &env.ob); + env.senv = be_new_spill_env(irg); + env.ef = be_get_irg_exec_freq(irg); + env.spilled = bitset_irg_obstack_alloc(&env.ob, irg); + env.extra_spilled = ir_nodeset_new(64); + env.n_blocks = 0; irg_block_walk_graph(irg, NULL, collect_blocks, &env); obstack_ptr_grow(&env.ob, NULL); env.blocks = obstack_finish(&env.ob); + /* renumbering in the blocks gives nicer debug output as number are smaller. */ +#ifdef DEBUG_libfirm + for (i = 0; i < env.n_blocks; ++i) + sched_renumber(env.blocks[i]); +#endif + /* Fix high register pressure in blocks with belady algorithm */ for (i = 0; i < env.n_blocks; ++i) - belady(env.blocks[i], &env); + belady(&env, i); global_assign(&env); + /* check dominance for specially spilled nodes. */ + { + ir_nodeset_iterator_t iter; + ir_node *irn; + + foreach_ir_nodeset (env.extra_spilled, irn, iter) + make_spill_locations_dominate_irn(env.senv, irn); + } + /* Insert spill/reload nodes into the graph and fix usages */ be_insert_spills_reloads(env.senv); /* clean up */ - if(spill_env == NULL) - be_delete_spill_env(env.senv); + be_delete_spill_env(env.senv); + ir_nodeset_del(env.extra_spilled); obstack_free(&env.ob, NULL); } - -/** - * Do spilling for a register class on a graph using the belady heuristic. - * In the transformed graph, the register pressure never exceeds the number - * of available registers. - * - * @param birg The backend graph - * @param cls The register class to spill - */ -static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) { - be_spill_belady_spill_env2(birg, cls, NULL); -} - - +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2); void be_init_spillbelady2(void) { + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill"); + lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2"); + static be_spiller_t belady_spiller = { be_spill_belady }; + lc_opt_add_table(bel2_grp, options); be_register_spiller("belady2", &belady_spiller); FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2"); } - -BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);