X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbespillbelady2.c;h=43f72e51ba018b0eece3d2661d32cac00a35131e;hb=c23b55879df97f49fc6f1e95651f9f28a980b620;hp=f6da9de8a35beb643abb2ca53db9763e61d71171;hpb=1443c53815cc0b1f97a33673113778cba74790f6;p=libfirm diff --git a/ir/be/bespillbelady2.c b/ir/be/bespillbelady2.c index f6da9de8a..43f72e51b 100644 --- a/ir/be/bespillbelady2.c +++ b/ir/be/bespillbelady2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -53,11 +53,11 @@ #include "ircons_t.h" #include "irprintf.h" #include "execfreq.h" +#include "dfs_t.h" #include "xmalloc.h" #include "beutil.h" #include "bearch_t.h" -#include "bespillbelady.h" #include "besched_t.h" #include "beirgmod.h" #include "belive_t.h" @@ -67,6 +67,10 @@ #include "beloopana.h" #include "beirg_t.h" #include "bemodule.h" +#include "bespill.h" + +#include "lc_opts.h" +#include "lc_opts_enum.h" #define DBG_SPILL 1 #define DBG_WSETS 2 @@ -78,8 +82,22 @@ #define DBG_WORKSET 128 #define DBG_GLOBAL 256 -#define DEAD UINT_MAX -#define LIVE_END (DEAD-1) +#define ALREADY_SPILLED_FACTOR 2 + +#define DEAD UINT_MAX +#define LIVE_END (DEAD-1) +#define REMAT_DIST (DEAD-2) + +static int already_spilled_factor = 2; +static int remat_live_range_ext = 1; +static int global_pass_enabled = 1; + +static const lc_opt_table_entry_t options[] = { + LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor), + LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext), + LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled), + LC_OPT_LAST +}; DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) @@ -103,19 +121,22 @@ typedef struct _workset_t { typedef struct _belady_env_t { struct obstack ob; ir_graph *irg; + const dfs_t *dfs; const arch_env_t *arch; const arch_register_class_t *cls; be_lv_t *lv; ir_exec_freq *ef; - ir_node **blocks; /**< Array of all blocks. */ - int n_blocks; /**< Number of blocks in the graph. */ - int n_regs; /**< number of regs in this reg-class */ - workset_t *ws; /**< the main workset used while processing a block. ob-allocated */ - ir_node *instr; /**< current instruction */ - unsigned instr_nr; /**< current instruction number (relative to block start) */ + ir_node **blocks; /**< Array of all blocks. */ + int n_blocks; /**< Number of blocks in the graph. */ + int n_regs; /**< number of regs in this reg-class */ + workset_t *ws; /**< the main workset used while processing a block. ob-allocated */ + ir_node *instr; /**< current instruction */ + int instr_nr; /**< current instruction number (relative to block start) */ - spill_env_t *senv; /**< see bespill.h */ + spill_env_t *senv; /**< see bespill.h */ + bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */ + ir_nodeset_t *extra_spilled; /** All nodes for which a special spill location has been computed. */ } belady_env_t; @@ -185,7 +206,7 @@ static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val int i; /* check for current regclass */ if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) { - DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val)); + // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val)); return; } @@ -247,30 +268,32 @@ static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) { #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare); #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0) +typedef struct _bring_in_t bring_in_t; + typedef struct _block_info_t { belady_env_t *bel; - const ir_node *bl; - workset_t *ws_start, *ws_end; - ir_phase next_uses; + ir_node *bl; int id; + ir_phase next_uses; + workset_t *ws_end; /**< The end set after the local belady pass. */ + double exec_freq; /**< The execution frequency of this block. */ + double reload_cost; /**< Cost of a reload in this block. */ ir_node *first_non_in; /**< First node in block which is not a phi. */ ir_node *last_ins; /**< The instruction before which end of block reloads will be inserted. */ - workset_t *entrance_reg; /**< That set will contain all values - transported into the block which - are used before they are displaced. - That means, we later have to care to - bring them into the block in a register - or reload them at the entry of the block. */ + int pressure; /**< The amount of registers which remain free + in this block. This capacity can be used to let + global variables, transported into other blocks, + live through this block. */ - int pressure; /**< The amount of registers which remain free - in this block. This capacity can be used to let - global variables, transported into other blocks, - live through this block. */ + int front_pressure; /**< The pressure right before the first + real (non-phi) node. At the beginning + of the global pass, this is 0. */ + struct list_head br_head; /**< List head for all bring_in variables. */ + int free_at_jump; /**< registers free at jump. */ - double exec_freq; /**< The execution frequency of this block. */ } block_info_t; static INLINE void *new_block_info(belady_env_t *bel, int id) @@ -283,8 +306,10 @@ static INLINE void *new_block_info(belady_env_t *bel, int id) res->bel = bel; res->bl = bl; res->id = id; - res->entrance_reg = new_workset(bel, &bel->ob); res->exec_freq = get_block_execfreq(bel->ef, bl); + res->reload_cost = bel->arch->isa->reload_cost * res->exec_freq; + res->free_at_jump = bel->n_regs; + INIT_LIST_HEAD(&res->br_head); set_irn_link(bl, res); return res; } @@ -305,12 +330,13 @@ typedef struct _next_use_t { in the block. Needed to identify transport in values for the global pass. */ - int step; /**< The time step of the use. */ + sched_timestep_t step; /**< The time step of the use. */ + ir_node *irn; struct _next_use_t *next; /**< The next use int this block or NULL. */ } next_use_t; -static void *next_use_init(ir_phase *phase, ir_node *irn, void *old) +static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old) { (void) phase; (void) irn; @@ -322,9 +348,11 @@ static void build_next_uses(block_info_t *bi) { ir_node *irn; + sched_renumber(bi->bl); + phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL); sched_foreach_reverse(bi->bl, irn) { - int i, step = sched_get_time_step(irn); + int i; if (is_Phi(irn)) break; @@ -334,13 +362,15 @@ static void build_next_uses(block_info_t *bi) next_use_t *curr = phase_get_irn_data(&bi->next_uses, op); next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0])); - assert(step >= 0); use->is_first_use = 1; - use->step = step; + use->step = sched_get_time_step(irn); use->next = curr; + use->irn = irn; - if (curr) + if (curr) { curr->is_first_use = 0; + assert(curr->step >= use->step); + } phase_set_irn_data(&bi->next_uses, op, use); } @@ -357,12 +387,124 @@ static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn) phase_set_irn_data(&bi->next_uses, irn, use->next); } +static __attribute__((unused)) int block_freq_gt(const void *a, const void *b) +{ + const ir_node * const *p = a; + const ir_node * const *q = b; + block_info_t *pi = get_block_info(*p); + block_info_t *qi = get_block_info(*q); + double diff = qi->exec_freq - pi->exec_freq; + return (diff > 0) - (diff < 0); +} + +static int block_freq_dfs_gt(const void *a, const void *b) +{ + const ir_node * const *p = a; + const ir_node * const *q = b; + block_info_t *pi = get_block_info(*p); + block_info_t *qi = get_block_info(*q); + double diff; + + if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0) + || (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) { + + const dfs_t *dfs = pi->bel->dfs; + int pp = dfs_get_post_num(dfs, pi->bl); + int pq = dfs_get_post_num(dfs, qi->bl); + return pq - pp; + } + + diff = qi->exec_freq - pi->exec_freq; + return (diff > 0) - (diff < 0); +} + +/* + ____ _ ___ + | __ ) _ __(_)_ __ __ _ |_ _|_ __ + | _ \| '__| | '_ \ / _` | | || '_ \ + | |_) | | | | | | | (_| | | || | | | + |____/|_| |_|_| |_|\__, | |___|_| |_| + |___/ + + Data structures to represent bring in variables. +*/ + +struct _bring_in_t { + ir_node *irn; /**< The node to bring in. */ + block_info_t *bi; /**< The block to which bring in should happen. */ + int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */ + ir_node *first_use; /**< The first user of irn in bl. */ + sched_timestep_t use_step; /**< Schedule sttep of the first use. */ + + int is_remat : 1; /**< Is rematerializable. */ + int sect_pressure; /**< Offset to maximum pressure in block. */ + struct list_head list; + struct list_head sect_list; + bring_in_t *sect_head; +}; + +static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use) +{ + bring_in_t *br = obstack_alloc(&bi->bel->ob, sizeof(br[0])); + + br->irn = irn; + br->bi = bi; + br->first_use = use->irn; + br->use_step = use->step; + br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn); + br->pressure_so_far = bi->pressure; + br->sect_pressure = bi->front_pressure; + br->sect_head = br; + + INIT_LIST_HEAD(&br->list); + INIT_LIST_HEAD(&br->sect_list); + list_add_tail(&br->list, &bi->br_head); + return br; +} + +static int bring_in_cmp(const void *a, const void *b) +{ + const bring_in_t *p = *(const bring_in_t * const *) a; + const bring_in_t *q = *(const bring_in_t * const *) b; + double fp, fq; + + /* if one of both is a remat node, it will be done after the other. */ + if (p->is_remat != q->is_remat) + return p->is_remat - q->is_remat; + + /* in the same block, the one further in the front has to be processed first! + * Otherwise the front_pressure 'trick' is not exact. */ + if (p->bi == q->bi) + return p->use_step - q->use_step; + + fp = p->bi->exec_freq; + fq = q->bi->exec_freq; + + /* if both have the same frequency, inspect the frequency of the definition */ + if (fp == fq) { + double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq; + double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq; + + /* if the defs of both have the same freq, we go for reverse dfs post order. */ + if (fdp == fdq) { + const dfs_t *dfs = p->bi->bel->dfs; + int pp = dfs_get_post_num(dfs, p->bi->bl); + int pq = dfs_get_post_num(dfs, q->bi->bl); + return pq - pp; + } + + return (fdq > fdp) - (fdq < fdp); + } + + return (fq > fp) - (fq < fp); +} + static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage) { - belady_env_t *env = bi->bel; - next_use_t *use = get_current_use(bi, irn); - int curr_step = sched_get_time_step(env->instr); - int flags = arch_irn_get_flags(env->arch, irn); + belady_env_t *env = bi->bel; + sched_timestep_t curr_step = sched_get_time_step(env->instr); + next_use_t *use = get_current_use(bi, irn); + int flags = arch_irn_get_flags(env->arch, irn); assert(!(flags & arch_irn_flags_ignore)); @@ -374,8 +516,18 @@ static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, i use = use->next; if (use) { + unsigned res = use->step - curr_step; + assert(use->step >= curr_step); - return use->step - curr_step; + + if (res != 0) { + if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn)) + res = REMAT_DIST; + else if (bitset_contains_irn(env->spilled, irn)) + res *= already_spilled_factor; + } + + return res; } return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD; @@ -399,7 +551,7 @@ static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn) */ static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn) { - return is_local_phi(bl, irn) || get_nodes_block(irn) != bl; + return get_nodes_block(irn) != bl || is_Phi(irn); } /** @@ -427,10 +579,9 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { /* mark value as used */ if (! workset_contains(ws, val)) { - DBG((dbg, DBG_DECIDE, " insert %+F\n", val)); + DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val)); to_insert[demand++] = val; if (is_usage) { - int insert_reload = 1; next_use_t *use = get_current_use(bi, val); /* @@ -440,28 +591,30 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { * entrance to here or not. */ assert(use); - assert(sched_get_time_step(env->instr) == use->step); + assert(sched_get_time_step(env->instr) == (int) use->step); if (is_transport_in(bi->bl, val) && use->is_first_use) { - DBG((dbg, DBG_DECIDE, "entrance node %+F, pressure %d:\n", val, bi->pressure)); - if (bi->pressure < env->n_regs) { - workset_insert(env, bi->entrance_reg, val); - insert_reload = 0; - ++bi->pressure; - DBG((dbg, DBG_DECIDE, "... no reload. must be considered at block start\n")); - } + bring_in_t *bri = new_bring_in(bi, val, use); + bri->first_use = env->instr; + + /* reset the section pressure, since a new section starts. */ + bi->front_pressure = 0; + + DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure)); + DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n")); } - if (insert_reload) { - DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, env->instr)); + else { + bitset_add_irn(env->spilled, val); + DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr)); be_add_reload(env->senv, val, env->instr, env->cls, 1); } } } else { - assert(is_usage || "Defined value already in workset?!?"); - DBG((dbg, DBG_DECIDE, " skip %+F\n", val)); + assert(is_usage && "Defined value already in workset?!?"); + DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val)); } } - DBG((dbg, DBG_DECIDE, " demand = %d\n", demand)); + DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand)); /* 2. Make room for at least 'demand' slots @@ -471,9 +624,7 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { /* Only make more free room if we do not have enough */ if (len > max_allowed) { - // int curr_step = sched_get_time_step(env->instr); - - DBG((dbg, DBG_DECIDE, " disposing %d values\n", len - max_allowed)); + DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed)); /* get current next-use distance */ for (i = 0; i < ws->len; ++i) { @@ -498,7 +649,9 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { for (i = 0; i < demand; ++i) workset_insert(env, env->ws, to_insert[i]); - bi->pressure = MAX(bi->pressure, workset_get_length(env->ws)); + /* TODO: simplify this expression? */ + bi->pressure = MAX(bi->pressure, workset_get_length(env->ws)); + bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws)); } /** @@ -509,13 +662,12 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { static void belady(belady_env_t *env, int id) { block_info_t *block_info = new_block_info(env, id); const ir_node *block = block_info->bl; - void *obst_state = obstack_base(&env->ob); workset_t *new_vals; ir_node *irn; int iter; - DBG((dbg, DBG_WSETS, "Processing %+F...\n", block_info->bl)); + DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl)); new_vals = new_workset(env, &env->ob); workset_clear(env->ws); @@ -533,11 +685,9 @@ static void belady(belady_env_t *env, int id) { /* projs are handled with the tuple value. * Phis are no real instr (see insert_starters()) * instr_nr does not increase */ - if (is_Proj(irn) || is_Phi(irn)) { - DBG((dbg, DBG_DECIDE, " ...%+F skipped\n", irn)); + if (is_Proj(irn) || is_Phi(irn)) continue; - } - DBG((dbg, DBG_DECIDE, " ...%+F\n", irn)); + DBG((dbg, DBG_DECIDE, "\t%+F\n", irn)); if (!block_info->first_non_in) block_info->first_non_in = irn; @@ -550,6 +700,7 @@ static void belady(belady_env_t *env, int id) { for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) { workset_insert(env, new_vals, get_irn_n(irn, i)); } + DBG((dbg, DBG_DECIDE, "\t* uses\n")); displace(block_info, new_vals, 1); /* @@ -581,13 +732,20 @@ static void belady(belady_env_t *env, int id) { } else { workset_insert(env, new_vals, irn); } + DBG((dbg, DBG_DECIDE, "\t* defs\n")); displace(block_info, new_vals, 0); + if (is_op_forking(get_irn_op(env->instr))) { + for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) { + ir_node *op = get_irn_n(env->instr, i); + block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->arch, env->cls, op); + } + } + env->instr_nr++; } phase_free(&block_info->next_uses); - obstack_free(&env->ob, obst_state); /* Remember end-workset for this block */ block_info->ws_end = workset_clone(env, &env->ob, env->ws); @@ -595,6 +753,9 @@ static void belady(belady_env_t *env, int id) { workset_foreach(block_info->ws_end, irn, iter) DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter))); DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure)); + + /* now, initialize the front pressure to 0. */ + block_info->front_pressure = 0; } /* @@ -616,16 +777,6 @@ static void belady(belady_env_t *env, int id) { #define ver_is_older(v, w) ((v) < (w)) #define ver_is_younger(v, w) ((v) > (w)) -static int block_freq_gt(const void *a, const void *b) -{ - const ir_node * const *p = a; - const ir_node * const *q = b; - block_info_t *pi = get_block_info(*p); - block_info_t *qi = get_block_info(*q); - double diff = qi->exec_freq - pi->exec_freq; - return (diff > 0) - (diff < 0); -} - enum { irn_act_none = 0, irn_act_reload, @@ -636,7 +787,7 @@ typedef struct _block_state_t { struct _block_state_t *next; struct _block_state_t *next_intern; block_info_t *bi; - unsigned pressure; + int pressure; workset_t *end_state; } block_state_t; @@ -667,7 +818,7 @@ typedef struct { irn_action_t *ia_top; } rollback_info_t; -static INLINE block_state_t *get_block_state(global_end_state_t *ges, block_info_t *bi) +static INLINE block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi) { int id = bi->id; assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version)); @@ -813,7 +964,7 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir */ { - int n_regs = bi->bel->n_regs; + int n_regs = bi->free_at_jump; int len = workset_get_length(end); int slot = -1; int i; @@ -853,7 +1004,10 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir workset_t *end = bs->end_state; ir_node *ins_before = block_info_get_last_ins(bi); double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before); - int pressure_ok = bs->pressure < (unsigned) n_regs; + int pressure_ok = bs->pressure < n_regs; + + if (reload_here < bi->reload_cost) + reload_here = 0.0; /* * No matter what we do, the value will be in the end set @@ -871,8 +1025,9 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n", level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient")); + /* look if we can bring the value in. */ - if (pressure_ok) { + if (pressure_ok && reload_here > 0.0) { rollback_info_t rb = trans_begin(ges); double new_limit = MIN(reload_here, limit); @@ -892,6 +1047,7 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir } } + DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level, vs->act == irn_act_reload ? "reloading" : "bringing in")); } @@ -914,7 +1070,6 @@ static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, d ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0])); rollback_info_t rb = trans_begin(ges); - glob_costs = 0.0; for (i = 0; i < n; ++i) { ir_node *pr = get_Block_cfgpred_block(bl, i); @@ -959,9 +1114,18 @@ static void materialize_and_commit_end_state(global_end_state_t *ges) for (ia = ges->ia_top; ia != NULL; ia = ia->next) { switch (ia->act) { case irn_act_live_through: - if (is_local_phi(ia->bl, ia->irn)) { - bitset_add_irn(ges->succ_phis, ia->irn); - DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn)); + { + block_info_t *bi = get_block_info(ia->bl); + bring_in_t *iter; + + if (is_local_phi(ia->bl, ia->irn)) { + bitset_add_irn(ges->succ_phis, ia->irn); + DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn)); + } + + list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) + ++iter->sect_pressure; + ++bi->front_pressure; } break; case irn_act_reload: @@ -994,77 +1158,259 @@ static void materialize_and_commit_end_state(global_end_state_t *ges) bitset_clear_all(ges->committed); } +static ir_node *better_spilled_here(const bring_in_t *br) +{ + const block_info_t *bi = br->bi; + double spill_ef = get_block_info(get_nodes_block(br->irn))->exec_freq; + + /* + * If the bring in node is a phi in the bring in block, + * we look at all definitions and sum up their execution frequencies, + * since spills will be placed there. + * (except for the case where an operand is also a phi which is spilled :-( ) + * If that cost is higher than spilling the phi in that block, we opt for + * bringing the phi into the block and spill it there. + */ + if (is_local_phi(bi->bl, br->irn)) { + ir_node *bl = bi->bl; + int i; + + spill_ef = 0.0; + for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i) + spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq; + } + + return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL; +} + +static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br) +{ + const struct list_head *l; + int res = INT_MIN; + + assert(br->bi == bi); + for (l = &br->list; l != &bi->br_head; l = l->prev) { + br = list_entry(l, bring_in_t, list); + res = MAX(res, br->sect_pressure); + } + + /* finally consider the front pressure distance and add the reference line (the global block pressure) */ + return MAX(res, bi->front_pressure); +} + +#define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list) + +#if 0 +static int get_block_max_pressure(const block_info_t *bi) +{ + int max = get_max_pressure_so_far(bi, block_last_bring_in(bi)); + return MAX(bi->pressure, max); +} +#endif + /** - * Examine all irns which shall be in regs at the beginning of the - * block. + * Try to bring a variable into a block. + * @param ges The state of all end sets. + * @param block The block. + * @param irn The variable. */ -static void fix_block_borders(global_end_state_t *ges, ir_node *block) { - block_info_t *bi = get_block_info(block); - belady_env_t *env = ges->env; - void *reset_level = obstack_base(&ges->obst); +static void optimize_variable(global_end_state_t *ges, bring_in_t *br) +{ + block_info_t *bi = br->bi; + ir_node *irn = br->irn; + ir_node *bl = bi->bl; + belady_env_t *env = ges->env; + void *reset_level = obstack_base(&ges->obst); + int k = env->n_regs; + int pressure_upto_use = get_max_pressure_so_far(bi, br); + int front_pressure = bi->front_pressure; + ir_node *better_spill_loc = NULL; - ir_node *irn; - int i; + assert(front_pressure <= k); + assert(pressure_upto_use <= k); + + DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n", + irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use)); - for (i = workset_get_length(bi->ws_end) - 1; i >= 0; --i) - workset_set_version(bi->ws_end, i, ver_youngest); + // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn)); + + /* + * if we cannot bring the value to the use, let's see ifit would be worthwhile + * to bring the value to the beginning of the block to have a better spill + * location. + * + * better _spilled_here will return a node where the value can be spilled after + * or NULL if this block does not provide a better spill location. + */ +#if 1 + if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn)) + better_spill_loc = better_spilled_here(br); +#endif - DBG((dbg, DBG_GLOBAL, "fixing block borders at %+F (%f)\n", block, bi->exec_freq)); + /* + * If either we can bring the value to the use or we should try + * to bring it here to do the spill here, let's try to bring it in. + */ + if (better_spill_loc || pressure_upto_use < k) { + block_state_t *bs; + double bring_in_costs, local_costs; + rollback_info_t trans; + int pressure_inc; - /* process all variables which shall be in a reg at - * the beginning of the block in the order of the next use. */ - workset_foreach(bi->entrance_reg, irn, i) { - double local_costs = be_get_reload_costs(env->senv, irn, bi->first_non_in); - double bring_in_costs; + /* process all variables which shall be in a reg at + * the beginning of the block in the order of the next use. */ + local_costs = be_get_reload_costs(env->senv, irn, br->first_use); /* reset the lists */ ges->bs_top = NULL; ges->ia_top = NULL; - DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version)); + /* if the variable will live into this block, we must adapt the pressure. + * The new pressure is the MAX of: + * 1) the total block pressure + * 2) the pressure so far + the front pressure increase + 1 + * + * If the second is larger than the first, + * we have to increment the total block pressure and hence + * save the old pressure to restire it in case of failing to + * bring the variable into the block in a register. + */ + trans = trans_begin(ges); + bs = new_block_state(ges, bi); + pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1); + bs->pressure = pressure_inc; - bring_in_costs = can_bring_in(ges, block, irn, local_costs, 1); - DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs)); + assert(bi->pressure <= k); + DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version)); + bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1); + DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs)); /* - * we were not able to let the value arrive - * in a register at the entrance of the block - * or it is too costly, so we have to do the reload locally + * Following cases can now occur: + * 1) There is room and costs ok + * 2) Cannot bring to use but can spill at begin and costs are ok + * 3) neither of both worked. + * + * following actions can be taken: + * a) commit changes + * b) mark phi as succeded if node was phi + * c) insert reload at use location + * d) give a spill location hint + * + * this is the case/action matrix + * | abcd + * --+---------- + * 1 | XX + * 2 | XXXX + * 3 | X */ - if (bring_in_costs >= local_costs) { - DBG((dbg, DBG_GLOBAL, " -> do local reload\n")); - be_add_reload(env->senv, irn, bi->first_non_in, env->cls, 1); - } else { + + /* the costs were acceptable... */ + if (bring_in_costs < local_costs) { + int check = 0; + bring_in_t *iter; + /* + * case 1 and first part of case 2: + * commit all the changes done. this manifests the bring-in action. * if the transport-in was a phi (that is actually used in block) - * it will no longer remain and we have to spill it completely. + * mark it in the succ_phis set to *not* phi spill it. */ - if (is_local_phi(block, irn)) + materialize_and_commit_end_state(ges); + if (is_local_phi(bl, irn)) bitset_add_irn(ges->succ_phis, irn); - DBG((dbg, DBG_GLOBAL, " -> bring it in\n")); - materialize_and_commit_end_state(ges); + DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc)); + + /* second half of case 2 */ + if (pressure_upto_use >= k) { + DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n", + br->first_use, better_spill_loc)); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + be_add_spill(env->senv, irn, sched_next(better_spill_loc)); + ir_nodeset_insert(env->extra_spilled, irn); + } + + /* + * go from the last bring in use to the first and add all the variabled + * which additionally live through the block to their pressure. + * at the point were the actually treated use is, we have to increase + * the pressure by one more as the nrought in value starts to count. + * Finally, adjust the front pressure as well. + */ + pressure_inc = 0; + list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) { + if (iter == br) + pressure_inc += pressure_upto_use < k; + iter->sect_pressure += pressure_inc; + check = MAX(check, iter->sect_pressure); + DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure)); + } + bi->front_pressure += pressure_inc; + assert(MAX(check, bi->front_pressure) <= bi->pressure); + DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure)); } - DBG((dbg, DBG_GLOBAL, "\n")); + /* case 3: nothing worked. insert normal reload and rollback. */ + else { + DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use)); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + bitset_add_irn(env->spilled, irn); + trans_rollback(ges, &trans); + } + } - /* reset the obstack and create a new version. */ - obstack_free(&ges->obst, reset_level); - ges->version = ver_make_newer(ges->version); + /* there was no opportunity for optimization at all. reload and be sad ... */ + else { + DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use)); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + bitset_add_irn(env->spilled, irn); } + + DBG((dbg, DBG_GLOBAL, "\n")); + + /* reset the obstack and create a new version. */ + obstack_free(&ges->obst, reset_level); + ges->version = ver_make_newer(ges->version); } +static bring_in_t **determine_global_order(belady_env_t *env) +{ + bring_in_t **res; + bring_in_t *elm; + int i, n = 0; + + for (i = env->n_blocks - 1; i >= 0; --i) { + block_info_t *bi = get_block_info(env->blocks[i]); + list_for_each_entry(bring_in_t, elm, &bi->br_head, list) { + obstack_ptr_grow(&env->ob, elm); + n += 1; + } + } + + obstack_ptr_grow(&env->ob, NULL); + res = obstack_finish(&env->ob); + qsort(res, n, sizeof(res[0]), bring_in_cmp); + return res; +} + + + + static void global_assign(belady_env_t *env) { + ir_nodeset_iterator_t iter; global_end_state_t ges; - int i; + bring_in_t **br; + ir_node *irn; + int i, j; /* * sort the blocks according to execution frequency. * That's not necessary for belady() but for the global pass later on. */ - qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_gt); + qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt); memset(&ges, 0, sizeof(ges)); obstack_init(&ges.obst); @@ -1075,11 +1421,19 @@ static void global_assign(belady_env_t *env) ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks); ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks); - for (i = 0; i < env->n_blocks; ++i) + /* invalidate all state stack pointer versions */ + for (i = 0; i < env->n_blocks; ++i) { + block_info_t *bi = get_block_info(env->blocks[i]); ges.bs_tops_vers[i] = ver_oldest; - for (i = 0; i < env->n_blocks; ++i) - fix_block_borders(&ges, env->blocks[i]); + /* Set all block end sets entries to the youngest version */ + for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j) + workset_set_version(bi->ws_end, j, ver_youngest); + } + + /* determine ordeer and optimize them */ + for (br = determine_global_order(env); *br; ++br) + optimize_variable(&ges, *br); /* * Now we spill phis which cannot be kept since they were replaced @@ -1098,6 +1452,10 @@ static void global_assign(belady_env_t *env) be_spill_phi(env->senv, irn); } } + + /* check dominance for specially spilled nodes. */ + foreach_ir_nodeset (env->extra_spilled, irn, iter) + make_spill_locations_dominate_irn(env->senv, irn); } static void collect_blocks(ir_node *bl, void *data) @@ -1130,41 +1488,65 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) /* init belady env */ obstack_init(&env.ob); - env.irg = irg; - env.arch = birg->main_env->arch_env; - env.cls = cls; - env.lv = be_get_birg_liveness(birg); - env.n_regs = n_regs; - env.ws = new_workset(&env, &env.ob); - env.senv = be_new_spill_env(birg); - env.ef = be_get_birg_exec_freq(birg); - env.n_blocks = 0; + env.irg = irg; + env.arch = be_get_birg_arch_env(birg); + env.cls = cls; + env.lv = be_get_birg_liveness(birg); + env.dfs = env.lv->dfs; + env.n_regs = n_regs; + env.ws = new_workset(&env, &env.ob); + env.senv = be_new_spill_env(birg); + env.ef = be_get_birg_exec_freq(birg); + env.spilled = bitset_irg_obstack_alloc(&env.ob, irg); + env.extra_spilled = ir_nodeset_new(64); + env.n_blocks = 0; irg_block_walk_graph(irg, NULL, collect_blocks, &env); obstack_ptr_grow(&env.ob, NULL); env.blocks = obstack_finish(&env.ob); + /* renumbering in the blocks gives nicer debug output as number are smaller. */ +#ifdef DEBUG_libfirm + for (i = 0; i < env.n_blocks; ++i) + sched_renumber(env.blocks[i]); +#endif + /* Fix high register pressure in blocks with belady algorithm */ for (i = 0; i < env.n_blocks; ++i) belady(&env, i); global_assign(&env); + /* check dominance for specially spilled nodes. */ + { + ir_nodeset_iterator_t iter; + ir_node *irn; + + foreach_ir_nodeset (env.extra_spilled, irn, iter) + make_spill_locations_dominate_irn(env.senv, irn); + } + /* Insert spill/reload nodes into the graph and fix usages */ be_insert_spills_reloads(env.senv); /* clean up */ be_delete_spill_env(env.senv); + ir_nodeset_del(env.extra_spilled); obstack_free(&env.ob, NULL); } void be_init_spillbelady2(void) { + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill"); + lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2"); + static be_spiller_t belady_spiller = { be_spill_belady }; + lc_opt_add_table(bel2_grp, options); be_register_spiller("belady2", &belady_spiller); FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2"); }