X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbespillbelady2.c;h=4b2386fa9acce7599b7e96a61735c19bec339e59;hb=568a27341dd03831ca76a7fa3b83f7fdcfbb0859;hp=58fd6e4265c2e2b2963ba7c9915731fd180d2609;hpb=bc000a2f2ad1e1e7ca6b0eb832a8f4a8173fd301;p=libfirm diff --git a/ir/be/bespillbelady2.c b/ir/be/bespillbelady2.c index 58fd6e426..4b2386fa9 100644 --- a/ir/be/bespillbelady2.c +++ b/ir/be/bespillbelady2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -32,9 +32,7 @@ * capacity of the blocks to let global variables live through * them. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include #include @@ -54,24 +52,22 @@ #include "irprintf.h" #include "execfreq.h" #include "dfs_t.h" -#include "xmalloc.h" #include "beutil.h" -#include "bearch_t.h" -#include "bespillbelady.h" -#include "besched_t.h" +#include "bearch.h" +#include "besched.h" #include "beirgmod.h" #include "belive_t.h" -#include "benode_t.h" +#include "benode.h" #include "bechordal_t.h" -#include "bespilloptions.h" +#include "bespill.h" #include "beloopana.h" -#include "beirg_t.h" +#include "beirg.h" #include "bemodule.h" +#include "bespillutil.h" -#include -#include -#include +#include "lc_opts.h" +#include "lc_opts_enum.h" #define DBG_SPILL 1 #define DBG_WSETS 2 @@ -128,15 +124,16 @@ typedef struct _belady_env_t { be_lv_t *lv; ir_exec_freq *ef; - ir_node **blocks; /**< Array of all blocks. */ - int n_blocks; /**< Number of blocks in the graph. */ - int n_regs; /**< number of regs in this reg-class */ - workset_t *ws; /**< the main workset used while processing a block. ob-allocated */ - ir_node *instr; /**< current instruction */ - int instr_nr; /**< current instruction number (relative to block start) */ + ir_node **blocks; /**< Array of all blocks. */ + int n_blocks; /**< Number of blocks in the graph. */ + int n_regs; /**< number of regs in this reg-class */ + workset_t *ws; /**< the main workset used while processing a block. ob-allocated */ + ir_node *instr; /**< current instruction */ + int instr_nr; /**< current instruction number (relative to block start) */ - spill_env_t *senv; /**< see bespill.h */ - bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */ + spill_env_t *senv; /**< see bespill.h */ + bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */ + ir_nodeset_t *extra_spilled; /** All nodes for which a special spill location has been computed. */ } belady_env_t; @@ -147,11 +144,11 @@ static int loc_compare(const void *a, const void *b) return (p->time > q->time) - (p->time < q->time); } -static INLINE void workset_print(const workset_t *w) +static inline void workset_print(const workset_t *w) { int i; - for(i = 0; i < w->len; ++i) { + for (i = 0; i < w->len; ++i) { ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time); } } @@ -159,22 +156,18 @@ static INLINE void workset_print(const workset_t *w) /** * Alloc a new workset on obstack @p ob with maximum size @p max */ -static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) { - workset_t *res; - size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]); - res = obstack_alloc(ob, size); - memset(res, 0, size); - return res; +static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob) +{ + return OALLOCFZ(ob, workset_t, vals, env->n_regs); } /** * Alloc a new instance on obstack and make it equal to @param ws */ -static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) { - workset_t *res; - size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]); - res = obstack_alloc(ob, size); - memcpy(res, ws, size); +static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) +{ + workset_t *res = OALLOCF(ob, workset_t, vals, env->n_regs); + memcpy(res, ws, sizeof(*res) + (env->n_regs)*sizeof(res->vals[0])); return res; } @@ -182,7 +175,8 @@ static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, wo * Do NOT alloc anything. Make @param tgt equal to @param src. * returns @param tgt for convenience */ -static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) { +static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) +{ size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]); memcpy(tgt, src, size); return tgt; @@ -193,7 +187,8 @@ static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset * @param count locations given at memory @param locs. * Set the length of @param ws to count. */ -static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) { +static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) +{ workset->len = count; memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0])); } @@ -202,16 +197,17 @@ static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t * Inserts the value @p val into the workset, iff it is not * already contained. The workset must not be full. */ -static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) { +static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) +{ int i; /* check for current regclass */ - if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) { + if (!arch_irn_consider_in_reg_alloc(env->cls, val)) { // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val)); return; } /* check if val is already contained */ - for(i=0; ilen; ++i) + for (i=0; ilen; ++i) if (ws->vals[i].irn == val) return; @@ -223,16 +219,18 @@ static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val /** * Removes all entries from this workset */ -static INLINE void workset_clear(workset_t *ws) { +static inline void workset_clear(workset_t *ws) +{ ws->len = 0; } /** * Removes the value @p val from the workset if present. */ -static INLINE void workset_remove(workset_t *ws, ir_node *val) { +static inline void workset_remove(workset_t *ws, ir_node *val) +{ int i; - for(i=0; ilen; ++i) { + for (i=0; ilen; ++i) { if (ws->vals[i].irn == val) { ws->vals[i] = ws->vals[--ws->len]; return; @@ -240,9 +238,10 @@ static INLINE void workset_remove(workset_t *ws, ir_node *val) { } } -static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) { +static inline int workset_get_index(const workset_t *ws, const ir_node *val) +{ int i; - for(i=0; ilen; ++i) { + for (i=0; ilen; ++i) { if (ws->vals[i].irn == val) return i; } @@ -256,7 +255,7 @@ static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) { * @p v A variable to put the current value in * @p i An integer for internal use */ -#define workset_foreach(ws, v, i) for(i=0; \ +#define workset_foreach(ws, v, i) for (i=0; \ v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \ ++i) @@ -292,21 +291,22 @@ typedef struct _block_info_t { real (non-phi) node. At the beginning of the global pass, this is 0. */ struct list_head br_head; /**< List head for all bring_in variables. */ + int free_at_jump; /**< registers free at jump. */ } block_info_t; -static INLINE void *new_block_info(belady_env_t *bel, int id) +static inline void *new_block_info(belady_env_t *bel, int id) { ir_node *bl = bel->blocks[id]; - block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res)); - memset(res, 0, sizeof(res[0])); + block_info_t *res = OALLOCZ(&bel->ob, block_info_t); res->first_non_in = NULL; - res->last_ins = NULL; - res->bel = bel; - res->bl = bl; - res->id = id; + res->last_ins = NULL; + res->bel = bel; + res->bl = bl; + res->id = id; res->exec_freq = get_block_execfreq(bel->ef, bl); - res->reload_cost = bel->arch->isa->reload_cost * res->exec_freq; + res->reload_cost = bel->arch->reload_cost * res->exec_freq; + res->free_at_jump = bel->n_regs; INIT_LIST_HEAD(&res->br_head); set_irn_link(bl, res); return res; @@ -315,7 +315,7 @@ static INLINE void *new_block_info(belady_env_t *bel, int id) #define get_block_info(block) ((block_info_t *)get_irn_link(block)) #define set_block_info(block, info) set_irn_link(block, info) -static INLINE ir_node *block_info_get_last_ins(block_info_t *bi) +static inline ir_node *block_info_get_last_ins(block_info_t *bi) { if (!bi->last_ins) bi->last_ins = be_get_end_of_block_insertion_point(bi->bl); @@ -334,7 +334,7 @@ typedef struct _next_use_t { or NULL. */ } next_use_t; -static void *next_use_init(ir_phase *phase, ir_node *irn, void *old) +static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old) { (void) phase; (void) irn; @@ -348,7 +348,7 @@ static void build_next_uses(block_info_t *bi) sched_renumber(bi->bl); - phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL); + phase_init(&bi->next_uses, bi->bel->irg, next_use_init); sched_foreach_reverse(bi->bl, irn) { int i; @@ -377,7 +377,7 @@ static void build_next_uses(block_info_t *bi) #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn)) -static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn) +static inline void advance_current_use(block_info_t *bi, const ir_node *irn) { next_use_t *use = get_current_use(bi, irn); @@ -432,17 +432,18 @@ struct _bring_in_t { block_info_t *bi; /**< The block to which bring in should happen. */ int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */ ir_node *first_use; /**< The first user of irn in bl. */ - sched_timestep_t use_step; /**< Schedule sttep of the first use. */ + sched_timestep_t use_step; /**< Schedule step of the first use. */ int is_remat : 1; /**< Is rematerializable. */ int sect_pressure; /**< Offset to maximum pressure in block. */ struct list_head list; + struct list_head sect_list; + bring_in_t *sect_head; }; -static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use) +static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use) { - bring_in_t *br = obstack_alloc(&bi->bel->ob, sizeof(br[0])); - + bring_in_t *br = OALLOC(&bi->bel->ob, bring_in_t); br->irn = irn; br->bi = bi; br->first_use = use->irn; @@ -450,8 +451,10 @@ static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const nex br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn); br->pressure_so_far = bi->pressure; br->sect_pressure = bi->front_pressure; + br->sect_head = br; INIT_LIST_HEAD(&br->list); + INIT_LIST_HEAD(&br->sect_list); list_add_tail(&br->list, &bi->br_head); return br; } @@ -493,17 +496,17 @@ static int bring_in_cmp(const void *a, const void *b) return (fq > fp) - (fq < fp); } -static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage) +static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage) { belady_env_t *env = bi->bel; sched_timestep_t curr_step = sched_get_time_step(env->instr); next_use_t *use = get_current_use(bi, irn); - int flags = arch_irn_get_flags(env->arch, irn); + int flags = arch_irn_get_flags(irn); - assert(!(flags & arch_irn_flags_ignore)); + assert(!arch_irn_is_ignore(irn)); - /* We have to keep nonspillable nodes in the workingset */ - if(flags & arch_irn_flags_dont_spill) + /* We have to keep non-spillable nodes in the working set */ + if (flags & arch_irn_flags_dont_spill) return 0; if (!is_usage && use && use->step == curr_step) @@ -527,7 +530,7 @@ static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, i return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD; } -static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn) +static inline int is_local_phi(const ir_node *bl, const ir_node *irn) { return is_Phi(irn) && get_nodes_block(irn) == bl; } @@ -540,10 +543,10 @@ static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn) * @param irn The node in question. * @return 1, if node is something transported into @p bl, 0 if not. * @note The function will only give correct answers in the case - * where @p irn is unsed in the block @p bl which is always + * where @p irn is unused in the block @p bl which is always * the case in our usage scenario. */ -static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn) +static inline int is_transport_in(const ir_node *bl, const ir_node *irn) { return get_nodes_block(irn) != bl || is_Phi(irn); } @@ -557,10 +560,11 @@ static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn) * @p is_usage indicates that the values in new_vals are used (not defined) * In this case reloads must be performed */ -static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { +static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) +{ belady_env_t *env = bi->bel; workset_t *ws = env->ws; - ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0])); + ir_node **to_insert = ALLOCAN(ir_node*, env->n_regs); int i, len, max_allowed, demand, iter; ir_node *val; @@ -604,7 +608,7 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { } } } else { - assert(is_usage || "Defined value already in workset?!?"); + assert(is_usage && "Defined value already in workset?!?"); DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val)); } } @@ -653,7 +657,8 @@ static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) { * whether it is used from a register or is reloaded * before the use. */ -static void belady(belady_env_t *env, int id) { +static void belady(belady_env_t *env, int id) +{ block_info_t *block_info = new_block_info(env, id); const ir_node *block = block_info->bl; @@ -676,7 +681,7 @@ static void belady(belady_env_t *env, int id) { int i, arity; assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!"); - /* projs are handled with the tuple value. + /* Projs are handled with the tuple value. * Phis are no real instr (see insert_starters()) * instr_nr does not increase */ if (is_Proj(irn) || is_Phi(irn)) @@ -691,7 +696,7 @@ static void belady(belady_env_t *env, int id) { /* allocate all values _used_ by this instruction */ workset_clear(new_vals); - for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) { + for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) { workset_insert(env, new_vals, get_irn_n(irn, i)); } DBG((dbg, DBG_DECIDE, "\t* uses\n")); @@ -703,7 +708,7 @@ static void belady(belady_env_t *env, int id) { * augmenting the pressure. Note, that a variable is dead * if it has no further use in this block and is *not* live end */ - for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) { + for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) { ir_node *op = get_irn_n(irn, i); next_use_t *use = get_current_use(block_info, op); @@ -716,7 +721,7 @@ static void belady(belady_env_t *env, int id) { /* allocate all values _defined_ by this instruction */ workset_clear(new_vals); - if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */ + if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */ const ir_edge_t *edge; foreach_out_edge(irn, edge) { @@ -729,10 +734,17 @@ static void belady(belady_env_t *env, int id) { DBG((dbg, DBG_DECIDE, "\t* defs\n")); displace(block_info, new_vals, 0); + if (is_op_forking(get_irn_op(env->instr))) { + for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) { + ir_node *op = get_irn_n(env->instr, i); + block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op); + } + } + env->instr_nr++; } - phase_free(&block_info->next_uses); + phase_deinit(&block_info->next_uses); /* Remember end-workset for this block */ block_info->ws_end = workset_clone(env, &env->ob, env->ws); @@ -805,14 +817,14 @@ typedef struct { irn_action_t *ia_top; } rollback_info_t; -static INLINE block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi) +static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi) { int id = bi->id; assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version)); return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id]; } -static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi) +static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi) { block_state_t *bs = get_block_state(ges, bi); return bs ? bs->end_state : bi->ws_end; @@ -821,7 +833,7 @@ static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi) { block_state_t *bs = get_block_state(ges, bi); - block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0])); + block_state_t *nw = OALLOC(&ges->obst, block_state_t); nw->next_intern = bs; nw->next = ges->bs_top; @@ -844,7 +856,7 @@ static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi) static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl) { - irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0])); + irn_action_t *ia = OALLOC(&ges->obst, irn_action_t); ia->irn = irn; ia->bl = bl; @@ -854,7 +866,7 @@ static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const return ia; } -static INLINE rollback_info_t trans_begin(global_end_state_t *ges) +static inline rollback_info_t trans_begin(global_end_state_t *ges) { rollback_info_t rb; rb.obst_level = obstack_base(&ges->obst); @@ -863,7 +875,7 @@ static INLINE rollback_info_t trans_begin(global_end_state_t *ges) return rb; } -static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb) +static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb) { block_state_t *bs; @@ -951,7 +963,7 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir */ { - int n_regs = bi->bel->n_regs; + int n_regs = bi->free_at_jump; int len = workset_get_length(end); int slot = -1; int i; @@ -983,7 +995,7 @@ static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir /* * finally there is some room. we can at least reload the value. - * but we will try to let ot live through anyhow. + * but we will try to let or live through anyhow. */ if (slot >= 0) { irn_action_t *vs = new_irn_action(ges, irn, bi->bl); @@ -1054,7 +1066,6 @@ static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, d if (is_transport_in(bl, irn)) { int i, n = get_irn_arity(bl); - ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0])); rollback_info_t rb = trans_begin(ges); glob_costs = 0.0; @@ -1064,10 +1075,10 @@ static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, d double c; /* - * there might by unknwons as operands of phis in that case + * there might by Unknowns as operands of Phis in that case * we set the costs to zero, since they won't get spilled. */ - if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op)) + if (arch_irn_consider_in_reg_alloc(env->cls, op)) c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1); else c = 0.0; @@ -1101,9 +1112,18 @@ static void materialize_and_commit_end_state(global_end_state_t *ges) for (ia = ges->ia_top; ia != NULL; ia = ia->next) { switch (ia->act) { case irn_act_live_through: - if (is_local_phi(ia->bl, ia->irn)) { - bitset_add_irn(ges->succ_phis, ia->irn); - DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn)); + { + block_info_t *bi = get_block_info(ia->bl); + bring_in_t *iter; + + if (is_local_phi(ia->bl, ia->irn)) { + bitset_add_irn(ges->succ_phis, ia->irn); + DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn)); + } + + list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) + ++iter->sect_pressure; + ++bi->front_pressure; } break; case irn_act_reload: @@ -1128,7 +1148,6 @@ static void materialize_and_commit_end_state(global_end_state_t *ges) DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n", bi->pressure, bs->pressure, workset_get_length(bs->end_state))); bi->pressure = bs->pressure; - /* TODO: commit front pressure */ bitset_set(ges->committed, bi->id); } } @@ -1195,7 +1214,7 @@ static int get_block_max_pressure(const block_info_t *bi) */ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) { - block_info_t *bi = br->bi; + block_info_t *bi = br->bi; ir_node *irn = br->irn; ir_node *bl = bi->bl; belady_env_t *env = ges->env; @@ -1208,19 +1227,23 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) assert(front_pressure <= k); assert(pressure_upto_use <= k); - DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %u\n", + DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n", irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use)); + // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn)); + /* - * if we cannot bring the value to the use, let's see ifit would be worthwhile + * if we cannot bring the value to the use, let's see if it would be worthwhile * to bring the value to the beginning of the block to have a better spill * location. * * better _spilled_here will return a node where the value can be spilled after * or NULL if this block does not provide a better spill location. */ - if (pressure_upto_use >= k && front_pressure < k) +#if 1 + if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn)) better_spill_loc = better_spilled_here(br); +#endif /* * If either we can bring the value to the use or we should try @@ -1247,7 +1270,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) * * If the second is larger than the first, * we have to increment the total block pressure and hence - * save the old pressure to restire it in case of failing to + * save the old pressure to restore it in case of failing to * bring the variable into the block in a register. */ trans = trans_begin(ges); @@ -1269,7 +1292,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) * * following actions can be taken: * a) commit changes - * b) mark phi as succeded if node was phi + * b) mark phi as succeeded if node was phi * c) insert reload at use location * d) give a spill location hint * @@ -1283,6 +1306,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) /* the costs were acceptable... */ if (bring_in_costs < local_costs) { + int check = 0; bring_in_t *iter; /* @@ -1295,37 +1319,42 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) if (is_local_phi(bl, irn)) bitset_add_irn(ges->succ_phis, irn); - pressure_inc = bi->pressure - pressure_inc; - assert(pressure_inc >= 0); - - DBG((dbg, DBG_GLOBAL, "\t-> bring it in\n")); + DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc)); /* second half of case 2 */ if (pressure_upto_use >= k) { DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n", br->first_use, better_spill_loc)); - be_add_reload2(env->senv, irn, br->first_use, better_spill_loc, env->cls, 1); + be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + be_add_spill(env->senv, irn, better_spill_loc); + ir_nodeset_insert(env->extra_spilled, irn); } /* - * go from the last bring in use to the first and add all the variabled + * go from the last bring in use to the first and add all the variables * which additionally live through the block to their pressure. * at the point were the actually treated use is, we have to increase - * the pressure by one more as the nrought in value starts to count. + * the pressure by one more as the brought in value starts to count. * Finally, adjust the front pressure as well. */ + pressure_inc = 0; list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) { if (iter == br) pressure_inc += pressure_upto_use < k; iter->sect_pressure += pressure_inc; + check = MAX(check, iter->sect_pressure); + DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure)); } bi->front_pressure += pressure_inc; + assert(MAX(check, bi->front_pressure) <= bi->pressure); + DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure)); } /* case 3: nothing worked. insert normal reload and rollback. */ else { DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use)); be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + bitset_add_irn(env->spilled, irn); trans_rollback(ges, &trans); } } @@ -1334,6 +1363,7 @@ static void optimize_variable(global_end_state_t *ges, bring_in_t *br) else { DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use)); be_add_reload(env->senv, irn, br->first_use, env->cls, 1); + bitset_add_irn(env->spilled, irn); } DBG((dbg, DBG_GLOBAL, "\n")); @@ -1363,10 +1393,15 @@ static bring_in_t **determine_global_order(belady_env_t *env) return res; } + + + static void global_assign(belady_env_t *env) { + ir_nodeset_iterator_t iter; global_end_state_t ges; bring_in_t **br; + ir_node *irn; int i, j; /* @@ -1381,8 +1416,8 @@ static void global_assign(belady_env_t *env) ges.version = ver_make_newer(ver_oldest); ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg); ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks); - ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks); - ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks); + ges.bs_tops = OALLOCN(&ges.obst, block_state_t*, env->n_blocks); + ges.bs_tops_vers = OALLOCN(&ges.obst, unsigned, env->n_blocks); /* invalidate all state stack pointer versions */ for (i = 0; i < env->n_blocks; ++i) { @@ -1394,7 +1429,7 @@ static void global_assign(belady_env_t *env) workset_set_version(bi->ws_end, j, ver_youngest); } - /* determine ordeer and optimize them */ + /* determine order and optimize them */ for (br = determine_global_order(env); *br; ++br) optimize_variable(&ges, *br); @@ -1410,11 +1445,15 @@ static void global_assign(belady_env_t *env) if (!is_Phi(irn)) break; - if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn) + if (arch_irn_consider_in_reg_alloc(env->cls, irn) && !bitset_contains_irn(ges.succ_phis, irn)) be_spill_phi(env->senv, irn); } } + + /* check dominance for specially spilled nodes. */ + foreach_ir_nodeset (env->extra_spilled, irn, iter) + make_spill_locations_dominate_irn(env->senv, irn); } static void collect_blocks(ir_node *bl, void *data) @@ -1432,7 +1471,7 @@ static void collect_blocks(ir_node *bl, void *data) * @param birg The backend graph * @param cls The register class to spill */ -void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) +static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) { ir_graph *irg = be_get_birg_irg(birg); belady_env_t env; @@ -1440,7 +1479,7 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) /* some special classes contain only ignore regs, nothing to do then */ n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL); - if(n_regs == 0) + if (n_regs == 0) return; be_clear_links(irg); @@ -1457,6 +1496,7 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) env.senv = be_new_spill_env(birg); env.ef = be_get_birg_exec_freq(birg); env.spilled = bitset_irg_obstack_alloc(&env.ob, irg); + env.extra_spilled = ir_nodeset_new(64); env.n_blocks = 0; irg_block_walk_graph(irg, NULL, collect_blocks, &env); @@ -1475,15 +1515,26 @@ void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls) global_assign(&env); + /* check dominance for specially spilled nodes. */ + { + ir_nodeset_iterator_t iter; + ir_node *irn; + + foreach_ir_nodeset (env.extra_spilled, irn, iter) + make_spill_locations_dominate_irn(env.senv, irn); + } + /* Insert spill/reload nodes into the graph and fix usages */ be_insert_spills_reloads(env.senv); /* clean up */ be_delete_spill_env(env.senv); + ir_nodeset_del(env.extra_spilled); obstack_free(&env.ob, NULL); } +BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2); void be_init_spillbelady2(void) { lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); @@ -1498,5 +1549,3 @@ void be_init_spillbelady2(void) be_register_spiller("belady2", &belady_spiller); FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2"); } - -BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);