From 839487dfb4a714fa7e66063495ade6a3726040ef Mon Sep 17 00:00:00 2001
From: Matthias Braun
Date: Thu, 31 Aug 2006 14:39:41 +0000
Subject: [PATCH] Refactor finish/after_ra phases; add IncSP/Store -> Push peephole optimization

- Refactored the finish/after_ra phases a bit: stack node fixup and stack
  bias walking now happen before the finish phase
- Changed ia32 to do some transformations from the finish phase in the
  after_ra phase already, but to do peephole optimization in the finish
  phase
- Added a peephole optimization that transforms IncSP/Store cascades into
  Pushs (see the sketch after the patch)
- Replaced several HASH_PTR uses with nodeset_hash to make the compiler
  more deterministic
---
 ir/be/beabi.c              |   2 -
 ir/be/becopyheur.c         |   7 +-
 ir/be/becopyilp2.c         |   3 +-
 ir/be/becopyopt.c          |   5 +-
 ir/be/becopyopt_t.h        |   3 +-
 ir/be/belower.c            |   5 +-
 ir/be/bemain.c             |  11 +--
 ir/be/benodesets.c         |   2 +-
 ir/be/benodesets.h         |   2 +-
 ir/be/bespill.c            |   3 +-
 ir/be/bespillmorgan.c      |   9 +-
 ir/be/bespillslots.c       |   9 +-
 ir/be/bessadestr.c         |   5 +-
 ir/be/beuses.c             |   3 +-
 ir/be/ia32/bearch_ia32.c   |  15 ++--
 ir/be/ia32/ia32_finish.c   |  16 ++--
 ir/be/ia32/ia32_map_regs.c |   3 +-
 ir/be/ia32/ia32_optimize.c | 173 +++++++++++++++++++++++++++++++++++--
 ir/be/ia32/ia32_optimize.h |   3 +-
 19 files changed, 220 insertions(+), 59 deletions(-)

diff --git a/ir/be/beabi.c b/ir/be/beabi.c
index 34e3f2965..9ab4e696f 100644
--- a/ir/be/beabi.c
+++ b/ir/be/beabi.c
@@ -400,7 +400,6 @@ static ir_node *adjust_call(be_abi_irg_t *env, ir_node *irn, ir_node *curr_sp, i
 	const arch_register_t *sp = arch_isa_sp(isa);
 	ir_mode *mach_mode = sp->reg_class->mode;
 	struct obstack *obst = &env->obst;
-	ir_node *no_mem = get_irg_no_mem(irg);
 	int no_alloc = call->flags.bits.frame_is_setup_on_call;
 
 	ir_node *res_proj = NULL;
@@ -1504,7 +1503,6 @@ static void modify_irg(be_abi_irg_t *env)
 	ir_graph *irg = env->birg->irg;
 	ir_node *bl = get_irg_start_block(irg);
 	ir_node *end = get_irg_end_block(irg);
-	ir_node *no_mem = get_irg_no_mem(irg);
 	ir_node *mem = get_irg_initial_mem(irg);
 	ir_type *method_type = get_entity_type(get_irg_entity(irg));
 	pset *dont_save = pset_new_ptr(8);
diff --git a/ir/be/becopyheur.c b/ir/be/becopyheur.c
index 13679c969..7f24bccfa 100644
--- a/ir/be/becopyheur.c
+++ b/ir/be/becopyheur.c
@@ -27,6 +27,7 @@
 #include "xmalloc.h"
 #include "becopyopt_t.h"
 #include "becopystat.h"
+#include "benodesets.h"
 #include "bitset.h"
 
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
@@ -38,7 +39,7 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 #define SLOTS_CHANGED_NODES 32
 
 #define list_entry_queue(lh) list_entry(lh, qnode_t, queue)
-#define HASH_CONFLICT(c) (HASH_PTR(c.n1) ^ HASH_PTR(c.n2))
+#define HASH_CONFLICT(c) (nodeset_hash(c.n1) ^ nodeset_hash(c.n2))
 
 /**
  * Modeling additional conflicts between nodes. NOT live range interference
@@ -126,7 +127,7 @@ static int set_cmp_node_stat_t(const void *x, const void *y, size_t size) {
 static INLINE node_stat_t *qnode_find_node(const qnode_t *qn, ir_node *irn) {
 	node_stat_t find;
 	find.irn = irn;
-	return set_find(qn->changed_nodes, &find, sizeof(find), HASH_PTR(irn));
+	return set_find(qn->changed_nodes, &find, sizeof(find), nodeset_hash(irn));
 }
 
 /**
@@ -138,7 +139,7 @@ static INLINE node_stat_t *qnode_find_or_insert_node(const qnode_t *qn, ir_node
 	find.irn = irn;
 	find.new_color = NO_COLOR;
 	find.pinned_local = 0;
-	return set_insert(qn->changed_nodes, &find, sizeof(find), HASH_PTR(irn));
+	return set_insert(qn->changed_nodes, &find, sizeof(find), nodeset_hash(irn));
 }
 
 /**
diff --git a/ir/be/becopyilp2.c b/ir/be/becopyilp2.c
index 1df4df35d..801e47057 100644
--- a/ir/be/becopyilp2.c
+++ b/ir/be/becopyilp2.c
@@ -40,6 +40,7 @@
 #include "becopyilp_t.h"
 #include "beifg_t.h"
 #include "besched_t.h"
+#include "benodesets.h"
 
 #define DEBUG_LVL 1
 
@@ -202,7 +203,7 @@ static int compare_edge_t(const void *k1, const void *k2, size_t size) {
 	return ! (e1->n1 == e2->n1 && e1->n2 == e2->n2);
 }
 
-#define HASH_EDGE(e) (HASH_PTR((e)->n1) ^ HASH_PTR((e)->n2))
+#define HASH_EDGE(e) (nodeset_hash((e)->n1) ^ nodeset_hash((e)->n2))
 
 static INLINE edge_t *add_edge(set *edges, ir_node *n1, ir_node *n2, int *counter) {
 	edge_t new_edge;
diff --git a/ir/be/becopyopt.c b/ir/be/becopyopt.c
index 316e31a03..b206eaa8e 100644
--- a/ir/be/becopyopt.c
+++ b/ir/be/becopyopt.c
@@ -38,6 +38,7 @@
 #include "belive_t.h"
 #include "beinsn_t.h"
 #include "besched_t.h"
+#include "benodesets.h"
 
 #define DUMP_BEFORE 1
 #define DUMP_AFTER  2
@@ -656,7 +657,7 @@ static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
 	new_node.irn = n1;
 	new_node.degree = 0;
 	new_node.neighbours = NULL;
-	node = set_insert(co->nodes, &new_node, sizeof(new_node), HASH_PTR(new_node.irn));
+	node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
 
 	allocnew = 1;
 	for (nbr = node->neighbours; nbr; nbr = nbr->next)
@@ -741,7 +742,7 @@ int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
 	ASSERT_GS_AVAIL(co);
 
 	new_node.irn = irn;
-	n = set_find(co->nodes, &new_node, sizeof(new_node), HASH_PTR(new_node.irn));
+	n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
 	if (n) {
 		return (n->degree > 0);
 	} else
diff --git a/ir/be/becopyopt_t.h b/ir/be/becopyopt_t.h
index be1a59872..045ce33cd 100644
--- a/ir/be/becopyopt_t.h
+++ b/ir/be/becopyopt_t.h
@@ -15,6 +15,7 @@
 #include "bearch.h"
 #include "bechordal_t.h"
 #include "becopyopt.h"
+#include "benodesets.h"
 
 /**
  * Data representing the problem of copy minimization.
@@ -120,7 +121,7 @@ static INLINE affinity_node_t *get_affinity_info(const copy_opt_t *co, ir_node *
 	ASSERT_GS_AVAIL(co);
 
 	find.irn = irn;
-	return set_find(co->nodes, &find, sizeof(find), HASH_PTR(irn));
+	return set_find(co->nodes, &find, sizeof(find), nodeset_hash(irn));
 }
 
 #define co_gs_nodes_begin(co) set_first((co)->nodes)
diff --git a/ir/be/belower.c b/ir/be/belower.c
index 89a6a0d7e..abdd5ea80 100644
--- a/ir/be/belower.c
+++ b/ir/be/belower.c
@@ -23,6 +23,7 @@
 #include "bechordal_t.h"
 #include "besched_t.h"
 #include "bestat.h"
+#include "benodesets.h"
 
 #include "irgmod.h"
 #include "iredges_t.h"
@@ -568,7 +569,7 @@ static void gen_assure_different_pattern(ir_node *irn, ir_node *other_different,
 	/* insert the other different and it's copies into the set */
 	key.op = other_different;
 	key.copies = NULL;
-	entry = pset_find(op_set, &key, HASH_PTR(other_different));
+	entry = pset_find(op_set, &key, nodeset_hash(other_different));
 
 	if (! entry) {
 		entry = obstack_alloc(&env->obst, sizeof(*entry));
@@ -583,7 +584,7 @@ static void gen_assure_different_pattern(ir_node *irn, ir_node *other_different,
 	if (be_is_CopyKeep(keep))
 		pset_insert_ptr(entry->copies, keep);
 
-	pset_insert(op_set, entry, HASH_PTR(other_different));
+	pset_insert(op_set, entry, nodeset_hash(other_different));
 
 	DBG((mod, LEVEL_1, "created %+F for %+F to assure should_be_different\n", keep, irn));
 }
diff --git a/ir/be/bemain.c b/ir/be/bemain.c
index adcf15d3d..c954cd433 100644
--- a/ir/be/bemain.c
+++ b/ir/be/bemain.c
@@ -572,20 +572,15 @@ static void be_main_loop(FILE *file_handle)
 
 		/* fix stack offsets */
 		BE_TIMER_PUSH(t_abi);
-		//be_abi_fix_stack_bias(birg.abi);
+		be_abi_fix_stack_nodes(birg.abi, NULL);
+		be_remove_dead_nodes_from_schedule(birg.irg);
+		be_abi_fix_stack_bias(birg.abi);
 		BE_TIMER_POP(t_abi);
 
 		BE_TIMER_PUSH(t_finish);
 		arch_code_generator_finish(birg.cg);
 		BE_TIMER_POP(t_finish);
 
-		/* fix stack offsets */
-		BE_TIMER_PUSH(t_abi);
-		be_abi_fix_stack_nodes(birg.abi, NULL);
-		be_remove_dead_nodes_from_schedule(birg.irg);
-		be_abi_fix_stack_bias(birg.abi);
-		BE_TIMER_POP(t_abi);
-
 		dump(DUMP_FINAL, irg, "-finish", dump_ir_block_graph_sched);
 
 		/* check schedule and register allocation */
diff --git a/ir/be/benodesets.c b/ir/be/benodesets.c
index fc1468a03..2c24d4f6b 100644
--- a/ir/be/benodesets.c
+++ b/ir/be/benodesets.c
@@ -6,6 +6,6 @@
  *
  * Use its node number
  */
-unsigned nodeset_hash(ir_node *n) {
+unsigned nodeset_hash(const ir_node *n) {
 	return (unsigned)get_irn_idx(n);
 }
diff --git a/ir/be/benodesets.h b/ir/be/benodesets.h
index c83b382c1..63f0f4062 100644
--- a/ir/be/benodesets.h
+++ b/ir/be/benodesets.h
@@ -14,7 +14,7 @@ typedef struct pset nodeset;
 /**
  * Calculates a hash value for a node.
  */
-unsigned nodeset_hash(ir_node *n);
+unsigned nodeset_hash(const ir_node *n);
 
 /**
  * Creates a new nodeset.
diff --git a/ir/be/bespill.c b/ir/be/bespill.c
index 4eecc3e9a..0477a0fbf 100644
--- a/ir/be/bespill.c
+++ b/ir/be/bespill.c
@@ -32,6 +32,7 @@
 #include "benode_t.h"
 #include "bechordal_t.h"
 #include "bejavacoal.h"
+#include "benodesets.h"
 
 // only rematerialise when costs are less than REMAT_COST_LIMIT
 // TODO determine a good value here...
@@ -82,7 +83,7 @@ static int cmp_spillinfo(const void *x, const void *y, size_t size) {
  */
 static spill_info_t *get_spillinfo(const spill_env_t *env, ir_node *value) {
 	spill_info_t info, *res;
-	int hash = HASH_PTR(value);
+	int hash = nodeset_hash(value);
 
 	info.spilled_node = value;
 	res = set_find(env->spills, &info, sizeof(info), hash);
diff --git a/ir/be/bespillmorgan.c b/ir/be/bespillmorgan.c
index 43ed55e31..fd696b867 100644
--- a/ir/be/bespillmorgan.c
+++ b/ir/be/bespillmorgan.c
@@ -25,6 +25,7 @@
 
 #include "bespillbelady.h"
 #include "beverify.h"
+#include "benodesets.h"
 
 #define DBG_LIVE    1
 #define DBG_LOOPANA 2
@@ -89,15 +90,19 @@ static int block_attr_cmp(const void *e1, const void *e2, size_t s) {
 }
 
 static INLINE int loop_attr_hash(const loop_attr_t *a) {
+#ifdef DEBUG_libfirm
+	return a->loop->loop_nr;
+#else
 	return HASH_PTR(a->loop);
+#endif
 }
 
 static INLINE int block_attr_hash(const block_attr_t *b) {
-	return HASH_PTR(b->block);
+	return nodeset_hash(b->block);
 }
 
 static INLINE int loop_edge_hash(const loop_edge_t *e) {
-	return HASH_PTR(e->block) ^ (e->pos * 31);
+	return nodeset_hash(e->block) ^ (e->pos * 31);
 }
 
 static INLINE loop_attr_t *get_loop_attr(morgan_env_t *env, const ir_loop *loop) {
diff --git a/ir/be/bespillslots.c b/ir/be/bespillslots.c
index c3faf513b..9cbe76e80 100644
--- a/ir/be/bespillslots.c
+++ b/ir/be/bespillslots.c
@@ -25,6 +25,7 @@
 #include "bespillslots.h"
 #include "bechordal_t.h"
 #include "bejavacoal.h"
+#include "benodesets.h"
 
 #define DBG_COALESCING 1
 
@@ -74,7 +75,7 @@ static int cmp_spill(const void* d1, const void* d2, size_t size) {
 
 static spill_t *get_spill(ss_env_t *env, ir_node *node) {
 	spill_t spill, *res;
-	int hash = HASH_PTR(node);
+	int hash = nodeset_hash(node);
 
 	spill.spill = node;
 	res = set_find(env->spills, &spill, sizeof(spill), hash);
@@ -108,7 +109,7 @@ static spill_t *collect_spill(ss_env_t *env, ir_node *node) {
 	const arch_env_t *arch_env = env->arch_env;
 	const arch_register_class_t *cls;
 	spill_t spill, *res;
-	int hash = HASH_PTR(node);
+	int hash = nodeset_hash(node);
 
 	assert(arch_irn_class_is(arch_env, node, spill));
 
@@ -134,7 +135,7 @@ static spill_t *collect_memphi(ss_env_t *env, ir_node *node) {
 	int i, arity;
 	spill_t spill, *res;
-	int hash = HASH_PTR(node);
+	int hash = nodeset_hash(node);
 
 	assert(is_Phi(node));
 
@@ -526,7 +527,7 @@ static memperm_t *get_memperm(ss_env_t *env, ir_node *block) {
 	int hash;
 
 	entry.block = block;
-	hash = HASH_PTR(block);
+	hash = nodeset_hash(block);
 
 	res = set_find(env->memperms, &entry, sizeof(entry), hash);
 
diff --git a/ir/be/bessadestr.c b/ir/be/bessadestr.c
index 4ef0fae5e..26768f4ff 100644
--- a/ir/be/bessadestr.c
+++ b/ir/be/bessadestr.c
@@ -28,6 +28,7 @@
 #include "belive_t.h"
 #include "benode_t.h"
 #include "besched_t.h"
+#include "benodesets.h"
 
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
@@ -109,7 +110,7 @@ static void insert_all_perms_walker(ir_node *bl, void *data) {
 		for(phi = get_irn_link(bl); phi; phi = get_irn_link(phi)) {
 			perm_proj_t templ;
 			ir_node *arg = get_irn_n(phi, i);
-			unsigned hash = HASH_PTR(arg);
+			unsigned hash = nodeset_hash(arg);
 
 			templ.arg = arg;
 			pp = set_find(arg_set, &templ, sizeof(templ), hash);
@@ -165,7 +166,7 @@ static void insert_all_perms_walker(ir_node *bl, void *data) {
 			perm_proj_t templ;
 
 			templ.arg = get_irn_n(phi, i);
-			pp = set_find(arg_set, &templ, sizeof(templ), HASH_PTR(templ.arg));
+			pp = set_find(arg_set, &templ, sizeof(templ), nodeset_hash(templ.arg));
 
 			/* If not found, it was an interfering argument */
 			if (pp)
diff --git a/ir/be/beuses.c b/ir/be/beuses.c
index dc7f30288..d9cf998ab 100644
--- a/ir/be/beuses.c
+++ b/ir/be/beuses.c
@@ -32,6 +32,7 @@
 #include "beirgmod.h"
 #include "bearch.h"
 #include "beuses_t.h"
+#include "benodesets.h"
 
 #define DBG_LEVEL SET_LEVEL_0
 
@@ -59,7 +60,7 @@ static int cmp_use(const void *a, const void *b, size_t n)
 static INLINE be_use_t *get_or_set_use(be_uses_t *uses, const ir_node *bl,
 		const ir_node *def, unsigned next_use)
 {
-	unsigned hash = HASH_COMBINE(HASH_PTR(bl), HASH_PTR(def));
+	unsigned hash = HASH_COMBINE(nodeset_hash(bl), nodeset_hash(def));
 	be_use_t templ;
 	be_use_t* result;
diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c
index 989be6e80..f510919af 100644
--- a/ir/be/ia32/bearch_ia32.c
+++ b/ir/be/ia32/bearch_ia32.c
@@ -1230,13 +1230,11 @@ static void ia32_after_ra_walker(ir_node *block, void *env) {
  */
 static void ia32_after_ra(void *self) {
 	ia32_code_gen_t *cg = self;
+	ir_graph *irg = cg->irg;
 
-	irg_block_walk_graph(cg->irg, NULL, ia32_after_ra_walker, self);
+	irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, self);
 
-	/* if we do x87 code generation, rewrite all the virtual instructions and registers */
-	if (cg->used_fp == fp_x87 || cg->force_sim) {
-		x87_simulate_graph(cg->arch_env, cg->irg, cg->blk_sched);
-	}
+	ia32_finish_irg(irg, cg);
 }
 
 /**
@@ -1246,7 +1244,12 @@ static void ia32_finish(void *self) {
 	ia32_code_gen_t *cg = self;
 	ir_graph *irg = cg->irg;
 
-	ia32_finish_irg(irg, cg);
+	/* if we do x87 code generation, rewrite all the virtual instructions and registers */
+	if (cg->used_fp == fp_x87 || cg->force_sim) {
+		x87_simulate_graph(cg->arch_env, irg, cg->blk_sched);
+	}
+
+	ia32_peephole_optimization(irg, cg);
 }
 
 /**
diff --git a/ir/be/ia32/ia32_finish.c b/ir/be/ia32/ia32_finish.c
index 93163946d..848e40d40 100644
--- a/ir/be/ia32/ia32_finish.c
+++ b/ir/be/ia32/ia32_finish.c
@@ -33,7 +33,7 @@ static void ia32_transform_sub_to_neg_add(ir_node *irn, ia32_code_gen_t *cg) {
 	const arch_register_t *in1_reg, *in2_reg, *out_reg, **slots;
 
 	/* Return if AM node or not a Sub or xSub */
-	if (get_ia32_op_type(irn) != ia32_Normal || !(is_ia32_Sub(irn) || is_ia32_xSub(irn)))
+	if (!(is_ia32_Sub(irn) || is_ia32_xSub(irn)) || get_ia32_op_type(irn) != ia32_Normal)
 		return;
 
 	noreg = ia32_new_NoReg_gp(cg);
@@ -454,18 +454,14 @@ static void ia32_finish_irg_walker(ir_node *block, void *env) {
 	for (irn = sched_first(block); ! sched_is_end(irn); irn = next) {
 		ia32_code_gen_t *cg = env;
 
-		next = sched_next(irn);
-		if (is_ia32_irn(irn)) {
-			/* check if there is a sub which need to be transformed */
-			ia32_transform_sub_to_neg_add(irn, cg);
+		next = sched_next(irn);
 
-			/* transform a LEA into an Add if possible */
-			ia32_transform_lea_to_add(irn, cg);
+		/* check if there is a Sub which needs to be transformed */
+		ia32_transform_sub_to_neg_add(irn, cg);
 
-			/* check for peephole optimization */
-			ia32_peephole_optimization(irn, cg);
-		}
+		/* transform a LEA into an Add if possible */
+		ia32_transform_lea_to_add(irn, cg);
 	}
 
 	/* second: insert copies and finish irg */
diff --git a/ir/be/ia32/ia32_map_regs.c b/ir/be/ia32/ia32_map_regs.c
index 3443488e7..730e66fe2 100644
--- a/ir/be/ia32/ia32_map_regs.c
+++ b/ir/be/ia32/ia32_map_regs.c
@@ -15,6 +15,7 @@
 #include "ia32_map_regs.h"
 #include "ia32_new_nodes.h"
 #include "gen_ia32_regalloc_if.h"
+#include "benodesets.h"
 
 static int maxnum_gpreg_args = 3;   /* maximum number of int arguments passed in registers; default 3 */
 static int maxnum_fpreg_args = 5;   /* maximum number of float arguments passed in registers; default 5 */
@@ -84,7 +85,7 @@ static struct ia32_irn_reg_assoc *get_irn_reg_assoc(const ir_node *irn, set *reg
 	templ.irn = irn;
 	templ.reg = NULL;
 
-	hash = HASH_PTR(irn);
+	hash = nodeset_hash(irn);
 
 	return set_insert(reg_set, &templ, sizeof(templ), hash);
 }
diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c
index df2c3082b..eb2901646 100644
--- a/ir/be/ia32/ia32_optimize.c
+++ b/ir/be/ia32/ia32_optimize.c
@@ -447,6 +447,7 @@ static void ia32_optimize_CondJmp(ir_node *irn, ia32_code_gen_t *cg) {
 	}
 }
 
+#if 0
 /**
  * Creates a Push from Store(IncSP(gp_reg_size))
  */
@@ -516,12 +517,161 @@ static void ia32_create_Push(ir_node *irn, ia32_code_gen_t *cg) {
 	sched_add_before(next, push);
 	sched_add_after(push, proj_res);
 }
+#endif
+
+// only optimize up to 48 stores behind IncSPs
+#define MAXPUSH_OPTIMIZE	48
 
 /**
- * Creates a Pop from IncSP(Load(sp))
+ * Tries to create Pushs from IncSP/Store combinations
  */
-static void ia32_create_Pop(ir_node *irn, ia32_code_gen_t *cg) {
-	/* TODO */
+static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
+	int i;
+	int offset;
+	int firststore;
+	ir_node *node;
+	ir_node *stores[MAXPUSH_OPTIMIZE];
+	ir_node *block = get_nodes_block(irn);
+	ir_graph *irg = cg->irg;
+	ir_node *curr_sp;
+	ir_mode *spmode = get_irn_mode(irn);
+
+	memset(stores, 0, sizeof(stores));
+
+	assert(be_is_IncSP(irn));
+
+	offset = be_get_IncSP_offset(irn);
+	if(offset <= 0)
+		return;
+
+	/*
+	 * We first walk the schedule after the IncSP node as long as we find
+	 * suitable stores that could be transformed to a push.
+	 * We save them in the stores array, which is sorted by the frame
+	 * offset/4 attached to the node.
+	 */
+	for(node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
+		const char *am_offs;
+		ir_node *mem;
+		int offset = -1;
+		int n;
+		int storeslot;
+
+		// it has to be a store
+		if(!is_ia32_Store(node))
+			break;
+
+		// it has to use our sp value
+		if(get_irn_n(node, 0) != irn)
+			continue;
+		// store has to be attached to NoMem
+		mem = get_irn_n(node, 3);
+		if(!is_NoMem(mem)) {
+			continue;
+		}
+
+		if( (get_ia32_am_flavour(node) & ia32_am_IS) != 0)
+			break;
+
+		am_offs = get_ia32_am_offs(node);
+		if(am_offs == NULL) {
+			offset = 0;
+		} else {
+			// the am_offs has to be of the form "+NUMBER"
+			if(sscanf(am_offs, "+%d%n", &offset, &n) != 1 || am_offs[n] != '\0') {
+				// we shouldn't have any cases in the compiler at the moment
+				// that produce something different from esp+XX
+				assert(0);
+				break;
+			}
+		}
+
+		storeslot = offset / 4;
+		if(storeslot >= MAXPUSH_OPTIMIZE)
+			continue;
+
+		// storing into the same slot twice is bad (and shouldn't happen...)
+		if(stores[storeslot] != NULL)
+			break;
+
+		// storing at half-slots is bad
+		if(offset % 4 != 0)
+			break;
+
+		stores[storeslot] = node;
+	}
+
+	offset = be_get_IncSP_offset(irn);
+
+	firststore = -1;
+	for(i = 0; i < MAXPUSH_OPTIMIZE; ++i) {
+		ir_node *store = stores[i];
+		if(store == NULL || is_Bad(store))
+			break;
+		if(offset < 4)
+			break;
+
+		firststore = i;
+
+		offset -= 4;
+	}
+
+	curr_sp = get_irn_n(irn, 0);
+
+	// walk the stores in inverse order and create Pushs for them
+	for(i = firststore; i >= 0; --i) {
+		const ir_edge_t *edge, *next;
+		const arch_register_t *spreg;
+		ir_node *push;
+		ir_node *val, *mem;
+		ir_node *store = stores[i];
+
+		val = get_irn_n(store, 2);
+		mem = get_irn_n(store, 3);
+		spreg = arch_get_irn_register(cg->arch_env, curr_sp);
+
+		// create a push
+		push = new_rd_ia32_Push(NULL, irg, block, curr_sp, val, mem);
+		if(get_ia32_immop_type(store) != ia32_ImmNone) {
+			copy_ia32_Immop_attr(push, store);
+		}
+		sched_add_before(irn, push);
+
+		// create a stack pointer Proj
+		curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
+		arch_set_irn_register(cg->arch_env, curr_sp, spreg);
+		sched_add_before(irn, curr_sp);
+
+		// rewire the mem Projs of the store
+		foreach_out_edge_safe(store, edge, next) {
+			ir_node *succ = get_edge_src_irn(edge);
+
+			assert(is_Proj(succ) && get_Proj_proj(succ) == pn_ia32_Store_M);
+			set_irn_n(succ, 0, push);
+		}
+
+		// we can remove the store from the schedule now
+		sched_remove(store);
+	}
+
+	be_set_IncSP_offset(irn, offset);
+
+	// can we remove the IncSP now?
+	if(offset == 0) {
+		const ir_edge_t *edge, *next;
+
+		foreach_out_edge_safe(irn, edge, next) {
+			ir_node *arg = get_edge_src_irn(edge);
+			int pos = get_edge_src_pos(edge);
+
+			set_irn_n(arg, pos, curr_sp);
+		}
+
+		sched_remove(irn);
+		set_irn_n(irn, 0, new_Bad());
+	} else {
+		set_irn_n(irn, 0, curr_sp);
+	}
 }
 
 /**
@@ -540,13 +690,14 @@ static void ia32_optimize_IncSP(ir_node *irn, ia32_code_gen_t *cg) {
 
 		/* Omit the optimized IncSP */
 		be_set_IncSP_pred(irn, be_get_IncSP_pred(prev));
+		sched_remove(prev);
 	}
 }
 
 /**
  * Performs Peephole Optimizations.
  */
-void ia32_peephole_optimization(ir_node *irn, void *env) {
+static void ia32_peephole_optimize_node(ir_node *irn, void *env) {
 	ia32_code_gen_t *cg = env;
 
 	/* AMD CPUs want explicit compare before conditional jump */
 	if (is_ia32_CondJmp(irn)) {
@@ -556,13 +707,17 @@ void ia32_peephole_optimization(ir_node *irn, void *env) {
 		else if (is_ia32_CondJmp(irn))
 			ia32_optimize_CondJmp(irn, cg);
 	}
-	/* seems to be buggy when using Pushes */
-	else if (be_is_IncSP(irn))
-		ia32_optimize_IncSP(irn, cg);
-	else if (is_ia32_Store(irn))
-		ia32_create_Push(irn, cg);
+
+	if (be_is_IncSP(irn)) {
+		// optimize_IncSP doesn't respect dependency edges yet...
+		//ia32_optimize_IncSP(irn, cg);
+		ia32_create_Pushs(irn, cg);
+	}
 }
 
+void ia32_peephole_optimization(ir_graph *irg, ia32_code_gen_t *cg) {
+	irg_walk_graph(irg, ia32_peephole_optimize_node, NULL, cg);
+}
 
 /******************************************************************
diff --git a/ir/be/ia32/ia32_optimize.h b/ir/be/ia32/ia32_optimize.h
index 1348ce458..e0ef59cf7 100644
--- a/ir/be/ia32/ia32_optimize.h
+++ b/ir/be/ia32/ia32_optimize.h
@@ -22,8 +22,7 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg);
 
 /**
  * Performs Peephole Optimizations
- * This function is called by a walker.
  */
-void ia32_peephole_optimization(ir_node *irn, void *env);
+void ia32_peephole_optimization(ir_graph *irg, ia32_code_gen_t *cg);
 
 #endif /* _IA32_OPTIMIZE_H_ */
-- 
2.20.1
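
Sketch of the new Push optimization (illustration only, not part of the
patch; the register names are invented, the shape of the transformation is
taken from ia32_create_Pushs above). An IncSP that reserves stack space
followed by Stores into the reserved slots, e.g.

    sub esp, 8          ; IncSP, offset == 8
    mov [esp+4], eax    ; Store into slot 1 (offset 4)
    mov [esp], ebx      ; Store into slot 0 (offset 0)

becomes a Push cascade. The stores are collected into the slot array and
then walked in inverse order, because a push first decrements esp by 4 and
then stores, so the highest slot has to be pushed first:

    push eax            ; writes former slot 1
    push ebx            ; writes former slot 0

Once the remaining IncSP offset reaches 0, the IncSP itself is removed from
the schedule and its users are rewired to the last stack pointer Proj.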