X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_optimize.c;h=88449406bddaf8c6ffd1b2a818611334cba0d954;hb=3f0b6e8217576d23654d406170c543a791edac56;hp=7ce28c052094d2fb4613dbaf7825afd0b7eb8486;hpb=777a5dce311e8d4d2e1ac2e52e815825ed4374c8;p=libfirm diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c index 7ce28c052..88449406b 100644 --- a/ir/be/ia32/ia32_optimize.c +++ b/ir/be/ia32/ia32_optimize.c @@ -421,6 +421,17 @@ static void peephole_IncSP_Store_to_push(ir_node *irn) be_set_IncSP_pred(irn, curr_sp); } +/** + * Return true if a mode can be stored in the GP register set + */ +static INLINE int mode_needs_gp_reg(ir_mode *mode) { + if (mode == mode_fpcw) + return 0; + if (get_mode_size_bits(mode) > 32) + return 0; + return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b; +} + /** * Tries to create Pops from Load, IncSP combinations. * The Loads are replaced by Pops, the IncSP is modified @@ -429,11 +440,12 @@ static void peephole_IncSP_Store_to_push(ir_node *irn) static void peephole_Load_IncSP_to_pop(ir_node *irn) { const arch_register_t *esp = &ia32_gp_regs[REG_ESP]; - int i, maxslot, inc_ofs; + int i, maxslot, inc_ofs, ofs; ir_node *node, *pred_sp, *block; ir_node *loads[MAXPUSH_OPTIMIZE]; ir_graph *irg; unsigned regmask = 0; + unsigned copymask = ~0; memset(loads, 0, sizeof(loads)); assert(be_is_IncSP(irn)); @@ -454,38 +466,46 @@ static void peephole_Load_IncSP_to_pop(ir_node *irn) ir_node *mem; int offset; int loadslot; - const arch_register_t *dreg; + const arch_register_t *sreg, *dreg; /* it has to be a Load */ if (!is_ia32_Load(node)) { if (be_is_Copy(node)) { - if (get_irn_mode(node) != mode_Iu) { + if (!mode_needs_gp_reg(get_irn_mode(node))) { /* not a GP copy, ignore */ continue; } dreg = arch_get_irn_register(arch_env, node); - if (regmask & (1 << dreg->index)) { + sreg = arch_get_irn_register(arch_env, be_get_Copy_op(node)); + if (regmask & copymask & (1 << sreg->index)) { break; } - /* we CAN 
skip Copies if the destination is not in our regmask, ie - none of our future Pop will overwrite it */ - regmask |= (1 << dreg->index); + if (regmask & copymask & (1 << dreg->index)) { + break; + } + /* we CAN skip Copies if neither the destination nor the source + * is in our regmask, ie none of our future Pop will overwrite it */ + regmask |= (1 << dreg->index) | (1 << sreg->index); + copymask &= ~((1 << dreg->index) | (1 << sreg->index)); continue; } break; } /* we can handle only GP loads */ - if (get_ia32_ls_mode(node) != mode_Iu) + if (!mode_needs_gp_reg(get_ia32_ls_mode(node))) continue; /* it has to use our predecessor sp value */ - if (get_irn_n(node, n_ia32_base) != pred_sp) - continue; + if (get_irn_n(node, n_ia32_base) != pred_sp) { + /* it would be ok if this load does not use a Pop result, + * but we do not check this */ + break; + } /* Load has to be attached to Spill-Mem */ mem = skip_Proj(get_irn_n(node, n_ia32_mem)); if (!is_Phi(mem) && !is_ia32_Store(mem) && !is_ia32_Push(mem)) - continue; + break; /* should have NO index */ if (get_ia32_am_scale(node) > 0 || !is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index))) @@ -501,6 +521,9 @@ static void peephole_Load_IncSP_to_pop(ir_node *irn) if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4) continue; + /* ignore those outside the possible windows */ + if (offset > inc_ofs - 4) + continue; loadslot = offset >> 2; /* loading from the same slot twice is bad (and shouldn't happen...) 
*/ @@ -522,24 +545,26 @@ static void peephole_Load_IncSP_to_pop(ir_node *irn) if (maxslot < 0) return; - /* walk through the Loads and create Pops for them */ + /* find the first slot */ for (i = maxslot; i >= 0; --i) { ir_node *load = loads[i]; if (load == NULL) break; - inc_ofs -= 4; } + ofs = inc_ofs - (maxslot + 1) * 4; + inc_ofs = (i+1) * 4; + /* create a new IncSP if needed */ block = get_nodes_block(irn); irg = cg->irg; - if (inc_ofs != 0) { - assert(inc_ofs > 0); + if (inc_ofs > 0) { pred_sp = be_new_IncSP(esp, irg, block, pred_sp, -inc_ofs, be_get_IncSP_align(irn)); sched_add_before(irn, pred_sp); } + /* walk through the Loads and create Pops for them */ for (++i; i <= maxslot; ++i) { ir_node *load = loads[i]; ir_node *mem, *pop; @@ -569,9 +594,8 @@ static void peephole_Load_IncSP_to_pop(ir_node *irn) /* we can remove the Load now */ sched_remove(load); kill_node(load); - } - be_set_IncSP_offset(irn, 0); + be_set_IncSP_offset(irn, -ofs); be_set_IncSP_pred(irn, pred_sp); }