X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_optimize.c;h=2288e61db7631a61c68e964655c223ff087fe07d;hb=9d8aeb18ba541518f7adaf03884209c1b9c8882d;hp=0839f87c664fc396261683edf4872a893a9614f7;hpb=2d9776010c3f48b99e88e4199f201ba8531dbde8;p=libfirm

diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c
index 0839f87c6..2288e61db 100644
--- a/ir/be/ia32/ia32_optimize.c
+++ b/ir/be/ia32/ia32_optimize.c
@@ -51,6 +51,7 @@
 #include "ia32_optimize.h"
 #include "bearch_ia32_t.h"
 #include "gen_ia32_regalloc_if.h"
+#include "ia32_common_transform.h"
 #include "ia32_transform.h"
 #include "ia32_dbg_stat.h"
 #include "ia32_util.h"
@@ -61,6 +62,16 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 static const arch_env_t *arch_env;
 static ia32_code_gen_t  *cg;
 
+static void copy_mark(const ir_node *old, ir_node *new)
+{
+	if (is_ia32_is_reload(old))
+		set_ia32_is_reload(new);
+	if (is_ia32_is_spill(old))
+		set_ia32_is_spill(new);
+	if (is_ia32_is_remat(old))
+		set_ia32_is_remat(new);
+}
+
 /**
  * Returns non-zero if the given node produces
  * a zero flag.
@@ -96,17 +107,17 @@ static int produces_zero_flag(ir_node *node, int pn)
 
 	case iro_ia32_ShlD:
 	case iro_ia32_ShrD:
+		assert(n_ia32_ShlD_count == n_ia32_ShrD_count);
+		count = get_irn_n(node, n_ia32_ShlD_count);
+		goto check_shift_amount;
+
 	case iro_ia32_Shl:
 	case iro_ia32_Shr:
 	case iro_ia32_Sar:
-		assert(n_ia32_ShlD_count == n_ia32_ShrD_count);
 		assert(n_ia32_Shl_count == n_ia32_Shr_count
 				&& n_ia32_Shl_count == n_ia32_Sar_count);
-		if (is_ia32_ShlD(node) || is_ia32_ShrD(node)) {
-			count = get_irn_n(node, n_ia32_ShlD_count);
-		} else {
-			count = get_irn_n(node, n_ia32_Shl_count);
-		}
+		count = get_irn_n(node, n_ia32_Shl_count);
+check_shift_amount:
 		/* when shift count is zero the flags are not affected, so we can only
 		 * do this for constants != 0 */
 		if (!is_ia32_Immediate(count))
@@ -159,6 +170,72 @@ static ir_node *turn_into_mode_t(ir_node *node)
 	return new_node;
 }
 
+/**
+ * Replace Cmp(x, 0) by a Test(x, x)
+ */
+static void peephole_ia32_Cmp(ir_node *const node)
+{
+	ir_node                     *right;
+	ia32_immediate_attr_t const *imm;
+	dbg_info                    *dbgi;
+	ir_graph                    *irg;
+	ir_node                     *block;
+	ir_node                     *noreg;
+	ir_node                     *nomem;
+	ir_node                     *op;
+	ia32_attr_t           const *attr;
+	int                          ins_permuted;
+	int                          cmp_unsigned;
+	ir_node                     *test;
+	arch_register_t const       *reg;
+	ir_edge_t const             *edge;
+	ir_edge_t const             *tmp;
+
+	if (get_ia32_op_type(node) != ia32_Normal)
+		return;
+
+	right = get_irn_n(node, n_ia32_Cmp_right);
+	if (!is_ia32_Immediate(right))
+		return;
+
+	imm = get_ia32_immediate_attr_const(right);
+	if (imm->symconst != NULL || imm->offset != 0)
+		return;
+
+	dbgi         = get_irn_dbg_info(node);
+	irg          = current_ir_graph;
+	block        = get_nodes_block(node);
+	noreg        = ia32_new_NoReg_gp(cg);
+	nomem        = get_irg_no_mem(irg);
+	op           = get_irn_n(node, n_ia32_Cmp_left);
+	attr         = get_irn_generic_attr(node);
+	ins_permuted = attr->data.ins_permuted;
+	cmp_unsigned = attr->data.cmp_unsigned;
+
+	if (is_ia32_Cmp(node)) {
+		test = new_rd_ia32_Test(dbgi, irg, block, noreg, noreg, nomem,
+		                        op, op, ins_permuted, cmp_unsigned);
+	} else {
+		test = new_rd_ia32_Test8Bit(dbgi, irg, block, noreg, noreg, nomem,
+		                            op, op, ins_permuted, cmp_unsigned);
+	}
+	set_ia32_ls_mode(test, get_ia32_ls_mode(node));
+
+	reg = arch_get_irn_register(arch_env, node);
+	arch_set_irn_register(arch_env, test, reg);
+
+	foreach_out_edge_safe(node, edge, tmp) {
+		ir_node *const user = get_edge_src_irn(edge);
+
+		if (is_Proj(user))
+			exchange(user, test);
+	}
+
+	sched_add_before(node, test);
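+	/* keep the spill/reload/remat markers of the replaced Cmp */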
+	copy_mark(node, test);
+	be_peephole_exchange(node, test);
+}
+
 /**
  * Peephole optimization for Test instructions.
  * We can remove the Test, if a zero flags was produced which is still
@@ -265,9 +342,7 @@ static void peephole_ia32_Return(ir_node *node) {
 		}
 	}
 
-	/* ensure, that the 3 byte return is generated
-	 * actually the emitter tests again if the block beginning has a label and
-	 * isn't just a fallthrough */
+	/* ensure that the 3 byte return is generated */
 	be_Return_set_emit_pop(node, 1);
 }
 
@@ -281,13 +356,18 @@ static void peephole_ia32_Return(ir_node *node) {
  */
 static void peephole_IncSP_Store_to_push(ir_node *irn)
 {
-	int i, maxslot, inc_ofs;
-	ir_node *node;
-	ir_node *stores[MAXPUSH_OPTIMIZE];
-	ir_node *block;
-	ir_graph *irg;
-	ir_node *curr_sp;
-	ir_mode *spmode;
+	int       i;
+	int       maxslot;
+	int       inc_ofs;
+	ir_node  *node;
+	ir_node  *stores[MAXPUSH_OPTIMIZE];
+	ir_node  *block;
+	ir_graph *irg;
+	ir_node  *curr_sp;
+	ir_mode  *spmode;
+	ir_node  *first_push = NULL;
+	ir_edge_t const *edge;
+	ir_edge_t const *next;
 
 	memset(stores, 0, sizeof(stores));
 
@@ -323,20 +403,18 @@ static void peephole_IncSP_Store_to_push(ir_node *irn)
 
 		/* unfortunately we can't support the full AMs possible for push at the
 		 * moment. TODO: fix this */
-		if (get_ia32_am_scale(node) > 0 || !is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
+		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
 			break;
 
 		offset = get_ia32_am_offs_int(node);
 		/* we should NEVER access uninitialized stack BELOW the current SP */
 		assert(offset >= 0);
 
-		offset = inc_ofs - 4 - offset;
-
 		/* storing at half-slots is bad */
 		if ((offset & 3) != 0)
 			break;
 
-		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
+		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
 			continue;
 
 		storeslot = offset >> 2;
@@ -349,29 +427,35 @@ static void peephole_IncSP_Store_to_push(ir_node *irn)
 			maxslot = storeslot;
 	}
 
-	curr_sp = be_get_IncSP_pred(irn);
+	curr_sp = irn;
+
+	for (i = -1; i < maxslot; ++i) {
+		if (stores[i + 1] == NULL)
+			break;
+	}
 
 	/* walk through the Stores and create Pushs for them */
 	block  = get_nodes_block(irn);
 	spmode = get_irn_mode(irn);
 	irg    = cg->irg;
-	for (i = 0; i <= maxslot; ++i) {
+	for (; i >= 0; --i) {
 		const arch_register_t *spreg;
 		ir_node *push;
 		ir_node *val, *mem, *mem_proj;
 		ir_node *store = stores[i];
 		ir_node *noreg = ia32_new_NoReg_gp(cg);
 
-		if (store == NULL)
-			break;
-
 		val = get_irn_n(store, n_ia32_unary_op);
 		mem = get_irn_n(store, n_ia32_mem);
 		spreg = arch_get_irn_register(cg->arch_env, curr_sp);
 
 		push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, mem, val, curr_sp);
+		copy_mark(store, push);
 
-		sched_add_before(irn, push);
+		if (first_push == NULL)
+			first_push = push;
+
+		sched_add_after(curr_sp, push);
 
 		/* create stackpointer Proj */
 		curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
@@ -386,10 +470,103 @@
 		inc_ofs -= 4;
 	}
 
+	foreach_out_edge_safe(irn, edge, next) {
+		ir_node *const src = get_edge_src_irn(edge);
+		int      const pos = get_edge_src_pos(edge);
+
+		if (src == first_push)
+			continue;
+
+		set_irn_n(src, pos, curr_sp);
+	}
+
 	be_set_IncSP_offset(irn, inc_ofs);
-	be_set_IncSP_pred(irn, curr_sp);
 }
 
+#if 0
+static void peephole_store_incsp(ir_node *store)
+{
+	dbg_info *dbgi;
+	ir_node  *node;
+	ir_node  *block;
+	ir_node  *noreg;
+	ir_node  *mem;
+	ir_node  *push;
+	ir_node  *val;
+	ir_node  *am_base = get_irn_n(store, n_ia32_Store_base);
+	if (!be_is_IncSP(am_base)
+			|| get_nodes_block(am_base) != get_nodes_block(store))
+		return;
+	mem = get_irn_n(store, n_ia32_Store_mem);
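+	/* a push can only replace a store without index register and without
+	 * a memory dependency */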
+	if (!is_ia32_NoReg_GP(get_irn_n(store, n_ia32_Store_index))
+			|| !is_NoMem(mem))
+		return;
+
+	int incsp_offset = be_get_IncSP_offset(am_base);
+	if (incsp_offset <= 0)
+		return;
+
+	/* we have to be at offset 0 */
+	int my_offset = get_ia32_am_offs_int(store);
+	if (my_offset != 0) {
+		/* TODO here: find out whether there is a store with offset 0 before
+		 * us and whether we can move it down to our place */
+		return;
+	}
+	ir_mode *ls_mode = get_ia32_ls_mode(store);
+	int my_store_size = get_mode_size_bytes(ls_mode);
+
+	if (my_offset + my_store_size > incsp_offset)
+		return;
+
+	/* correctness checking:
+	     - no one else must write to that stackslot
+	       (because after translation incsp won't allocate it anymore)
+	 */
+	sched_foreach_reverse_from(store, node) {
+		int i, arity;
+
+		if (node == am_base)
+			break;
+
+		/* make sure no one else can use the space on the stack */
+		arity = get_irn_arity(node);
+		for (i = 0; i < arity; ++i) {
+			ir_node *pred = get_irn_n(node, i);
+			if (pred != am_base)
+				continue;
+
+			if (i == n_ia32_base &&
+					(get_ia32_op_type(node) == ia32_AddrModeS
+					 || get_ia32_op_type(node) == ia32_AddrModeD)) {
+				int      node_offset  = get_ia32_am_offs_int(node);
+				ir_mode *node_ls_mode = get_ia32_ls_mode(node);
+				int      node_size    = get_mode_size_bytes(node_ls_mode);
+				/* overlapping with our position? abort */
+				if (node_offset < my_offset + my_store_size
+						&& node_offset + node_size >= my_offset)
+					return;
+				/* otherwise it's fine */
+				continue;
+			}
+
+			/* strange use of esp: abort */
+			return;
+		}
+	}
+
+	/* all ok, change to push */
+	dbgi  = get_irn_dbg_info(store);
+	block = get_nodes_block(store);
+	noreg = ia32_new_NoReg_gp(cg);
+	val   = get_ia32_
+
+	push  = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, mem,
+
+	create_push(dbgi, current_ir_graph, block, am_base, store);
+}
+#endif
+
 /**
  * Return true if a mode can be stored in the GP register set
  */
@@ -432,7 +609,6 @@ static void peephole_Load_IncSP_to_pop(ir_node *irn)
 	maxslot = -1;
 	pred_sp = be_get_IncSP_pred(irn);
 	for (node = sched_prev(irn); !sched_is_end(node); node = sched_prev(node)) {
-		ir_node *mem;
 		int offset;
 		int loadslot;
 		const arch_register_t *sreg, *dreg;
@@ -471,13 +647,9 @@ static void peephole_Load_IncSP_to_pop(ir_node *irn)
 		 * but we do not check this */
 			break;
 		}
-		/* Load has to be attached to Spill-Mem */
-		mem = skip_Proj(get_irn_n(node, n_ia32_mem));
-		if (!is_Phi(mem) && !is_ia32_Store(mem) && !is_ia32_Push(mem))
-			break;
 
 		/* should have NO index */
-		if (get_ia32_am_scale(node) > 0 || !is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
+		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
 			break;
 
 		offset = get_ia32_am_offs_int(node);
@@ -546,6 +718,8 @@
 		pop = new_rd_ia32_Pop(get_irn_dbg_info(load), irg, block, mem, pred_sp);
 		arch_set_irn_register(arch_env, pop, reg);
 
+		copy_mark(load, pop);
+
 		/* create stackpointer Proj */
 		pred_sp = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_stack);
 		arch_set_irn_register(arch_env, pred_sp, esp);
@@ -757,6 +931,7 @@ static void peephole_ia32_Const(ir_node *node)
 
 	sched_add_before(node, produceval);
 	sched_add_before(node, xor);
+	copy_mark(node, xor);
 	be_peephole_exchange(node, xor);
 }
 
@@ -963,6 +1138,7 @@ exchange:
 
 	/* exchange the Add and the LEA */
 	sched_add_before(node, res);
+	copy_mark(node, res);
 	be_peephole_exchange(node, res);
 }
 
@@ -1045,12 +1221,14 @@ void ia32_peephole_optimization(ia32_code_gen_t *new_cg)
 
 	/* register peephole optimisations */
 	clear_irp_opcodes_generic_func();
-	register_peephole_optimisation(op_ia32_Const, peephole_ia32_Const);
-	register_peephole_optimisation(op_be_IncSP, peephole_be_IncSP);
-	register_peephole_optimisation(op_ia32_Lea, peephole_ia32_Lea);
-	register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test);
+	register_peephole_optimisation(op_ia32_Const,    peephole_ia32_Const);
+	register_peephole_optimisation(op_be_IncSP,      peephole_be_IncSP);
+	register_peephole_optimisation(op_ia32_Lea,      peephole_ia32_Lea);
+	register_peephole_optimisation(op_ia32_Cmp,      peephole_ia32_Cmp);
+	register_peephole_optimisation(op_ia32_Cmp8Bit,  peephole_ia32_Cmp);
+	register_peephole_optimisation(op_ia32_Test,     peephole_ia32_Test);
 	register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
-	register_peephole_optimisation(op_be_Return, peephole_ia32_Return);
+	register_peephole_optimisation(op_be_Return,     peephole_ia32_Return);
 	if (! ia32_cg_config.use_imul_mem_imm32)
 		register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split);
 	if (ia32_cg_config.use_pxor)