From 7c4e33eb7648d9e1cc7efcffc8682a2f27a570a3 Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Sun, 20 Apr 2008 23:49:53 +0000 Subject: [PATCH] - moved the imul mem,imm32 splitting into peephole optimizations - instead of issuing a rep ret, issue a ret 0 as recommended in k10 optimization manual [r19344] --- ir/be/benode.c | 34 +++++++++++++------ ir/be/benode_t.h | 14 ++++++++ ir/be/ia32/ia32_emitter.c | 2 +- ir/be/ia32/ia32_optimize.c | 66 ++++++++++++++++++++++++++++++++++++- ir/be/ia32/ia32_transform.c | 12 ++----- 5 files changed, 107 insertions(+), 21 deletions(-) diff --git a/ir/be/benode.c b/ir/be/benode.c index fe56913bc..e4f1bbfbc 100644 --- a/ir/be/benode.c +++ b/ir/be/benode.c @@ -87,14 +87,15 @@ typedef struct { be_node_attr_t node_attr; /**< base attributes of every be node. */ int num_ret_vals; /**< number of return values */ unsigned pop; /**< number of bytes that should be popped */ + int emit_pop; /**< if set, emit pop bytes, even if pop = 0 */ } be_return_attr_t; /** The be_IncSP attribute type. */ typedef struct { be_node_attr_t node_attr; /**< base attributes of every be node. */ int offset; /**< The offset by which the stack shall be expanded/shrinked. */ - int align; /**< wether stack should be aligned after the - IncSP */ + int align; /**< whether stack should be aligned after the + IncSP */ } be_incsp_attr_t; /** The be_Frame attribute type. */ @@ -107,9 +108,9 @@ typedef struct { /** The be_Call attribute type. */ typedef struct { be_node_attr_t node_attr; /**< base attributes of every be node. */ - ir_entity *ent; /**< The called entity if this is a static call. */ + ir_entity *ent; /**< The called entity if this is a static call. */ unsigned pop; - ir_type *call_tp; /**< The call type, copied from the original Call node. */ + ir_type *call_tp; /**< The call type, copied from the original Call node. 
*/ } be_call_attr_t; typedef struct { @@ -214,6 +215,8 @@ static int Return_cmp_attr(ir_node *a, ir_node *b) { return 1; if (a_attr->pop != b_attr->pop) return 1; + if (a_attr->emit_pop != b_attr->emit_pop) + return 1; return _node_cmp_attr(&a_attr->node_attr, &b_attr->node_attr); } @@ -697,25 +700,36 @@ ir_node *be_new_Return(dbg_info *dbg, ir_graph *irg, ir_node *block, int n_res, a = get_irn_attr(res); a->num_ret_vals = n_res; a->pop = pop; + a->emit_pop = 0; return res; } /* Returns the number of real returns values */ -int be_Return_get_n_rets(const ir_node *ret) -{ +int be_Return_get_n_rets(const ir_node *ret) { const be_return_attr_t *a = get_irn_generic_attr_const(ret); return a->num_ret_vals; } -unsigned be_Return_get_pop(const ir_node *ret) -{ +/* return the number of bytes that should be popped from stack when executing the Return. */ +unsigned be_Return_get_pop(const ir_node *ret) { const be_return_attr_t *a = get_irn_generic_attr_const(ret); return a->pop; } -int be_Return_append_node(ir_node *ret, ir_node *node) -{ +/* return non-zero, if number of popped bytes must be always emitted */ +int be_Return_get_emit_pop(const ir_node *ret) { + const be_return_attr_t *a = get_irn_generic_attr_const(ret); + return a->emit_pop; +} + +/* set the emit_pop flag: if non-zero, the number of popped bytes is always emitted */ +void be_Return_set_emit_pop(ir_node *ret, int emit_pop) { + be_return_attr_t *a = get_irn_generic_attr(ret); + a->emit_pop = emit_pop; +} + +int be_Return_append_node(ir_node *ret, ir_node *node) { int pos; pos = add_irn_n(ret, node); diff --git a/ir/be/benode_t.h b/ir/be/benode_t.h index cc00d1855..c7d756748 100644 --- a/ir/be/benode_t.h +++ b/ir/be/benode_t.h @@ -361,6 +361,20 @@ int be_Return_get_n_rets(const ir_node *ret); */ unsigned be_Return_get_pop(const ir_node *ret); +/** + * Return non-zero, if number of popped bytes must be always emitted. 
+ * + * @param ret the be_Return node + */ +int be_Return_get_emit_pop(const ir_node *ret); + +/** + * Set the emit_pop flag; if non-zero, the number of popped bytes is always emitted. + * + * @param ret the be_Return node + */ +void be_Return_set_emit_pop(ir_node *ret, int emit_pop); + /** appends a node to the return node, returns the position of the node */ int be_Return_append_node(ir_node *ret, ir_node *node); diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c index 0ba657a18..21bd11ad5 100644 --- a/ir/be/ia32/ia32_emitter.c +++ b/ir/be/ia32/ia32_emitter.c @@ -1809,7 +1809,7 @@ static void emit_be_Return(const ir_node *node) be_emit_cstring("\tret"); pop = be_Return_get_pop(node); - if(pop > 0) { + if (pop > 0 || be_Return_get_emit_pop(node)) { be_emit_irprintf(" $%d", pop); } be_emit_finish_line_gas(node); diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c index 1510522cd..1f5fa6d82 100644 --- a/ir/be/ia32/ia32_optimize.c +++ b/ir/be/ia32/ia32_optimize.c @@ -345,9 +345,15 @@ static void peephole_ia32_Return(ir_node *node) { } } /* yep, return is the first real instruction in this block */ +#if 0 + /* add a rep prefix to the return */ rep = new_rd_ia32_RepPrefix(get_irn_dbg_info(node), current_ir_graph, block); keep_alive(rep); sched_add_before(node, rep); +#else + /* ensure that the 3 byte return is generated by always emitting the pop count */ + be_Return_set_emit_pop(node, 1); +#endif } /* only optimize up to 48 stores behind IncSPs */ @@ -876,12 +882,68 @@ exchange: be_peephole_after_exchange(res); } +/** + * Split an IMul mem, imm into a Load mem and an IMul reg, imm if a free gp register is available. + */ +static void peephole_ia32_Imul_split(ir_node *imul) { + const ir_node *right = get_irn_n(imul, n_ia32_IMul_right); + const arch_register_t *reg; + ir_node *load, *block, *base, *index, *mem, *res, *noreg; + dbg_info *dbgi; + ir_graph *irg; + + if (! 
is_ia32_Immediate(right) || get_ia32_op_type(imul) != ia32_AddrModeS) { + /* not the mem, imm form: nothing to split */ + return; + } + /* we need a free register */ + reg = get_free_gp_reg(); + if (reg == NULL) + return; + + /* fine, we can rebuild it */ + dbgi = get_irn_dbg_info(imul); + block = get_nodes_block(imul); + irg = current_ir_graph; + base = get_irn_n(imul, n_ia32_IMul_base); + index = get_irn_n(imul, n_ia32_IMul_index); + mem = get_irn_n(imul, n_ia32_IMul_mem); + load = new_rd_ia32_Load(dbgi, irg, block, base, index, mem); + + /* copy the address mode attributes of the IMul to the Load */ + set_irn_pinned(load, get_irn_pinned(imul)); + set_ia32_op_type(load, ia32_AddrModeS); + set_ia32_ls_mode(load, get_ia32_ls_mode(imul)); + + set_ia32_am_scale(load, get_ia32_am_scale(imul)); + set_ia32_am_sc(load, get_ia32_am_sc(imul)); + set_ia32_am_offs_int(load, get_ia32_am_offs_int(imul)); + if (is_ia32_am_sc_sign(imul)) + set_ia32_am_sc_sign(load); + if (is_ia32_use_frame(imul)) + set_ia32_use_frame(load); + set_ia32_frame_ent(load, get_ia32_frame_ent(imul)); + + sched_add_before(imul, load); + + mem = new_rd_Proj(dbgi, irg, block, load, mode_M, pn_ia32_Load_M); + res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res); + + arch_set_irn_register(arch_env, res, reg); + be_peephole_after_exchange(res); + + set_irn_n(imul, n_ia32_IMul_mem, mem); + noreg = get_irn_n(imul, n_ia32_IMul_left); + set_irn_n(imul, n_ia32_IMul_left, res); + set_ia32_op_type(imul, ia32_Normal); +} + /** * Register a peephole optimisation function. */ static void register_peephole_optimisation(ir_op *op, peephole_opt_func func) { assert(op->ops.generic == NULL); - op->ops.generic = (void*) func; + op->ops.generic = (op_func)func; } /* Perform peephole-optimizations. 
*/ @@ -899,6 +961,8 @@ void ia32_peephole_optimization(ia32_code_gen_t *new_cg) register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test); register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test); register_peephole_optimisation(op_be_Return, peephole_ia32_Return); + if (! ia32_cg_config.use_imul_mem_imm32) + register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split); be_peephole_opt(cg->birg); } diff --git a/ir/be/ia32/ia32_transform.c b/ir/be/ia32/ia32_transform.c index 9403ac9cb..da37dc96c 100644 --- a/ir/be/ia32/ia32_transform.c +++ b/ir/be/ia32/ia32_transform.c @@ -1216,7 +1216,6 @@ static ir_node *gen_Mul(ir_node *node) { ir_node *op1 = get_Mul_left(node); ir_node *op2 = get_Mul_right(node); ir_mode *mode = get_irn_mode(node); - unsigned flags; if (mode_is_float(mode)) { if (ia32_cg_config.use_sse2) @@ -1226,14 +1225,9 @@ static ir_node *gen_Mul(ir_node *node) { return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul, match_commutative | match_am); } - - /* for the lower 32bit of the result it doesn't matter whether we use - * signed or unsigned multiplication so we use IMul as it has fewer - * constraints */ - flags = match_commutative | match_am | match_mode_neutral | match_immediate; - if (ia32_cg_config.use_imul_mem_imm32) - flags |= match_am_and_immediates; - return gen_binop(node, op1, op2, new_rd_ia32_IMul, flags); + return gen_binop(node, op1, op2, new_rd_ia32_IMul, + match_commutative | match_am | match_mode_neutral | + match_immediate | match_am_and_immediates); } /** -- 2.20.1