From 5d113398726c2c6cfeb7bda33c30babc3ae9813d Mon Sep 17 00:00:00 2001 From: Christoph Mallon Date: Sun, 11 Nov 2012 10:07:05 +0100 Subject: [PATCH] Merge the r and non-r variants of fdiv and fsub. Let the emitter handle printing the r by checking the assigned registers. --- ir/be/ia32/ia32_emitter.c | 101 +++++++++++++++++++------------------- ir/be/ia32/ia32_spec.pl | 49 ++---------------- ir/be/ia32/ia32_x87.c | 47 ++++++------------ 3 files changed, 68 insertions(+), 129 deletions(-) diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c index 4ce91bcc0..de4b2756f 100644 --- a/ir/be/ia32/ia32_emitter.c +++ b/ir/be/ia32/ia32_emitter.c @@ -580,6 +580,27 @@ emit_AM: if (*fmt == 'M') { ++fmt; ia32_emit_x87_mode_suffix(node); + } else if (*fmt == 'R') { + ++fmt; + /* NOTE: Work around a gas quirk for non-commutative operations if the + * destination register is not %st0. In this case r/non-r is swapped. + * %st0 = %st0 - %st1 -> fsub %st1, %st0 (as expected) + * %st0 = %st1 - %st0 -> fsubr %st1, %st0 (as expected) + * %st1 = %st0 - %st1 -> fsub %st0, %st1 (expected: fsubr) + * %st1 = %st1 - %st0 -> fsubr %st0, %st1 (expected: fsub) + * In fact this corresponds to the encoding of the instruction: + * - The r suffix selects whether %st0 is on the left (no r) or on the + * right (r) side of the executed operation. + * - The placement of %st0 selects whether the result is written to + * %st0 (right) or the other register (left). + * This results in testing whether the left operand register is %st0 + * instead of the expected test whether the output register equals the + * left operand register. */ + ia32_x87_attr_t const *const attr = get_ia32_x87_attr_const(node); + if (get_ia32_op_type(node) == ia32_Normal ? + attr->x87[0] != &ia32_registers[REG_ST0] : + attr->attr.data.ins_permuted) + be_emit_char('r'); } else if (*fmt == 'X') { ++fmt; ia32_emit_xmm_mode_suffix(node); @@ -3234,38 +3255,40 @@ static void bemit_copybi(const ir_node *node) } } -static void bemit_fbinop(const ir_node *node, unsigned code, unsigned code_to) +static void bemit_fbinop(ir_node const *const node, unsigned const op_fwd, unsigned const op_rev) { + ia32_x87_attr_t const *const attr = get_ia32_x87_attr_const(node); + arch_register_t const *const st0 = &ia32_registers[REG_ST0]; if (get_ia32_op_type(node) == ia32_Normal) { - ia32_x87_attr_t const *const x87_attr = get_ia32_x87_attr_const(node); - arch_register_t const *const out = x87_attr->x87[2]; - arch_register_t const * in = x87_attr->x87[1]; - if (out == in) - in = x87_attr->x87[0]; - - if (out->index == 0) { - bemit8(0xD8); - bemit8(MOD_REG | ENC_REG(code) | ENC_RM(in->index)); - } else { - bemit8(0xDC); - bemit8(MOD_REG | ENC_REG(code_to) | ENC_RM(out->index)); + arch_register_t const *const out = attr->x87[2]; + assert(out == attr->x87[0] || out == attr->x87[1]); + assert(!attr->attr.data.ins_permuted); + + unsigned char op0 = 0xD8; + if (out != st0) op0 |= 0x04; + bemit8(op0); + + unsigned op = op_rev; + arch_register_t const *reg = attr->x87[0]; + if (reg == st0) { + op = op_fwd; + reg = attr->x87[1]; } + bemit8(MOD_REG | ENC_REG(op) | ENC_RM(reg->index)); } else { - if (get_mode_size_bits(get_ia32_ls_mode(node)) == 32) { - bemit8(0xD8); - } else { - bemit8(0xDC); - } - bemit_mod_am(code, node); + assert(attr->x87[2] == st0); + + unsigned const size = get_mode_size_bits(get_ia32_ls_mode(node)); + bemit8(size == 32 ? 0xD8 : 0xDC); + bemit_mod_am(attr->attr.data.ins_permuted ? op_rev : op_fwd, node); } } -static void bemit_fbinopp(const ir_node *node, unsigned const code) +static void bemit_fbinopp(const ir_node *node, unsigned const op_fwd, unsigned const op_rev) { - const ia32_x87_attr_t *x87_attr = get_ia32_x87_attr_const(node); - const arch_register_t *out = x87_attr->x87[2]; + ia32_x87_attr_t const *const attr = get_ia32_x87_attr_const(node); bemit8(0xDE); - bemit8(code + out->index); + bemit8((attr->x87[0] == &ia32_registers[REG_ST0] ? op_fwd : op_rev) + attr->x87[2]->index); } static void bemit_fop_reg(ir_node const *const node, unsigned char const op0, unsigned char const op1) @@ -3289,7 +3312,7 @@ static void bemit_fadd(const ir_node *node) static void bemit_faddp(const ir_node *node) { - bemit_fbinopp(node, 0xC0); + bemit_fbinopp(node, 0xC0, 0xC0); } static void bemit_fchs(const ir_node *node) @@ -3307,17 +3330,7 @@ static void bemit_fdiv(const ir_node *node) static void bemit_fdivp(const ir_node *node) { - bemit_fbinopp(node, 0xF8); -} - -static void bemit_fdivr(const ir_node *node) -{ - bemit_fbinop(node, 7, 6); -} - -static void bemit_fdivrp(const ir_node *node) -{ - bemit_fbinopp(node, 0xF0); + bemit_fbinopp(node, 0xF0, 0xF8); } static void bemit_ffreep(ir_node const *const node) @@ -3450,7 +3463,7 @@ static void bemit_fmul(const ir_node *node) static void bemit_fmulp(const ir_node *node) { - bemit_fbinopp(node, 0xC8); + bemit_fbinopp(node, 0xC8, 0xC8); } static void bemit_fpop(const ir_node *node) @@ -3516,17 +3529,7 @@ static void bemit_fsub(const ir_node *node) static void bemit_fsubp(const ir_node *node) { - bemit_fbinopp(node, 0xE8); -} - -static void bemit_fsubr(const ir_node *node) -{ - bemit_fbinop(node, 5, 4); -} - -static void bemit_fsubrp(const ir_node *node) -{ - bemit_fbinopp(node, 0xE0); + bemit_fbinopp(node, 0xE0, 0xE8); } static void bemit_fnstcw(const ir_node *node) @@ -3716,8 +3719,6 @@ static void ia32_register_binary_emitters(void) register_emitter(op_ia32_fchs, bemit_fchs); register_emitter(op_ia32_fdiv, bemit_fdiv); register_emitter(op_ia32_fdivp, bemit_fdivp); - register_emitter(op_ia32_fdivr, bemit_fdivr); - register_emitter(op_ia32_fdivrp, bemit_fdivrp); register_emitter(op_ia32_ffreep, bemit_ffreep); register_emitter(op_ia32_fild, bemit_fild); register_emitter(op_ia32_fist, bemit_fist); @@ -3735,8 +3736,6 @@ static void ia32_register_binary_emitters(void) register_emitter(op_ia32_fstp, bemit_fstp); register_emitter(op_ia32_fsub, bemit_fsub); register_emitter(op_ia32_fsubp, bemit_fsubp); - register_emitter(op_ia32_fsubr, bemit_fsubr); - register_emitter(op_ia32_fsubrp, bemit_fsubrp); register_emitter(op_ia32_fxch, bemit_fxch); /* ignore the following nodes */ diff --git a/ir/be/ia32/ia32_spec.pl b/ir/be/ia32/ia32_spec.pl index 7bd6634e7..bece27669 100644 --- a/ir/be/ia32/ia32_spec.pl +++ b/ir/be/ia32/ia32_spec.pl @@ -2305,38 +2305,15 @@ fmulp => { fsub => { state => "exc_pinned", - emit => 'fsub%FM %AF', + emit => 'fsub%FR%FM %AF', latency => 4, attr_type => "ia32_x87_attr_t", constructors => {}, }, -# Note: gas is strangely buggy: fdivrp and fdivp as well as fsubrp and fsubp -# are swapped, we work this around in the emitter... - fsubp => { state => "exc_pinned", -# see note about gas bugs - emit => 'fsubrp%FM %AF', - latency => 4, - attr_type => "ia32_x87_attr_t", - constructors => {}, -}, - -fsubr => { - state => "exc_pinned", - irn_flags => [ "rematerializable" ], - emit => 'fsubr%FM %AF', - latency => 4, - attr_type => "ia32_x87_attr_t", - constructors => {}, -}, - -fsubrp => { - state => "exc_pinned", - irn_flags => [ "rematerializable" ], -# see note about gas bugs before fsubp - emit => 'fsubp%FM %AF', + emit => 'fsub%FRp%FM %AF', latency => 4, attr_type => "ia32_x87_attr_t", constructors => {}, @@ -2351,7 +2328,7 @@ fprem => { fdiv => { state => "exc_pinned", - emit => 'fdiv%FM %AF', + emit => 'fdiv%FR%FM %AF', latency => 20, attr_type => "ia32_x87_attr_t", constructors => {}, @@ -2359,25 +2336,7 @@ fdiv => { fdivp => { state => "exc_pinned", -# see note about gas bugs before fsubp - emit => 'fdivrp%FM %AF', - latency => 20, - attr_type => "ia32_x87_attr_t", - constructors => {}, -}, - -fdivr => { - state => "exc_pinned", - emit => 'fdivr%FM %AF', - latency => 20, - attr_type => "ia32_x87_attr_t", - constructors => {}, -}, - -fdivrp => { - state => "exc_pinned", -# see note about gas bugs before fsubp - emit => 'fdivp%FM %AF', + emit => 'fdiv%FRp%FM %AF', latency => 20, attr_type => "ia32_x87_attr_t", constructors => {}, diff --git a/ir/be/ia32/ia32_x87.c b/ir/be/ia32/ia32_x87.c index 6f579622a..2f1bfd139 100644 --- a/ir/be/ia32/ia32_x87.c +++ b/ir/be/ia32/ia32_x87.c @@ -58,20 +58,6 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) /* Forward declaration. */ typedef struct x87_simulator x87_simulator; -/** - * An exchange template. - * Note that our virtual functions have the same inputs - * and attributes as the real ones, so we can simple exchange - * their opcodes! - * Further, x87 supports inverse instructions, so we can handle them. - */ -typedef struct exchange_tmpl { - ir_op *normal_op; /**< the normal one */ - ir_op *reverse_op; /**< the reverse one if exists */ - ir_op *normal_pop_op; /**< the normal one with tos pop */ - ir_op *reverse_pop_op; /**< the reverse one with tos pop */ -} exchange_tmpl; - /** * An entry on the simulated x87 stack. */ @@ -747,11 +733,10 @@ static void vfp_dump_live(vfp_liveness live) * * @param state the x87 state * @param n the node that should be simulated (and patched) - * @param tmpl the template containing the 4 possible x87 opcodes * * @return NO_NODE_ADDED */ -static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) +static int sim_binop(x87_state *const state, ir_node *const n, ir_op *const normal_op, ir_op *const normal_pop_op) { int op2_idx = 0, op1_idx; int out_idx, do_pop = 0; @@ -805,7 +790,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) op1_idx = 0; op2_idx += 1; out_idx = 0; - dst = tmpl->normal_op; + dst = normal_op; } else { /* Second live, first operand is dead here, bring it to tos. */ if (op1_idx != 0) { @@ -816,7 +801,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) } /* now do fxxx (tos=tos X op) */ out_idx = 0; - dst = tmpl->normal_op; + dst = normal_op; } } else { /* Second operand is dead. */ @@ -830,25 +815,25 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) } /* now do fxxxr (tos = op X tos) */ out_idx = 0; - dst = tmpl->reverse_op; + dst = normal_op; } else { /* Both operands are dead here, pop them from the stack. */ if (op2_idx == 0) { if (op1_idx == 0) { /* Both are identically and on tos, no pop needed. */ /* here fxxx (tos = tos X tos) */ - dst = tmpl->normal_op; + dst = normal_op; out_idx = 0; } else { /* now do fxxxp (op = op X tos, pop) */ - dst = tmpl->normal_pop_op; + dst = normal_pop_op; do_pop = 1; out_idx = op1_idx; } } else if (op1_idx == 0) { assert(op1_idx != op2_idx); /* now do fxxxrp (op = tos X op, pop) */ - dst = tmpl->reverse_pop_op; + dst = normal_pop_op; do_pop = 1; out_idx = op2_idx; } else { @@ -859,13 +844,13 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) op1_idx = 0; op2_idx = 0; /* use fxxx (tos = tos X tos) */ - dst = tmpl->normal_op; + dst = normal_op; out_idx = 0; } else { /* op2 is on tos now */ op2_idx = 0; /* use fxxxp (op = op X tos, pop) */ - dst = tmpl->normal_pop_op; + dst = normal_pop_op; out_idx = op1_idx; do_pop = 1; } @@ -887,7 +872,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) } /* use fxxx (tos = tos X mem) */ - dst = permuted ? tmpl->reverse_op : tmpl->normal_op; + dst = normal_op; out_idx = 0; } @@ -1089,15 +1074,11 @@ static int sim_store(x87_state *state, ir_node *n, ir_op *op, ir_op *op_p) return insn; } -#define _GEN_BINOP(op, rev) \ +#define GEN_BINOP(op) \ static int sim_##op(x87_state *state, ir_node *n) { \ - exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \ - return sim_binop(state, n, &tmpl); \ + return sim_binop(state, n, op_ia32_##op, op_ia32_##op##p); \ } -#define GEN_BINOP(op) _GEN_BINOP(op, op) -#define GEN_BINOPR(op) _GEN_BINOP(op, op##r) - #define GEN_LOAD(op) \ static int sim_##op(x87_state *state, ir_node *n) { \ return sim_load(state, n, op_ia32_##op, pn_ia32_v##op##_res); \ @@ -1115,9 +1096,9 @@ static int sim_##op(x87_state *state, ir_node *n) { \ /* all stubs */ GEN_BINOP(fadd) -GEN_BINOPR(fsub) +GEN_BINOP(fsub) GEN_BINOP(fmul) -GEN_BINOPR(fdiv) +GEN_BINOP(fdiv) GEN_UNOP(fabs) GEN_UNOP(fchs) -- 2.20.1