From 9ea4799def655e6aa68c4d2fb1593a3e418addfd Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 19 Sep 2006 13:32:19 +0000 Subject: [PATCH] xchg is slow on athlons, so use 3 xors instead --- ir/be/ia32/ia32_emitter.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c index 38fa4a2cc..397709e83 100644 --- a/ir/be/ia32/ia32_emitter.c +++ b/ir/be/ia32/ia32_emitter.c @@ -1765,7 +1765,18 @@ static void emit_be_Perm(const ir_node *irn, ia32_emit_env_t *emit_env) { assert(cls1 == cls2 && "Register class mismatch at Perm"); if (cls1 == &ia32_reg_classes[CLASS_ia32_gp]) { - lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn); + if(emit_env->isa->opt_arch == arch_athlon) { + // xchg commands are Vector path on athlons and therefore stall the DirectPath pipeline + // it is nearly always beneficial to use the 3 xor trick instead of an xchg + cmnt_buf[0] = 0; + lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn); + IA32_DO_EMIT(irn); + lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %2S, %1S", irn, irn); + IA32_DO_EMIT(irn); + lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn); + } else { + lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn); + } } else if (cls1 == &ia32_reg_classes[CLASS_ia32_xmm]) { lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, @@ -2090,7 +2101,7 @@ static void ia32_emit_align_label(FILE *F, cpu_support cpu) { align = 4; } if(cpu == arch_athlon) { - maximum_skip = 7; + maximum_skip = 3; } else { maximum_skip = (1 << align) - 1; } -- 2.20.1