assert(cls1 == cls2 && "Register class mismatch at Perm");
if (cls1 == &ia32_reg_classes[CLASS_ia32_gp]) {
- lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn);
+ if(emit_env->isa->opt_arch == arch_athlon) {
+ // xchg commands are Vector path on athlons and therefore stall the DirectPath pipeline
+ // it is nearly always beneficial to use the 3 xor trick instead of an xchg
+ cmnt_buf[0] = 0;
+ lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn);
+ IA32_DO_EMIT(irn);
+ lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %2S, %1S", irn, irn);
+ IA32_DO_EMIT(irn);
+ lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn);
+ } else {
+ lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn);
+ }
}
else if (cls1 == &ia32_reg_classes[CLASS_ia32_xmm]) {
lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN,
align = 4;
}
if(cpu == arch_athlon) {
- maximum_skip = 7;
+ maximum_skip = 3;
} else {
maximum_skip = (1 << align) - 1;
}