xchg is slow on athlons, so use 3 xors instead
author Matthias Braun <matze@braunis.de>
Tue, 19 Sep 2006 13:32:19 +0000 (13:32 +0000)
committer Matthias Braun <matze@braunis.de>
Tue, 19 Sep 2006 13:32:19 +0000 (13:32 +0000)
ir/be/ia32/ia32_emitter.c

index 38fa4a2..397709e 100644 (file)
@@ -1765,7 +1765,18 @@ static void emit_be_Perm(const ir_node *irn, ia32_emit_env_t *emit_env) {
        assert(cls1 == cls2 && "Register class mismatch at Perm");
 
        if (cls1 == &ia32_reg_classes[CLASS_ia32_gp]) {
-               lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn);
+               if(emit_env->isa->opt_arch == arch_athlon) {
+                       // xchg commands are Vector path on athlons and therefore stall the DirectPath pipeline
+                       // it is nearly always beneficial to use the 3 xor trick instead of an xchg
+                       cmnt_buf[0] = 0;
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn);
+                       IA32_DO_EMIT(irn);
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %2S, %1S", irn, irn);
+                       IA32_DO_EMIT(irn);
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xor %1S, %2S", irn, irn);
+               } else {
+                       lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN, "xchg %1S, %2S", irn, irn);
+               }
        }
        else if (cls1 == &ia32_reg_classes[CLASS_ia32_xmm]) {
                lc_esnprintf(ia32_get_arg_env(), cmd_buf, SNPRINTF_BUF_LEN,
@@ -2090,7 +2101,7 @@ static void ia32_emit_align_label(FILE *F, cpu_support cpu) {
                        align = 4;
        }
        if(cpu == arch_athlon) {
-               maximum_skip = 7;
+               maximum_skip = 3;
        } else {
                maximum_skip = (1 << align) - 1;
        }