X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_architecture.c;h=6fdfb607abe76769bd93e070c1f2fd084997c33b;hb=50fcbd2c84474270ca6c5c5787c79b4571251b82;hp=4927ee275d78f19e3528420e690159689824c5b1;hpb=ad626856654a9714386abd75c49d6a25c1beca0d;p=libfirm diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index 4927ee275..6fdfb607a 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -21,7 +21,6 @@ * @file * @brief ia32 architecture variants * @author Michael Beck, Matthias Braun - * @version $Id: bearch_ia32_t.h 16363 2007-10-25 23:27:07Z beck $ */ #include "config.h" @@ -98,7 +97,7 @@ enum cpu_arch_features { arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */ arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */ - arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */ + arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */ arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */ arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */ }; @@ -109,33 +108,43 @@ enum cpu_arch_features { * CPU's. 
*/ typedef enum cpu_support { - cpu_generic = arch_generic32, + cpu_generic = arch_generic32, /* intel CPUs */ - cpu_i386 = arch_i386, - cpu_i486 = arch_i486, - cpu_pentium = arch_pentium, - cpu_pentium_mmx = arch_pentium | arch_mmx_insn, - cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn, - cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn, - cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, - cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, - cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, - cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn, - cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, - cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn, - cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn, + cpu_i386 = arch_i386, + cpu_i486 = arch_i486, + cpu_pentium = arch_pentium, + cpu_pentium_mmx = arch_pentium | arch_mmx_insn, + cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn, + cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn, + cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn, + cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, + cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, + cpu_netburst_generic = arch_netburst | arch_feature_p6_insn, + cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, + cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn, + cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, + cpu_core2_generic = arch_core2 | 
arch_feature_p6_insn, + cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn, + cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn, + cpu_atom_generic = arch_atom | arch_feature_p6_insn, + cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn, /* AMD CPUs */ - cpu_k6 = arch_k6 | arch_mmx_insn, - cpu_k6_PLUS = arch_k6 | arch_3DNow_insn, - cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn, - cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, - cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, - cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, - cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, - cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, - cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn, + cpu_k6_generic = arch_k6, + cpu_k6 = arch_k6 | arch_mmx_insn, + cpu_k6_PLUS = arch_k6 | arch_3DNow_insn, + cpu_geode_generic = arch_geode, + cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn, + cpu_athlon_generic = arch_athlon | arch_feature_p6_insn, + cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, + cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, + cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, + cpu_k8_generic = arch_k8 | arch_feature_p6_insn, + cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, + cpu_k8_sse3 = arch_k8 | 
arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, + cpu_k10_generic = arch_k10 | arch_feature_p6_insn, + cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn, /* other CPUs */ cpu_winchip_c6 = arch_i486 | arch_feature_mmx, @@ -150,7 +159,7 @@ static int opt_size = 0; static int emit_machcode = 0; static cpu_support arch = cpu_generic; static cpu_support opt_arch = cpu_generic; -static int use_sse2 = 0; +static int fpu_arch = 0; static int opt_cc = 1; static int opt_unsafe_floatconv = 0; @@ -178,6 +187,7 @@ static const lc_opt_enum_int_items_t arch_items[] = { { "merom", cpu_core2 }, { "core2", cpu_core2 }, { "penryn", cpu_penryn }, + { "atom", cpu_atom }, { "k6", cpu_k6 }, { "k6-2", cpu_k6_PLUS }, @@ -222,29 +232,24 @@ static lc_opt_enum_int_var_t opt_arch_var = { }; static const lc_opt_enum_int_items_t fp_unit_items[] = { - { "x87" , 0 }, - { "sse2", 1 }, - { NULL, 0 } + { "x87" , IA32_FPU_ARCH_X87 }, + { "sse2", IA32_FPU_ARCH_SSE2 }, + { "softfloat", IA32_FPU_ARCH_SOFTFLOAT }, + { NULL, IA32_FPU_ARCH_NONE } }; static lc_opt_enum_int_var_t fp_unit_var = { - &use_sse2, fp_unit_items + &fpu_arch, fp_unit_items }; static const lc_opt_table_entry_t ia32_architecture_options[] = { - LC_OPT_ENT_BOOL("size", "optimize for size", &opt_size), - LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", - &arch_var), - LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", - &opt_arch_var), - LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", - &fp_unit_var), - LC_OPT_ENT_NEGBIT("nooptcc", "do not optimize calling convention", - &opt_cc, 1), - LC_OPT_ENT_BIT("unsafe_floatconv", "do unsafe floating point controlword " - "optimisations", &opt_unsafe_floatconv, 1), - LC_OPT_ENT_BOOL("machcode", "output machine code instead of assembler", - &emit_machcode), + LC_OPT_ENT_BOOL ("size", "optimize for size", 
&opt_size), + LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var), + LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var), + LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var), + LC_OPT_ENT_NEGBOOL ("nooptcc", "do not optimize calling convention", &opt_cc), + LC_OPT_ENT_BOOL ("unsafe_floatconv", "do unsafe floating point controlword optimisations", &opt_unsafe_floatconv), + LC_OPT_ENT_BOOL ("machcode", "output machine code instead of assembler", &emit_machcode), LC_OPT_LAST }; @@ -588,10 +593,10 @@ static cpu_support auto_detect_Intel(x86_cpu_info_t const *info) switch (family) { case 4: - auto_arch = arch_i486; + auto_arch = cpu_i486; break; case 5: - auto_arch = arch_pentium; + auto_arch = cpu_pentium; break; case 6: switch (model) { @@ -605,16 +610,14 @@ static cpu_support auto_detect_Intel(x86_cpu_info_t const *info) case 0x0A: /* Pentium III Model 0A */ case 0x0B: /* Pentium III Model 0B */ case 0x0D: /* Pentium M Model 0D */ - auto_arch = arch_ppro | arch_feature_p6_insn; - break; case 0x0E: /* Core Model 0E */ - auto_arch = arch_ppro | arch_feature_p6_insn; + auto_arch = cpu_pentium_pro_generic; break; case 0x0F: /* Core2 Model 0F */ case 0x15: /* Intel EP80579 */ case 0x16: /* Celeron Model 16 */ case 0x17: /* Core2 Model 17 */ - auto_arch = arch_core2 | arch_feature_p6_insn; + auto_arch = cpu_core2_generic; break; default: /* unknown */ @@ -629,16 +632,16 @@ static cpu_support auto_detect_Intel(x86_cpu_info_t const *info) case 0x03: /* Pentium 4 Model 03 */ case 0x04: /* Pentium 4 Model 04 */ case 0x06: /* Pentium 4 Model 06 */ - auto_arch = arch_netburst | arch_feature_p6_insn; + auto_arch = cpu_netburst_generic; break; case 0x1A: /* Core i7 */ - auto_arch = arch_core2 | arch_feature_p6_insn; + auto_arch = cpu_core2_generic; break; case 0x1C: /* Atom */ - auto_arch = arch_atom; + auto_arch = cpu_atom_generic; break; case 0x1D: /* Xeon MP */ - auto_arch = arch_core2 | 
arch_feature_p6_insn; + auto_arch = cpu_core2_generic; break; default: /* unknown */ @@ -668,7 +671,7 @@ static cpu_support auto_detect_AMD(x86_cpu_info_t const *info) { switch (family) { case 0x04: - auto_arch = arch_i486; + auto_arch = cpu_i486; break; case 0x05: switch (model) { @@ -676,21 +679,21 @@ static cpu_support auto_detect_AMD(x86_cpu_info_t const *info) { case 0x01: /* K5 Model 1 */ case 0x02: /* K5 Model 2 */ case 0x03: /* K5 Model 3 */ - auto_arch = arch_pentium; + auto_arch = cpu_pentium; break; case 0x06: /* K6 Model 6 */ case 0x07: /* K6 Model 7 */ case 0x08: /* K6-2 Model 8 */ case 0x09: /* K6-III Model 9 */ case 0x0D: /* K6-2+ or K6-III+ */ - auto_arch = arch_k6; + auto_arch = cpu_k6_generic; break; case 0x0A: /* Geode LX */ - auto_arch = arch_geode; + auto_arch = cpu_geode_generic; break; default: /* unknown K6 */ - auto_arch = arch_k6; + auto_arch = cpu_k6_generic; break; } break; @@ -704,20 +707,17 @@ static cpu_support auto_detect_AMD(x86_cpu_info_t const *info) { case 0x07: /* Mobile Duron Model 7 */ case 0x08: /* Athlon (TH/AP core) including Geode NX */ case 0x0A: /* Athlon (BT core) */ - auto_arch = arch_athlon | arch_feature_p6_insn; - break; - default: - /* unknown K7 */ - auto_arch = arch_athlon | arch_feature_p6_insn; + default: /* unknown K7 */ + auto_arch = cpu_athlon_generic; break; } break; case 0x0F: - auto_arch = arch_k8 | arch_feature_p6_insn; + auto_arch = cpu_k8_generic; break; case 0x1F: case 0x2F: /* AMD Family 11h */ - auto_arch = arch_k10 | arch_feature_p6_insn; + auto_arch = cpu_k10_generic; break; default: /* unknown */ @@ -740,7 +740,7 @@ typedef union { static void x86_cpuid(cpuid_registers *regs, unsigned level) { #if defined(__GNUC__) -# ifdef __PIC__ // GCC cannot handle EBX in PIC +# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC __asm ( "pushl %%ebx\n\t" "cpuid\n\t" @@ -757,6 +757,8 @@ static void x86_cpuid(cpuid_registers *regs, unsigned level) # endif #elif defined(_MSC_VER) 
__cpuid(regs->bulk, level); +#else +# error CPUID is missing #endif } @@ -809,13 +811,11 @@ static void autodetect_arch(void) /* We use the cpuid instruction to detect the CPU features */ if (x86_toogle_cpuid()) { cpuid_registers regs; - unsigned highest_level; char vendorid[13]; x86_cpu_info_t cpu_info; /* get vendor ID */ x86_cpuid(&regs, 0); - highest_level = regs.r.eax; memcpy(&vendorid[0], &regs.r.ebx, 4); memcpy(&vendorid[4], &regs.r.edx, 4); memcpy(&vendorid[8], &regs.r.ecx, 4); @@ -839,7 +839,7 @@ static void autodetect_arch(void) } else if (0 == strcmp(vendorid, "AuthenticAMD")) { auto_arch = auto_detect_AMD(&cpu_info); } else if (0 == strcmp(vendorid, "Geode by NSC")) { - auto_arch = arch_geode; + auto_arch = cpu_geode_generic; } if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV) @@ -887,7 +887,8 @@ void ia32_setup_cg_config(void) /* P4s don't like inc/decs because they only partially write the flags * register which produces false dependencies */ c->use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_core2 | arch_geode) || opt_size; - c->use_sse2 = use_sse2 && FLAGS(arch, arch_feature_sse2); + c->use_softfloat = FLAGS(fpu_arch, IA32_FPU_ARCH_SOFTFLOAT); + c->use_sse2 = FLAGS(fpu_arch, IA32_FPU_ARCH_SSE2) && FLAGS(arch, arch_feature_sse2); c->use_ffreep = FLAGS(opt_arch, arch_athlon_plus); c->use_ftst = !FLAGS(arch, arch_feature_p6_insn); /* valgrind can't cope with femms yet and the usefulness of the optimization @@ -909,13 +910,13 @@ void ia32_setup_cg_config(void) c->use_pxor = FLAGS(opt_arch, arch_netburst); c->use_mov_0 = FLAGS(opt_arch, arch_k6) && !opt_size; c->use_short_sex_eax = !FLAGS(opt_arch, arch_k6) && !opt_size; - c->use_pad_return = FLAGS(opt_arch, arch_athlon_plus | arch_core2 | arch_generic32) && !opt_size; + c->use_pad_return = FLAGS(opt_arch, arch_athlon_plus) && !opt_size; c->use_bt = FLAGS(opt_arch, arch_core2 | arch_athlon_plus) || opt_size; c->use_fisttp = FLAGS(opt_arch & arch, arch_feature_sse3); c->use_sse_prefetch = FLAGS(arch, 
(arch_feature_3DNowE | arch_feature_sse1)); c->use_3dnow_prefetch = FLAGS(arch, arch_feature_3DNow); c->use_popcnt = FLAGS(arch, arch_feature_popcnt); - c->use_i486 = (arch & arch_mask) >= arch_i486; + c->use_bswap = (arch & arch_mask) >= arch_i486; c->optimize_cc = opt_cc; c->use_unsafe_floatconv = opt_unsafe_floatconv; c->emit_machcode = emit_machcode;