X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_architecture.c;h=b025b4ff3eeb2e5894d4b31b9747b60816f6bc3c;hb=dabecf7efb18569c6f06f047c68d124f108cfced;hp=a8f927ab9c34a2c4d328b63db0861f6930a6b16b;hpb=c080deff990818b37e237ad982c9e1bb1b855bd8;p=libfirm diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index a8f927ab9..b025b4ff3 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -70,12 +70,18 @@ enum cpu_arch_features { arch_feature_3DNow = 0x00100000, /**< 3DNow! instructions */ arch_feature_3DNowE = 0x00200000, /**< Enhanced 3DNow! instructions */ arch_feature_64bit = 0x00400000, /**< x86_64 support */ + arch_feature_sse4_1 = 0x00800000, /**< SSE4.1 instructions */ + arch_feature_sse4_2 = 0x01000000, /**< SSE4.2 instructions */ + arch_feature_sse4a = 0x02000000, /**< SSE4a instructions */ arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */ - arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */ - arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */ - arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */ - arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */ + arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */ + arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */ + arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */ + arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */ + arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */ + arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */ + arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, 
include SSSE3 */ arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */ arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */ @@ -87,7 +93,7 @@ enum cpu_arch_features { /** * CPU's. */ -enum cpu_support { +typedef enum cpu_support { cpu_generic = arch_generic32, /* intel CPUs */ @@ -103,25 +109,28 @@ enum cpu_support { cpu_prescott = arch_nocona | arch_feature_p6_insn | arch_sse3_insn, cpu_nocona = arch_nocona | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, cpu_core2 = arch_core2 | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn, + cpu_penryn = arch_core2 | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn, /* AMD CPUs */ cpu_k6 = arch_k6 | arch_mmx_insn, cpu_k6_PLUS = arch_k6 | arch_3DNow_insn, cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn, + cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_p6_insn, cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_p6_insn, cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_p6_insn | arch_64bit_insn, cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_p6_insn | arch_64bit_insn, cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, - cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, + cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_p6_insn | arch_64bit_insn | arch_sse4a_insn, /* other CPUs */ cpu_winchip_c6 = arch_i486 | arch_feature_mmx, cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow, cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow, - cpu_c3_2 = arch_ppro | arch_sse1_insn, /* really no 3DNow! */ -}; + cpu_c3_2 = arch_ppro | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! 
*/ +} cpu_support; static int opt_size = 0; +static int emit_machcode = 0; static cpu_support arch = cpu_generic; static cpu_support opt_arch = cpu_generic; static int use_sse2 = 0; @@ -151,12 +160,13 @@ static const lc_opt_enum_int_items_t arch_items[] = { { "nocona", cpu_nocona }, { "merom", cpu_core2 }, { "core2", cpu_core2 }, + { "penryn", cpu_penryn }, { "k6", cpu_k6 }, { "k6-2", cpu_k6_PLUS }, { "k6-3", cpu_k6_PLUS }, { "geode", cpu_geode }, - { "athlon", cpu_athlon }, + { "athlon", cpu_athlon_old }, { "athlon-tbird", cpu_athlon }, { "athlon-4", cpu_athlon }, { "athlon-xp", cpu_athlon }, @@ -211,6 +221,8 @@ static const lc_opt_table_entry_t ia32_architecture_options[] = { &opt_cc, 1), LC_OPT_ENT_BIT("unsafe_floatconv", "do unsafe floating point controlword " "optimisations", &opt_unsafe_floatconv, 1), + LC_OPT_ENT_BOOL("machcode", "output machine code instead of assembler", + &emit_machcode), LC_OPT_LAST }; @@ -426,12 +438,13 @@ static void set_arch_costs(void) } /* Evaluate the costs of an instruction. 
*/ -int ia32_evaluate_insn(insn_kind kind, tarval *tv) { +int ia32_evaluate_insn(insn_kind kind, const ir_mode *mode, ir_tarval *tv) +{ int cost; switch (kind) { case MUL: - cost = arch_costs->cost_mul_start; + cost = arch_costs->cost_mul_start; if (arch_costs->cost_mul_bit > 0) { char *bitstr = get_tarval_bitpattern(tv); int i; @@ -443,14 +456,27 @@ int ia32_evaluate_insn(insn_kind kind, tarval *tv) { } free(bitstr); } - return cost; + if (get_mode_size_bits(mode) <= 32) + return cost; + /* 64bit mul supported, approx 4 times of a 32bit mul */ + return 4 * cost; case LEA: - return arch_costs->lea_cost; + /* lea is only supported for 32 bit */ + if (get_mode_size_bits(mode) <= 32) + return arch_costs->lea_cost; + /* in 64bit mode, the Lea cost are at worst 2 shifts and one add */ + return 2 * arch_costs->add_cost + 2 * (2 * arch_costs->const_shf_cost); case ADD: case SUB: - return arch_costs->add_cost; + if (get_mode_size_bits(mode) <= 32) + return arch_costs->add_cost; + /* 64bit add/sub supported, double the cost */ + return 2 * arch_costs->add_cost; case SHIFT: - return arch_costs->const_shf_cost; + if (get_mode_size_bits(mode) <= 32) + return arch_costs->const_shf_cost; + /* 64bit shift supported, double the cost */ + return 2 * arch_costs->const_shf_cost; case ZERO: return arch_costs->add_cost; default: @@ -466,11 +492,11 @@ void ia32_setup_cg_config(void) set_arch_costs(); c->optimize_size = opt_size != 0; - /* on newer intel cpus mov, pop is often faster then leave although it has a + /* on newer intel cpus mov, pop is often faster than leave although it has a * longer opcode */ c->use_leave = FLAGS(opt_arch, arch_i386 | arch_all_amd | arch_core2) || opt_size; /* P4s don't like inc/decs because they only partially write the flags - register which produces false dependencies */ + * register which produces false dependencies */ c->use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_core2 | arch_geode) || opt_size; c->use_sse2 = use_sse2 && 
FLAGS(arch, arch_feature_sse2); c->use_ffreep = FLAGS(opt_arch, arch_athlon_plus); @@ -493,11 +519,17 @@ void ia32_setup_cg_config(void) c->use_imul_mem_imm32 = !FLAGS(opt_arch, arch_k8 | arch_k10) || opt_size; c->use_pxor = FLAGS(opt_arch, arch_netburst); c->use_mov_0 = FLAGS(opt_arch, arch_k6) && !opt_size; + c->use_short_sex_eax = !FLAGS(opt_arch, arch_k6) && !opt_size; c->use_pad_return = FLAGS(opt_arch, arch_athlon_plus | arch_core2 | arch_generic32) && !opt_size; c->use_bt = FLAGS(opt_arch, arch_core2 | arch_athlon_plus) || opt_size; c->use_fisttp = FLAGS(opt_arch & arch, arch_feature_sse3); + c->use_sse_prefetch = FLAGS(arch, (arch_feature_3DNowE | arch_feature_sse1)); + c->use_3dnow_prefetch = FLAGS(arch, arch_feature_3DNow); + c->use_popcnt = FLAGS(arch, (arch_feature_sse4_2 | arch_feature_sse4a)); + c->use_i486 = (arch & arch_mask) >= arch_i486; c->optimize_cc = opt_cc; c->use_unsafe_floatconv = opt_unsafe_floatconv; + c->emit_machcode = emit_machcode; c->function_alignment = arch_costs->function_alignment; c->label_alignment = arch_costs->label_alignment;