2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief ia32 architecture variants
23 * @author Michael Beck, Matthias Braun
24 * @version $Id: bearch_ia32_t.h 16363 2007-10-25 23:27:07Z beck $
31 #include "lc_opts_enum.h"
35 #include "bearch_ia32_t.h"
36 #include "ia32_architecture.h"
38 ia32_code_gen_config_t ia32_cg_config;
/**
 * CPU feature flags. They live in the upper bits of an architecture value
 * and are or'ed onto the architecture number. A flag that implies an older
 * feature carries that feature's bits as well.
 */
enum cpu_arch_features {
	arch_feature_intel    = 1u << 31,                          /**< vendor: Intel */
	arch_feature_amd      = 1u << 30,                          /**< vendor: AMD */
	arch_feature_p6       = 1u << 29,                          /**< P6 instructions */
	arch_feature_mmx      = 1u << 28,                          /**< MMX */
	arch_feature_sse1     = (1u << 27) | arch_feature_mmx,     /**< SSE1, implies MMX */
	arch_feature_sse2     = (1u << 26) | arch_feature_sse1,    /**< SSE2, implies SSE1 */
	arch_feature_sse3     = (1u << 25) | arch_feature_sse2,    /**< SSE3, implies SSE2 */
	arch_feature_ssse3    = (1u << 24) | arch_feature_sse3,    /**< SSSE3, implies SSE3 */
	arch_feature_3DNow    = 1u << 23,                          /**< 3DNow! */
	arch_feature_3DNowE   = (1u << 22) | arch_feature_3DNow,   /**< Enhanced 3DNow!, implies 3DNow! */
	arch_feature_netburst = (1u << 21) | arch_feature_intel,   /**< NetBurst, implies Intel */
	arch_feature_64bit    = (1u << 20) | arch_feature_sse2,    /**< x86_64, implies SSE2 */
};
67 arch_pentium = 3 | arch_feature_intel,
68 arch_pentium_mmx = 4 | arch_feature_intel | arch_feature_mmx,
69 arch_pentium_pro = 5 | arch_feature_intel | arch_feature_p6,
70 arch_pentium_2 = 6 | arch_feature_intel | arch_feature_p6 | arch_feature_mmx,
71 arch_pentium_3 = 7 | arch_feature_intel | arch_feature_p6 | arch_feature_sse1,
72 arch_pentium_4 = 8 | arch_feature_netburst | arch_feature_p6 | arch_feature_sse2,
73 arch_prescott = 9 | arch_feature_netburst | arch_feature_p6 | arch_feature_sse3,
74 arch_nocona = 10 | arch_feature_netburst | arch_feature_p6 | arch_feature_64bit | arch_feature_sse3,
75 arch_pentium_m = 11 | arch_feature_intel | arch_feature_p6 | arch_feature_sse2,
76 arch_core = 12 | arch_feature_intel | arch_feature_p6 | arch_feature_sse3,
77 arch_core2 = 13 | arch_feature_intel | arch_feature_p6 | arch_feature_64bit | arch_feature_ssse3,
80 arch_k6 = 14 | arch_feature_amd | arch_feature_mmx,
81 arch_geode = 15 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNowE,
82 arch_k6_2 = 16 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNow,
83 arch_k6_3 = 17 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNow,
84 arch_athlon = 18 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNowE | arch_feature_p6,
85 arch_athlon_xp = 19 | arch_feature_amd | arch_feature_sse1 | arch_feature_3DNowE | arch_feature_p6,
86 arch_opteron = 20 | arch_feature_amd | arch_feature_64bit | arch_feature_3DNowE | arch_feature_p6,
87 arch_k10 = 21 | arch_feature_amd | arch_feature_64bit | arch_feature_3DNowE | arch_feature_p6 | arch_feature_sse3,
90 arch_winchip_c6 = 22 | arch_feature_mmx,
91 arch_winchip2 = 23 | arch_feature_mmx | arch_feature_3DNow,
92 arch_c3 = 24 | arch_feature_mmx | arch_feature_3DNow,
93 arch_c3_2 = 25 | arch_feature_sse1, /* really no 3DNow! */
/**
 * true iff l <= x <= h, done with a single unsigned comparison.
 * l is expanded twice, so pass side-effect free arguments only.
 */
#define _IN_RANGE(x, l, h)   ((unsigned)((x) - (l)) <= (unsigned)((h) - (l)))

/** true iff at least one bit of the flag f is set in the architecture value x */
#define ARCH_HAS_FLAG(x, f)  (((x) & (f)) != 0)

/** true for CPUs carrying the Intel vendor flag */
#define ARCH_INTEL(x)        ARCH_HAS_FLAG((x), arch_feature_intel)

/** true for CPUs carrying the AMD vendor flag */
#define ARCH_AMD(x)          ARCH_HAS_FLAG((x), arch_feature_amd)

/** true for the K6 family, i.e. values from arch_k6 up to arch_k6_3 */
#define ARCH_K6(x)           _IN_RANGE((x), arch_k6, arch_k6_3)

/** true for an Athlon/Opteron, i.e. values from arch_athlon up to arch_opteron */
#define ARCH_K8(x)           _IN_RANGE((x), arch_athlon, arch_opteron)

/** true for an Athlon or newer, i.e. values from arch_athlon up to arch_k10 */
#define ARCH_ATHLON_PLUS(x)  _IN_RANGE((x), arch_athlon, arch_k10)

/** true if the CPU supports MMX */
#define ARCH_MMX(x)          ARCH_HAS_FLAG((x), arch_feature_mmx)

/** true if the CPU supports 3DNow! */
#define ARCH_3DNow(x)        ARCH_HAS_FLAG((x), arch_feature_3DNow)

/** true if the CPU has the P6 features (CMOV) */
#define IS_P6_ARCH(x)        ARCH_HAS_FLAG((x), arch_feature_p6)
/**
 * return true if the CPU has the NetBurst architecture
 *
 * arch_feature_netburst is a composite mask: it contains the NetBurst bit
 * and the Intel vendor bit. All bits of the mask must therefore be present;
 * an "any bit set" test would report every Intel CPU as NetBurst.
 */
#define IS_NETBURST_ARCH(x)     (((x) & arch_feature_netburst) == arch_feature_netburst)
126 static cpu_support arch = arch_generic;
127 static cpu_support opt_arch = arch_pentium_4;
128 static int use_sse2 = 0;
129 static int opt_cc = 1;
130 static int opt_unsafe_floatconv = 0;
132 /* instruction set architectures. */
133 static const lc_opt_enum_int_items_t arch_items[] = {
134 { "i386", arch_i386, },
135 { "i486", arch_i486, },
136 { "pentium", arch_pentium, },
137 { "i586", arch_pentium, },
138 { "pentiumpro", arch_pentium_pro, },
139 { "i686", arch_pentium_pro, },
140 { "pentiummmx", arch_pentium_mmx, },
141 { "pentium2", arch_pentium_2, },
142 { "p2", arch_pentium_2, },
143 { "pentium3", arch_pentium_3, },
144 { "p3", arch_pentium_3, },
145 { "pentium4", arch_pentium_4, },
146 { "p4", arch_pentium_4, },
147 { "prescott", arch_prescott, },
148 { "nocona", arch_nocona, },
149 { "pentiumm", arch_pentium_m, },
150 { "pm", arch_pentium_m, },
154 { "core", arch_core, },
155 { "yonah", arch_core, },
157 * core2 CPUs: Conroe (XE, L), Allendale, Merom (XE),
158 * Kentsfield (XE), Yorkfield XE, Penryn, Wolfdale, Yorkfield
161 { "merom", arch_core2, },
162 { "core2", arch_core2, },
164 { "k6-2", arch_k6_2, },
165 { "k6-3", arch_k6_2, },
166 { "geode", arch_geode, },
167 { "athlon", arch_athlon, },
168 { "athlon-xp", arch_athlon_xp, },
169 { "athlon-mp", arch_athlon_xp, },
170 { "athlon-4", arch_athlon_xp, },
171 { "athlon64", arch_opteron, },
172 { "k8", arch_opteron, },
173 { "opteron", arch_opteron, },
174 { "k10", arch_k10, },
175 { "barcelona", arch_k10, },
176 { "generic", arch_generic, },
177 { "generic32", arch_generic, },
181 static lc_opt_enum_int_var_t arch_var = {
182 (int*) &arch, arch_items
185 static lc_opt_enum_int_var_t opt_arch_var = {
186 (int*) &opt_arch, arch_items
189 static const lc_opt_enum_int_items_t fp_unit_items[] = {
195 static lc_opt_enum_int_var_t fp_unit_var = {
196 &use_sse2, fp_unit_items
199 static const lc_opt_table_entry_t ia32_architecture_options[] = {
200 LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture",
202 LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture",
204 LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit",
206 LC_OPT_ENT_NEGBIT("nooptcc", "do not optimize calling convention",
208 LC_OPT_ENT_BIT("unsafe_floatconv", "do unsafe floating point controlword "
209 "optimisations", &opt_unsafe_floatconv, 1),
/** Cost model for the simple instructions rated by ia32_evaluate_insn(). */
typedef struct insn_const {
	int add_cost;       /**< cost of one add */
	int lea_cost;       /**< cost of one lea */
	int const_shf_cost; /**< cost of a shift by a constant */
	int cost_mul_start; /**< base cost of a multiply */
	int cost_mul_bit;   /**< additional multiply cost per set bit, 0 for none */
} insn_const;

/* i386 */
static const insn_const i386_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	3,  /* const_shf_cost */
	9,  /* cost_mul_start */
	1   /* cost_mul_bit */
};

/* i486 */
static const insn_const i486_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	2,  /* const_shf_cost */
	12, /* cost_mul_start */
	1   /* cost_mul_bit */
};

/* Pentium */
static const insn_const pentium_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	1,  /* const_shf_cost */
	11, /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Pentium Pro */
static const insn_const pentiumpro_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	1,  /* const_shf_cost */
	4,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* K6 */
static const insn_const k6_cost = {
	1,  /* add_cost */
	2,  /* lea_cost */
	1,  /* const_shf_cost */
	3,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Geode */
static const insn_const geode_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	1,  /* const_shf_cost */
	7,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Athlon */
static const insn_const athlon_cost = {
	1,  /* add_cost */
	2,  /* lea_cost */
	1,  /* const_shf_cost */
	5,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Opteron/K8/K10 */
static const insn_const opteron_cost = {
	1,  /* add_cost */
	2,  /* lea_cost */
	1,  /* const_shf_cost */
	3,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Pentium 4 */
static const insn_const pentium4_cost = {
	1,  /* add_cost */
	3,  /* lea_cost */
	4,  /* const_shf_cost */
	15, /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Nocona and Core */
static const insn_const nocona_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	1,  /* const_shf_cost */
	10, /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Core2 */
static const insn_const core2_cost = {
	1,  /* add_cost */
	1,  /* lea_cost */
	1,  /* const_shf_cost */
	3,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* generic */
static const insn_const generic_cost = {
	1,  /* add_cost */
	2,  /* lea_cost */
	1,  /* const_shf_cost */
	4,  /* cost_mul_start */
	0   /* cost_mul_bit */
};

/* Cost table currently in effect; starts out as the generic one. */
static const insn_const *arch_costs = &generic_cost;
331 static void set_arch_costs(void)
335 arch_costs = &i386_cost;
338 arch_costs = &i486_cost;
341 case arch_pentium_mmx:
342 arch_costs = &pentium_cost;
344 case arch_pentium_pro:
347 arch_costs = &pentiumpro_cost;
350 arch_costs = &pentium4_cost;
353 arch_costs = &pentiumpro_cost;
358 arch_costs = &nocona_cost;
361 arch_costs = &core2_cost;
365 arch_costs = &k6_cost;
368 arch_costs = &geode_cost;
372 arch_costs = &athlon_cost;
376 arch_costs = &opteron_cost;
380 arch_costs = &generic_cost;
385 * Evaluate a given simple instruction.
387 int ia32_evaluate_insn(insn_kind kind, tarval *tv) {
392 cost = arch_costs->cost_mul_start;
393 if (arch_costs->cost_mul_bit > 0) {
394 char *bitstr = get_tarval_bitpattern(tv);
397 for (i = 0; bitstr[i] != '\0'; ++i) {
398 if (bitstr[i] == '1') {
399 cost += arch_costs->cost_mul_bit;
406 return arch_costs->lea_cost;
409 return arch_costs->add_cost;
411 return arch_costs->const_shf_cost;
413 return arch_costs->add_cost;
419 void ia32_setup_cg_config(void)
421 memset(&ia32_cg_config, 0, sizeof(ia32_cg_config));
423 /* on newer Intel CPUs mov, pop is often faster than leave although it has a
425 ia32_cg_config.use_leave = !ARCH_INTEL(opt_arch)
426 || !IS_P6_ARCH(opt_arch);
427 /* P4s don't like inc/decs because they only partially write the flags
428 register which produces false dependencies */
429 ia32_cg_config.use_incdec = !IS_NETBURST_ARCH(opt_arch) && (opt_arch != arch_generic);
430 ia32_cg_config.use_sse2 = use_sse2;
431 ia32_cg_config.use_ffreep = ARCH_ATHLON_PLUS(opt_arch);
432 ia32_cg_config.use_ftst = !IS_P6_ARCH(arch);
433 ia32_cg_config.use_femms = ARCH_ATHLON_PLUS(opt_arch)
434 && ARCH_MMX(arch) && ARCH_AMD(arch);
435 ia32_cg_config.use_fucomi = IS_P6_ARCH(arch);
436 ia32_cg_config.use_cmov = IS_P6_ARCH(arch);
437 ia32_cg_config.use_add_esp_4 = ARCH_ATHLON_PLUS(opt_arch) || (opt_arch == arch_geode) ||
438 IS_NETBURST_ARCH(opt_arch) || (opt_arch == arch_core2) ||
439 (opt_arch == arch_generic);
440 ia32_cg_config.use_add_esp_8 = ARCH_ATHLON_PLUS(opt_arch) || (opt_arch == arch_geode) ||
441 IS_P6_ARCH(opt_arch) || IS_NETBURST_ARCH(opt_arch) ||
442 (opt_arch == arch_core2) || (opt_arch == arch_generic) ||
443 (opt_arch == arch_i386) || (opt_arch == arch_i486);
444 ia32_cg_config.use_sub_esp_4 = ARCH_ATHLON_PLUS(opt_arch) || IS_P6_ARCH(opt_arch) ||
445 IS_NETBURST_ARCH(opt_arch) || (opt_arch == arch_core2) ||
446 (opt_arch == arch_generic);
447 ia32_cg_config.use_sub_esp_8 = ARCH_ATHLON_PLUS(opt_arch) ||
448 IS_P6_ARCH(opt_arch) || IS_NETBURST_ARCH(opt_arch) ||
449 (opt_arch == arch_core2) || (opt_arch == arch_generic) ||
450 (opt_arch == arch_i386) || (opt_arch == arch_i486);
451 ia32_cg_config.use_imul_mem_imm32 = !(opt_arch == arch_opteron || opt_arch == arch_k10);
452 ia32_cg_config.use_mov_0 = ARCH_K6(opt_arch);
453 ia32_cg_config.use_pad_return = ARCH_ATHLON_PLUS(opt_arch) || (opt_arch == arch_core2) ||
454 (opt_arch == arch_generic);
455 ia32_cg_config.optimize_cc = opt_cc;
456 ia32_cg_config.use_unsafe_floatconv = opt_unsafe_floatconv;
458 if(opt_arch == arch_i386) {
459 ia32_cg_config.function_alignment = 2;
460 } else if(opt_arch == arch_i486) {
461 ia32_cg_config.function_alignment = 4;
462 } else if(opt_arch == arch_k6) {
463 ia32_cg_config.function_alignment = 5;
464 ia32_cg_config.label_alignment = 5;
466 ia32_cg_config.function_alignment = 4;
467 ia32_cg_config.label_alignment = 4;
470 if(opt_arch == arch_i386 || opt_arch == arch_i486) {
471 ia32_cg_config.label_alignment_factor = -1;
472 } else if(ARCH_AMD(opt_arch)) {
473 ia32_cg_config.label_alignment_factor = 3;
475 ia32_cg_config.label_alignment_factor = 2;
481 void ia32_init_architecture(void)
483 lc_opt_entry_t *be_grp, *ia32_grp;
485 memset(&ia32_cg_config, 0, sizeof(ia32_cg_config));
487 be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
488 ia32_grp = lc_opt_get_grp(be_grp, "ia32");
490 lc_opt_add_table(ia32_grp, ia32_architecture_options);