#include "ia32_finish.h"
#include "ia32_util.h"
#include "ia32_fpu.h"
+#include "ia32_architecture.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
&ia32_xmm_regs[REG_XMM_NOREG]);
}
-/* Creates the unique per irg FP NoReg node. */
-ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) {
- return USE_SSE2(cg) ? ia32_new_NoReg_xmm(cg) : ia32_new_NoReg_vfp(cg);
-}
-
ir_node *ia32_new_Unknown_gp(ia32_code_gen_t *cg) {
return create_const(cg, &cg->unknown_gp, new_rd_ia32_Unknown_GP,
&ia32_gp_regs[REG_GP_UKNWN]);
if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
return ia32_new_NoReg_gp(cg);
- return ia32_new_NoReg_fp(cg);
+ if (ia32_cg_config.use_sse2) {
+ return ia32_new_NoReg_xmm(cg);
+ } else {
+ return ia32_new_NoReg_vfp(cg);
+ }
}
/**************************************************
ir_mode *mode_bp = env->isa->bp->reg_class->mode;
ir_graph *irg = current_ir_graph;
- if (ARCH_AMD(isa->opt_arch)) {
+ if (ia32_cg_config.use_leave) {
ir_node *leave;
/* leave */
*/
static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn)
{
- int cost;
+ int cost;
ia32_op_type_t op_tp;
- const ia32_irn_ops_t *ops = self;
+ (void) self;
if (is_Proj(irn))
return 0;
if (is_ia32_CopyB(irn)) {
cost = 250;
- if (ARCH_INTEL(ops->cg->arch))
- cost += 150;
}
else if (is_ia32_CopyB_i(irn)) {
int size = get_ia32_copyb_size(irn);
cost = 20 + (int)ceil((4/3) * size);
- if (ARCH_INTEL(ops->cg->arch))
- cost += 150;
}
/* in case of address mode operations add additional cycles */
else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) {
}
if (mode_is_float(spillmode)) {
- if (USE_SSE2(cg))
+ if (ia32_cg_config.use_sse2)
new_op = new_rd_ia32_xLoad(dbg, irg, block, ptr, noreg, mem, spillmode);
else
new_op = new_rd_ia32_vfld(dbg, irg, block, ptr, noreg, mem, spillmode);
}
if (mode_is_float(mode)) {
- if (USE_SSE2(cg))
+ if (ia32_cg_config.use_sse2)
store = new_rd_ia32_xStore(dbg, irg, block, ptr, noreg, nomem, val);
else
store = new_rd_ia32_vfst(dbg, irg, block, ptr, noreg, nomem, val, mode);
cg->isa = isa;
cg->birg = birg;
cg->blk_sched = NULL;
- cg->fp_kind = isa->fp_kind;
cg->dump = (birg->main_env->options->dump_flags & DUMP_BE) ? 1 : 0;
- /* copy optimizations from isa for easier access */
- cg->opt = isa->opt;
- cg->arch = isa->arch;
- cg->opt_arch = isa->opt_arch;
-
/* enter it */
isa->cg = cg;
NULL, /* 8bit register names high */
NULL, /* types */
NULL, /* tv_ents */
- (0 |
- IA32_OPT_INCDEC | /* optimize add 1, sub 1 into inc/dec default: on */
- IA32_OPT_CC),
- arch_pentium_4, /* instruction architecture */
- arch_pentium_4, /* optimize for architecture */
- fp_x87, /* floating point mode */
NULL, /* current code generator */
#ifndef NDEBUG
NULL, /* name obstack */
#endif
};
-static void set_arch_costs(enum cpu_support arch);
-
/**
* Initializes the backend ISA.
*/
ia32_register_init();
ia32_create_opcodes();
- set_arch_costs(isa->opt_arch);
-
- if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
- (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
- /* no SSE2 for these cpu's */
- isa->fp_kind = fp_x87;
-
- if (ARCH_INTEL(isa->opt_arch) && isa->opt_arch >= arch_pentium_4) {
- /* Pentium 4 don't like inc and dec instructions */
- isa->opt &= ~IA32_OPT_INCDEC;
- }
-
be_emit_init(file_handle);
isa->regs_16bit = pmap_create();
isa->regs_8bit = pmap_create();
* @param mode The mode in question.
* @return A register class which can hold values of the given mode.
*/
-const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self, const ir_mode *mode) {
- const ia32_isa_t *isa = self;
+const arch_register_class_t *ia32_get_reg_class_for_mode(const void *self,
+ const ir_mode *mode)
+{
+ (void) self;
+
if (mode_is_float(mode)) {
- return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
+ return ia32_cg_config.use_sse2 ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
}
else
return &ia32_reg_classes[CLASS_ia32_gp];
* @param method_type The type of the method (procedure) in question.
* @param abi The abi object to be modified
*/
-static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_call_t *abi) {
- const ia32_isa_t *isa = self;
+static void ia32_get_call_abi(const void *self, ir_type *method_type,
+ be_abi_call_t *abi)
+{
ir_type *tp;
ir_mode *mode;
unsigned cc;
int n, i, regnum;
be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);
+ (void) self;
/* set abi flags for calls */
call_flags.bits.left_to_right = 0; /* always last arg first on stack */
} else {
cc = get_method_calling_convention(method_type);
if (get_method_additional_properties(method_type) & mtp_property_private
- && (ia32_isa_template.opt & IA32_OPT_CC)) {
+ && (ia32_cg_config.optimize_cc)) {
/* set the calling conventions to register parameter */
cc = (cc & ~cc_bits) | cc_reg_param;
}
tp = get_method_param_type(method_type, i);
mode = get_type_mode(tp);
if (mode != NULL) {
- reg = ia32_get_RegParam_reg(isa->cg, cc, regnum, mode);
+ reg = ia32_get_RegParam_reg(cc, regnum, mode);
}
if (reg != NULL) {
be_abi_call_param_reg(abi, i, reg);
(void)i;
(void)j;
+ if(!ia32_cg_config.use_cmov) {
+ /* TODO: we could still handle abs(x)... */
+ return 0;
+ }
+
/* we can't handle psis with 64bit compares yet */
if(is_Proj(sel)) {
ir_node *pred = get_Proj_pred(sel);
return 1;
}
-typedef struct insn_const {
- int add_cost; /**< cost of an add instruction */
- int lea_cost; /**< cost of a lea instruction */
- int const_shf_cost; /**< cost of a constant shift instruction */
- int cost_mul_start; /**< starting cost of a multiply instruction */
- int cost_mul_bit; /**< cost of multiply for every set bit */
-} insn_const;
-
-/* costs for the i386 */
-static const insn_const i386_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 2, /* cost of a constant shift instruction */
- 6, /* starting cost of a multiply instruction */
- 1 /* cost of multiply for every set bit */
-};
-
-/* costs for the i486 */
-static const insn_const i486_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 2, /* cost of a constant shift instruction */
- 12, /* starting cost of a multiply instruction */
- 1 /* cost of multiply for every set bit */
-};
-
-/* costs for the Pentium */
-static const insn_const pentium_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 1, /* cost of a constant shift instruction */
- 11, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-/* costs for the Pentium Pro */
-static const insn_const pentiumpro_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 1, /* cost of a constant shift instruction */
- 4, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-/* costs for the K6 */
-static const insn_const k6_cost = {
- 1, /* cost of an add instruction */
- 2, /* cost of a lea instruction */
- 1, /* cost of a constant shift instruction */
- 3, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-/* costs for the Athlon */
-static const insn_const athlon_cost = {
- 1, /* cost of an add instruction */
- 2, /* cost of a lea instruction */
- 1, /* cost of a constant shift instruction */
- 5, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-/* costs for the Pentium 4 */
-static const insn_const pentium4_cost = {
- 1, /* cost of an add instruction */
- 3, /* cost of a lea instruction */
- 4, /* cost of a constant shift instruction */
- 15, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-/* costs for the Core */
-static const insn_const core_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 1, /* cost of a constant shift instruction */
- 10, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-/* costs for the generic */
-static const insn_const generic_cost = {
- 1, /* cost of an add instruction */
- 2, /* cost of a lea instruction */
- 1, /* cost of a constant shift instruction */
- 4, /* starting cost of a multiply instruction */
- 0 /* cost of multiply for every set bit */
-};
-
-static const insn_const *arch_costs = &generic_cost;
-
-static void set_arch_costs(enum cpu_support arch) {
- switch (arch) {
- case arch_i386:
- arch_costs = &i386_cost;
- break;
- case arch_i486:
- arch_costs = &i486_cost;
- break;
- case arch_pentium:
- case arch_pentium_mmx:
- arch_costs = &pentium_cost;
- break;
- case arch_pentium_pro:
- case arch_pentium_2:
- case arch_pentium_3:
- arch_costs = &pentiumpro_cost;
- break;
- case arch_pentium_4:
- arch_costs = &pentium4_cost;
- break;
- case arch_pentium_m:
- arch_costs = &pentiumpro_cost;
- break;
- case arch_core:
- arch_costs = &core_cost;
- break;
- case arch_prescott:
- arch_costs = &pentium4_cost;
- break;
- case arch_core2:
- arch_costs = &core_cost;
- break;
- case arch_k6:
- case arch_k6_2:
- arch_costs = &k6_cost;
- break;
- case arch_athlon:
- case arch_athlon_xp:
- case arch_opteron:
- arch_costs = &athlon_cost;
- break;
- case arch_generic:
- default:
- arch_costs = &generic_cost;
- }
-}
-
-/**
- * Evaluate a given simple instruction.
- */
-static int ia32_evaluate_insn(insn_kind kind, tarval *tv) {
- int cost;
-
- switch (kind) {
- case MUL:
- cost = arch_costs->cost_mul_start;
- if (arch_costs->cost_mul_bit > 0) {
- char *bitstr = get_tarval_bitpattern(tv);
- int i;
-
- for (i = 0; bitstr[i] != '\0'; ++i) {
- if (bitstr[i] == '1') {
- cost += arch_costs->cost_mul_bit;
- }
- }
- free(bitstr);
- }
- return cost;
- case LEA:
- return arch_costs->lea_cost;
- case ADD:
- case SUB:
- return arch_costs->add_cost;
- case SHIFT:
- return arch_costs->const_shf_cost;
- case ZERO:
- return arch_costs->add_cost;
- default:
- return 1;
- }
-}
-
/**
* Returns the libFirm configuration parameter for this backend.
*/
NULL, /* will be set below */
};
+ ia32_setup_cg_config();
+
p.dep_param = &ad;
p.if_conv_info = &ifconv;
return &p;
}
-/* instruction set architectures. */
-static const lc_opt_enum_int_items_t arch_items[] = {
- { "386", arch_i386, },
- { "486", arch_i486, },
- { "pentium", arch_pentium, },
- { "586", arch_pentium, },
- { "pentiumpro", arch_pentium_pro, },
- { "686", arch_pentium_pro, },
- { "pentiummmx", arch_pentium_mmx, },
- { "pentium2", arch_pentium_2, },
- { "p2", arch_pentium_2, },
- { "pentium3", arch_pentium_3, },
- { "p3", arch_pentium_3, },
- { "pentium4", arch_pentium_4, },
- { "p4", arch_pentium_4, },
- { "prescott", arch_pentium_4, },
- { "pentiumm", arch_pentium_m, },
- { "pm", arch_pentium_m, },
- { "core", arch_core, },
- { "yonah", arch_core, },
- { "merom", arch_core2, },
- { "core2", arch_core2, },
- { "k6", arch_k6, },
- { "k6-2", arch_k6_2, },
- { "k6-3", arch_k6_2, },
- { "athlon", arch_athlon, },
- { "athlon-xp", arch_athlon_xp, },
- { "athlon-mp", arch_athlon_xp, },
- { "athlon-4", arch_athlon_xp, },
- { "athlon64", arch_opteron, },
- { "k8", arch_opteron, },
- { "opteron", arch_opteron, },
- { "generic", arch_generic, },
- { NULL, 0 }
-};
-
-static lc_opt_enum_int_var_t arch_var = {
- &ia32_isa_template.arch, arch_items
-};
-
-static lc_opt_enum_int_var_t opt_arch_var = {
- &ia32_isa_template.opt_arch, arch_items
-};
-
-static const lc_opt_enum_int_items_t fp_unit_items[] = {
- { "x87" , fp_x87 },
- { "sse2", fp_sse2 },
- { NULL, 0 }
-};
-
-static lc_opt_enum_int_var_t fp_unit_var = {
- &ia32_isa_template.fp_kind, fp_unit_items
-};
-
static const lc_opt_enum_int_items_t gas_items[] = {
{ "normal", GAS_FLAVOUR_NORMAL },
{ "mingw", GAS_FLAVOUR_MINGW },
};
static const lc_opt_table_entry_t ia32_options[] = {
- LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
- LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", &opt_arch_var),
- LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit", &fp_unit_var),
- LC_OPT_ENT_NEGBIT("nooptcc", "do not optimize calling convention", &ia32_isa_template.opt, IA32_OPT_CC),
- LC_OPT_ENT_BIT("unsafe_floatconv", "do unsage floating point controlword optimisations", &ia32_isa_template.opt, IA32_OPT_UNSAFE_FLOATCONV),
- LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
+ LC_OPT_ENT_ENUM_INT("gasmode", "set the GAS compatibility mode", &gas_var),
LC_OPT_LAST
};
void be_init_arch_ia32(void)
{
- lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
+ lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
lc_opt_entry_t *ia32_grp = lc_opt_get_grp(be_grp, "ia32");
lc_opt_add_table(ia32_grp, ia32_options);
ia32_init_optimize();
ia32_init_transform();
ia32_init_x87();
+ ia32_init_architecture();
}
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_ia32);
typedef enum cpu_support cpu_support;
typedef enum fp_support fp_support;
-/**
- * Bitmask for the backend optimization settings.
- */
-enum ia32_optimize_t {
- IA32_OPT_INCDEC = 1 << 0, /**< optimize add/sub 1/-1 to inc/dec */
- IA32_OPT_CC = 1 << 1, /**< optimize calling convention of private
- functions */
- IA32_OPT_UNSAFE_FLOATCONV = 1 << 2, /**< disrespect current floating
- point rounding mode at entry and exit of
- functions (this is ok for programs that don't
- explicitly change the rounding mode) */
-};
-
-/**
- * CPU features.
- */
-enum cpu_arch_features {
- arch_feature_intel = 0x80000000, /**< Intel CPU */
- arch_feature_amd = 0x40000000, /**< AMD CPU */
- arch_feature_p6 = 0x20000000, /**< P6 instructions */
- arch_feature_mmx = 0x10000000, /**< MMX instructions */
- arch_feature_sse1 = 0x08000000 | arch_feature_mmx, /**< SSE1 instructions, include MMX */
- arch_feature_sse2 = 0x04000000 | arch_feature_sse1, /**< SSE2 instructions, include SSE1 */
- arch_feature_sse3 = 0x02000000 | arch_feature_sse2, /**< SSE3 instructions, include SSE2 */
- arch_feature_ssse3 = 0x01000000 | arch_feature_sse3, /**< SSSE3 instructions, include SSE3 */
- arch_feature_3DNow = 0x00800000, /**< 3DNow! instructions */
- arch_feature_3DNowE = 0x00400000 | arch_feature_3DNow, /**< Enhanced 3DNow! instructions */
- arch_feature_netburst = 0x00200000 | arch_feature_intel, /**< Netburst architecture */
- arch_feature_64bit = 0x00100000 | arch_feature_sse2, /**< x86_64 support, include SSE2 */
-};
-
-/**
- * Architectures.
- */
-enum cpu_support {
- /* intel CPU's */
- arch_generic = 0,
-
- arch_i386 = 1,
- arch_i486 = 2,
- arch_pentium = 3 | arch_feature_intel,
- arch_pentium_mmx = 4 | arch_feature_intel | arch_feature_mmx,
- arch_pentium_pro = 5 | arch_feature_intel | arch_feature_p6,
- arch_pentium_2 = 6 | arch_feature_intel | arch_feature_p6 | arch_feature_mmx,
- arch_pentium_3 = 7 | arch_feature_intel | arch_feature_p6 | arch_feature_sse1,
- arch_pentium_4 = 8 | arch_feature_netburst | arch_feature_p6 | arch_feature_sse2,
- arch_pentium_m = 9 | arch_feature_intel | arch_feature_p6 | arch_feature_sse2,
- arch_core = 10 | arch_feature_intel | arch_feature_p6 | arch_feature_sse3,
- arch_prescott = 11 | arch_feature_netburst | arch_feature_p6 | arch_feature_sse3,
- arch_core2 = 12 | arch_feature_intel | arch_feature_p6 | arch_feature_64bit | arch_feature_ssse3,
-
- /* AMD CPU's */
- arch_k6 = 13 | arch_feature_amd | arch_feature_mmx,
- arch_k6_2 = 14 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNow,
- arch_k6_3 = 15 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNow,
- arch_athlon = 16 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNowE | arch_feature_p6,
- arch_athlon_xp = 17 | arch_feature_amd | arch_feature_sse1 | arch_feature_3DNowE | arch_feature_p6,
- arch_opteron = 18 | arch_feature_amd | arch_feature_64bit | arch_feature_3DNowE | arch_feature_p6,
-
- /* other */
- arch_winchip_c6 = 19 | arch_feature_mmx,
- arch_winchip2 = 20 | arch_feature_mmx | arch_feature_3DNow,
- arch_c3 = 21 | arch_feature_mmx | arch_feature_3DNow,
- arch_c3_2 = 22 | arch_feature_sse1, /* really no 3DNow! */
-};
-
-/** checks for l <= x <= h */
-#define _IN_RANGE(x, l, h) ((unsigned)((x) - (l)) <= (unsigned)((h) - (l)))
-
-/** returns true if it's Intel architecture */
-#define ARCH_INTEL(x) (((x) & arch_feature_intel) != 0)
-
-/** returns true if it's AMD architecture */
-#define ARCH_AMD(x) (((x) & arch_feature_amd) != 0)
-
-/** return true if it's a Athlon/Opteron */
-#define ARCH_ATHLON(x) _IN_RANGE((x), arch_athlon, arch_opteron)
-
-/** return true if the CPU has MMX support */
-#define ARCH_MMX(x) (((x) & arch_feature_mmx) != 0)
-
-/** return true if the CPU has 3DNow! support */
-#define ARCH_3DNow(x) (((x) & arch_feature_3DNow) != 0)
-
-/** return true if the CPU has P6 features (CMOV) */
-#define IS_P6_ARCH(x) (((x) & arch_feature_p6) != 0)
-
-/** floating point support */
-enum fp_support {
- fp_none, /**< no floating point instructions are used */
- fp_x87, /**< use x87 instructions */
- fp_sse2 /**< use SSE2 instructions */
-};
-
-/** Returns non-zero if the current floating point architecture is SSE2. */
-#define USE_SSE2(cg) ((cg)->fp_kind == fp_sse2)
-
-/** Returns non-zero if the current floating point architecture is x87. */
-#define USE_x87(cg) ((cg)->fp_kind == fp_x87)
-
typedef struct ia32_isa_t ia32_isa_t;
typedef struct ia32_code_gen_t ia32_code_gen_t;
typedef struct ia32_irn_ops_t ia32_irn_ops_t;
ia32_isa_t *isa; /**< for fast access to the isa object */
be_irg_t *birg; /**< The be-irg (contains additional information about the irg) */
ir_node **blk_sched; /**< an array containing the scheduled blocks */
- ia32_optimize_t opt; /**< contains optimization information */
- int arch; /**< instruction architecture */
- int opt_arch; /**< optimize for architecture */
- char fp_kind; /**< floating point kind */
char do_x87_sim; /**< set to 1 if x87 simulation should be enforced */
char dump; /**< set to 1 if graphs should be dumped */
ir_node *unknown_gp; /**< unique Unknown_GP node */
pmap *regs_8bit_high; /**< contains the hight part of the 8 bit names of the gp registers */
pmap *types; /**< A map of modes to primitive types */
pmap *tv_ent; /**< A map of entities that store const tarvals */
- ia32_optimize_t opt; /**< contains optimization information */
- int arch; /**< instruction architecture */
- int opt_arch; /**< optimize for architecture */
- int fp_kind; /**< floating point kind */
ia32_code_gen_t *cg; /**< the current code generator */
const be_machine_t *cpu; /**< the abstract machine */
#ifndef NDEBUG
ir_node *ia32_new_Unknown_xmm(ia32_code_gen_t *cg);
ir_node *ia32_new_Unknown_vfp(ia32_code_gen_t *cg);
-/**
- * Returns the unique per irg FP NoReg node.
- */
-ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg);
-
/**
* Returns the unique per irg FPU truncation mode node.
*/
--- /dev/null
+/*
+ * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
+ *
+ * This file is part of libFirm.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file LICENSE.GPL included in the
+ * packaging of this file.
+ *
+ * Licensees holding valid libFirm Professional Edition licenses may use
+ * this file in accordance with the libFirm Commercial License.
+ * Agreement provided with the Software.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+ * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+/**
+ * @file
+ * @brief ia32 architecture variants
+ * @author Michael Beck, Matthias Braun
+ * @version $Id: bearch_ia32_t.h 16363 2007-10-25 23:27:07Z beck $
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <libcore/lc_opts.h>
+#include <libcore/lc_opts_enum.h>
+
+#include "irtools.h"
+
+#include "bearch_ia32_t.h"
+#include "ia32_architecture.h"
+
+ia32_code_gen_config_t ia32_cg_config;
+
+/**
+ * CPU features.
+ *
+ * Every feature owns one of the upper bits of the value so that it can be
+ * OR-ed with the small sequence number of an architecture (see enum
+ * cpu_support) and be tested with the ARCH_xxx()/IS_P6_ARCH() macros.
+ * A feature that implies another one (SSE2 -> SSE1 -> MMX, ...) already
+ * contains the bits of the implied feature.
+ *
+ * NOTE(review): 0x80000000 does not fit into a signed 32 bit int, while ISO C
+ * requires enumeration constants to be representable as int. This relies on
+ * a compiler extension -- confirm for all supported compilers.
+ */
+enum cpu_arch_features {
+ arch_feature_intel = 0x80000000, /**< Intel CPU */
+ arch_feature_amd = 0x40000000, /**< AMD CPU */
+ arch_feature_p6 = 0x20000000, /**< P6 instructions */
+ arch_feature_mmx = 0x10000000, /**< MMX instructions */
+ arch_feature_sse1 = 0x08000000 | arch_feature_mmx, /**< SSE1 instructions, include MMX */
+ arch_feature_sse2 = 0x04000000 | arch_feature_sse1, /**< SSE2 instructions, include SSE1 */
+ arch_feature_sse3 = 0x02000000 | arch_feature_sse2, /**< SSE3 instructions, include SSE2 */
+ arch_feature_ssse3 = 0x01000000 | arch_feature_sse3, /**< SSSE3 instructions, include SSE3 */
+ arch_feature_3DNow = 0x00800000, /**< 3DNow! instructions */
+ arch_feature_3DNowE = 0x00400000 | arch_feature_3DNow, /**< Enhanced 3DNow! instructions */
+ arch_feature_netburst = 0x00200000 | arch_feature_intel, /**< Netburst architecture */
+ arch_feature_64bit = 0x00100000 | arch_feature_sse2, /**< x86_64 support, include SSE2 */
+};
+
+/**
+ * Architectures.
+ *
+ * The value of each architecture is a unique small number (0..22) OR-ed with
+ * the cpu_arch_features bits the architecture provides, so a single value
+ * both identifies the CPU and answers the feature-test macros.
+ * arch_generic carries no feature bit at all.
+ */
+enum cpu_support {
+ /* Intel CPUs (and the vendor neutral generic/i386/i486 entries) */
+ arch_generic = 0,
+
+ arch_i386 = 1,
+ arch_i486 = 2,
+ arch_pentium = 3 | arch_feature_intel,
+ arch_pentium_mmx = 4 | arch_feature_intel | arch_feature_mmx,
+ arch_pentium_pro = 5 | arch_feature_intel | arch_feature_p6,
+ arch_pentium_2 = 6 | arch_feature_intel | arch_feature_p6 | arch_feature_mmx,
+ arch_pentium_3 = 7 | arch_feature_intel | arch_feature_p6 | arch_feature_sse1,
+ arch_pentium_4 = 8 | arch_feature_netburst | arch_feature_p6 | arch_feature_sse2,
+ arch_pentium_m = 9 | arch_feature_intel | arch_feature_p6 | arch_feature_sse2,
+ arch_core = 10 | arch_feature_intel | arch_feature_p6 | arch_feature_sse3,
+ arch_prescott = 11 | arch_feature_netburst | arch_feature_p6 | arch_feature_sse3,
+ arch_core2 = 12 | arch_feature_intel | arch_feature_p6 | arch_feature_64bit | arch_feature_ssse3,
+
+ /* AMD CPUs */
+ arch_k6 = 13 | arch_feature_amd | arch_feature_mmx,
+ arch_k6_2 = 14 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNow,
+ arch_k6_3 = 15 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNow,
+ arch_athlon = 16 | arch_feature_amd | arch_feature_mmx | arch_feature_3DNowE | arch_feature_p6,
+ arch_athlon_xp = 17 | arch_feature_amd | arch_feature_sse1 | arch_feature_3DNowE | arch_feature_p6,
+ arch_opteron = 18 | arch_feature_amd | arch_feature_64bit | arch_feature_3DNowE | arch_feature_p6,
+
+ /* other vendors (carry neither the Intel nor the AMD bit) */
+ arch_winchip_c6 = 19 | arch_feature_mmx,
+ arch_winchip2 = 20 | arch_feature_mmx | arch_feature_3DNow,
+ arch_c3 = 21 | arch_feature_mmx | arch_feature_3DNow,
+ arch_c3_2 = 22 | arch_feature_sse1, /* really no 3DNow! */
+};
+
+/**
+ * checks for l <= x <= h using a single unsigned comparison
+ * NOTE(review): identifiers starting with an underscore followed by an
+ * uppercase letter are reserved for the implementation in ISO C.
+ */
+#define _IN_RANGE(x, l, h) ((unsigned)((x) - (l)) <= (unsigned)((h) - (l)))
+
+/** returns true if x has the Intel vendor bit set */
+#define ARCH_INTEL(x) (((x) & arch_feature_intel) != 0)
+
+/** returns true if x has the AMD vendor bit set */
+#define ARCH_AMD(x) (((x) & arch_feature_amd) != 0)
+
+/**
+ * returns true if x is an Athlon/Opteron
+ * The complete values (sequence number and feature bits) are compared, so
+ * this depends on how the feature bits of the cpu_support entries sort.
+ */
+#define ARCH_ATHLON(x) _IN_RANGE((x), arch_athlon, arch_opteron)
+
+/** returns true if the CPU has MMX support */
+#define ARCH_MMX(x) (((x) & arch_feature_mmx) != 0)
+
+/** returns true if the CPU has 3DNow! support */
+#define ARCH_3DNow(x) (((x) & arch_feature_3DNow) != 0)
+
+/** returns true if the CPU has P6 features (CMOV) */
+#define IS_P6_ARCH(x) (((x) & arch_feature_p6) != 0)
+
+/** instruction set architecture, written by the "arch" option */
+static cpu_support arch = arch_generic;
+/** architecture to optimize for, written by the "opt" option */
+static cpu_support opt_arch = arch_pentium_4;
+/** floating point unit, written by the "fpunit" option: 0 = x87, 1 = SSE2 */
+static int use_sse2 = 0;
+/** calling convention optimization flag, bit 0 is bound to "nooptcc" */
+static int opt_cc = 1;
+/** unsafe float conversion flag, bit 0 is bound to "unsafe_floatconv" */
+static int opt_unsafe_floatconv = 0;
+
+/**
+ * Names accepted by the "arch" and "opt" options together with the
+ * architecture each name selects. Several names are aliases of the same
+ * architecture; the list is terminated by a NULL name.
+ *
+ * NOTE(review): "prescott" selects arch_pentium_4 and "k6-3" selects
+ * arch_k6_2 although arch_prescott and arch_k6_3 exist, and the winchip/c3
+ * entries of cpu_support have no name here -- confirm this is intended.
+ */
+static const lc_opt_enum_int_items_t arch_items[] = {
+ { "386", arch_i386, },
+ { "486", arch_i486, },
+ { "pentium", arch_pentium, },
+ { "586", arch_pentium, },
+ { "pentiumpro", arch_pentium_pro, },
+ { "686", arch_pentium_pro, },
+ { "pentiummmx", arch_pentium_mmx, },
+ { "pentium2", arch_pentium_2, },
+ { "p2", arch_pentium_2, },
+ { "pentium3", arch_pentium_3, },
+ { "p3", arch_pentium_3, },
+ { "pentium4", arch_pentium_4, },
+ { "p4", arch_pentium_4, },
+ { "prescott", arch_pentium_4, },
+ { "pentiumm", arch_pentium_m, },
+ { "pm", arch_pentium_m, },
+ { "core", arch_core, },
+ { "yonah", arch_core, },
+ { "merom", arch_core2, },
+ { "core2", arch_core2, },
+ { "k6", arch_k6, },
+ { "k6-2", arch_k6_2, },
+ { "k6-3", arch_k6_2, },
+ { "athlon", arch_athlon, },
+ { "athlon-xp", arch_athlon_xp, },
+ { "athlon-mp", arch_athlon_xp, },
+ { "athlon-4", arch_athlon_xp, },
+ { "athlon64", arch_opteron, },
+ { "k8", arch_opteron, },
+ { "opteron", arch_opteron, },
+ { "generic", arch_generic, },
+ { NULL, 0 }
+};
+
+/**
+ * Binds the "arch" option to the arch variable.
+ * NOTE(review): the cast assumes a cpu_support object has the size and
+ * representation of an int -- confirm for all supported compilers.
+ */
+static lc_opt_enum_int_var_t arch_var = {
+ (int*) &arch, arch_items
+};
+
+/** Binds the "opt" option to the opt_arch variable (same cast caveat). */
+static lc_opt_enum_int_var_t opt_arch_var = {
+ (int*) &opt_arch, arch_items
+};
+
+/** Names accepted by the "fpunit" option; the value is stored in use_sse2. */
+static const lc_opt_enum_int_items_t fp_unit_items[] = {
+ { "x87" , 0 },
+ { "sse2", 1 },
+ { NULL, 0 }
+};
+
+/** Binds the "fpunit" option to the use_sse2 variable. */
+static lc_opt_enum_int_var_t fp_unit_var = {
+ &use_sse2, fp_unit_items
+};
+
+/**
+ * Architecture related command line options. ia32_init_architecture() adds
+ * this table to the "ia32" group below the "be" group.
+ */
+static const lc_opt_table_entry_t ia32_architecture_options[] = {
+	LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture",
+	                    &arch_var),
+	LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture",
+	                    &opt_arch_var),
+	LC_OPT_ENT_ENUM_INT("fpunit", "select the floating point unit",
+	                    &fp_unit_var),
+	LC_OPT_ENT_NEGBIT("nooptcc", "do not optimize calling convention",
+	                  &opt_cc, 1),
+	LC_OPT_ENT_BIT("unsafe_floatconv", "do unsafe floating point controlword "
+	               "optimisations", &opt_unsafe_floatconv, 1),
+	LC_OPT_LAST
+};
+
+/**
+ * Instruction cost table of one architecture.
+ * The values are the costs handed out by ia32_evaluate_insn().
+ */
+typedef struct insn_const {
+ int add_cost; /**< cost of an add instruction */
+ int lea_cost; /**< cost of a lea instruction */
+ int const_shf_cost; /**< cost of a constant shift instruction */
+ int cost_mul_start; /**< starting cost of a multiply instruction */
+ int cost_mul_bit; /**< cost of multiply for every set bit */
+} insn_const;
+
+/* costs for the i386 (selected by set_arch_costs() for arch_i386) */
+static const insn_const i386_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 2, /* cost of a constant shift instruction */
+ 6, /* starting cost of a multiply instruction */
+ 1 /* cost of multiply for every set bit */
+};
+
+/* costs for the i486 (selected by set_arch_costs() for arch_i486) */
+static const insn_const i486_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 2, /* cost of a constant shift instruction */
+ 12, /* starting cost of a multiply instruction */
+ 1 /* cost of multiply for every set bit */
+};
+
+/* costs for the Pentium (selected by set_arch_costs() for arch_pentium and
+ * arch_pentium_mmx) */
+static const insn_const pentium_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 11, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Pentium Pro (selected by set_arch_costs() for
+ * arch_pentium_pro, arch_pentium_2, arch_pentium_3 and arch_pentium_m) */
+static const insn_const pentiumpro_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 4, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the K6 (selected by set_arch_costs() for arch_k6 and
+ * arch_k6_2) */
+static const insn_const k6_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 3, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Athlon (selected by set_arch_costs() for arch_athlon,
+ * arch_athlon_xp and arch_opteron) */
+static const insn_const athlon_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 5, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Pentium 4 (selected by set_arch_costs() for arch_pentium_4
+ * and arch_prescott) */
+static const insn_const pentium4_cost = {
+ 1, /* cost of an add instruction */
+ 3, /* cost of a lea instruction */
+ 4, /* cost of a constant shift instruction */
+ 15, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Core (selected by set_arch_costs() for arch_core and
+ * arch_core2) */
+static const insn_const core_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 10, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* generic costs: the initial cost table and the one set_arch_costs() selects
+ * for arch_generic and for every architecture without a table of its own */
+static const insn_const generic_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 4, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/** the cost table currently in use; replaced by set_arch_costs() and read by
+ * ia32_evaluate_insn() */
+static const insn_const *arch_costs = &generic_cost;
+
+/**
+ * Selects the instruction cost table that belongs to the architecture we
+ * optimize for (opt_arch) and publishes it through arch_costs.
+ * Architectures without a table of their own get the generic costs.
+ */
+static void set_arch_costs(void)
+{
+	const insn_const *selected = &generic_cost;
+
+	switch (opt_arch) {
+	case arch_i386:
+		selected = &i386_cost;
+		break;
+	case arch_i486:
+		selected = &i486_cost;
+		break;
+	case arch_pentium:
+	case arch_pentium_mmx:
+		selected = &pentium_cost;
+		break;
+	case arch_pentium_pro:
+	case arch_pentium_2:
+	case arch_pentium_3:
+	case arch_pentium_m:
+		selected = &pentiumpro_cost;
+		break;
+	case arch_pentium_4:
+	case arch_prescott:
+		selected = &pentium4_cost;
+		break;
+	case arch_core:
+	case arch_core2:
+		selected = &core_cost;
+		break;
+	case arch_k6:
+	case arch_k6_2:
+		selected = &k6_cost;
+		break;
+	case arch_athlon:
+	case arch_athlon_xp:
+	case arch_opteron:
+		selected = &athlon_cost;
+		break;
+	case arch_generic:
+	default:
+		/* keep the generic table */
+		break;
+	}
+
+	arch_costs = selected;
+}
+
+/**
+ * Estimates the cost of a simple instruction with the help of the cost table
+ * that is currently selected in arch_costs.
+ *
+ * @param kind  the kind of instruction to evaluate
+ * @param tv    the constant operand; only inspected for MUL and only if the
+ *              current cost table charges for set bits
+ * @return the estimated cost, 1 for instruction kinds not listed here
+ */
+int ia32_evaluate_insn(insn_kind kind, tarval *tv) {
+	switch (kind) {
+	case MUL: {
+		int total = arch_costs->cost_mul_start;
+
+		if (arch_costs->cost_mul_bit > 0) {
+			/* every set bit of the constant is charged on top of the
+			 * starting cost */
+			char       *bits = get_tarval_bitpattern(tv);
+			const char *p;
+
+			for (p = bits; *p != '\0'; ++p) {
+				if (*p == '1')
+					total += arch_costs->cost_mul_bit;
+			}
+			free(bits);
+		}
+		return total;
+	}
+	case ADD:
+	case SUB:
+	case ZERO:
+		return arch_costs->add_cost;
+	case LEA:
+		return arch_costs->lea_cost;
+	case SHIFT:
+		return arch_costs->const_shf_cost;
+	default:
+		return 1;
+	}
+}
+
+
+
+/**
+ * Fills the global code generator configuration (ia32_cg_config) from the
+ * selected instruction set (arch), the architecture to optimize for
+ * (opt_arch) and the remaining backend options, then selects the matching
+ * instruction cost table.
+ */
+void ia32_setup_cg_config(void)
+{
+	ia32_code_gen_config_t *const cfg = &ia32_cg_config;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	/* on newer Intel CPUs "mov, pop" is often faster than leave although it
+	 * has a longer opcode, so leave is only avoided for Intel P6 and later */
+	cfg->use_leave = !(ARCH_INTEL(opt_arch) && IS_P6_ARCH(opt_arch));
+	/* P4s don't like inc/dec because they only partially write the flags
+	 * register which produces false dependencies */
+	cfg->use_incdec = (opt_arch != arch_pentium_4);
+	cfg->use_sse2   = use_sse2;
+
+	/* decisions that depend on the CPU we tune for */
+	cfg->use_ffreep = ARCH_ATHLON(opt_arch);
+	cfg->use_femms  = ARCH_ATHLON(opt_arch)
+	                  && ARCH_MMX(arch) && ARCH_AMD(arch);
+
+	/* fucomi and cmov are used if arch has the P6 feature, ftst if not */
+	cfg->use_fucomi = IS_P6_ARCH(arch);
+	cfg->use_cmov   = IS_P6_ARCH(arch);
+	cfg->use_ftst   = !IS_P6_ARCH(arch);
+
+	cfg->optimize_cc          = opt_cc;
+	cfg->use_unsafe_floatconv = opt_unsafe_floatconv;
+
+	/* alignment of functions and labels */
+	switch (opt_arch) {
+	case arch_i386:
+		cfg->function_alignment = 2;
+		break;
+	case arch_i486:
+		cfg->function_alignment = 4;
+		break;
+	case arch_k6:
+		cfg->function_alignment = 5;
+		cfg->label_alignment    = 5;
+		break;
+	default:
+		cfg->function_alignment = 4;
+		cfg->label_alignment    = 4;
+		break;
+	}
+
+	/* a factor <= 0 means labels are never aligned */
+	if (opt_arch == arch_i386 || opt_arch == arch_i486) {
+		cfg->label_alignment_factor = -1;
+	} else {
+		cfg->label_alignment_factor = ARCH_AMD(opt_arch) ? 3 : 2;
+	}
+
+	set_arch_costs();
+}
+
+/**
+ * Clears the code generator configuration and registers the architecture
+ * related command line options in the "ia32" group below the "be" group.
+ */
+void ia32_init_architecture(void)
+{
+	/* declarations come first, like everywhere else in the backend; the
+	 * original placed them after the memset() statement */
+	lc_opt_entry_t *be_grp   = lc_opt_get_grp(firm_opt_get_root(), "be");
+	lc_opt_entry_t *ia32_grp = lc_opt_get_grp(be_grp, "ia32");
+
+	memset(&ia32_cg_config, 0, sizeof(ia32_cg_config));
+
+	lc_opt_add_table(ia32_grp, ia32_architecture_options);
+}
--- /dev/null
+/*
+ * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
+ *
+ * This file is part of libFirm.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file LICENSE.GPL included in the
+ * packaging of this file.
+ *
+ * Licensees holding valid libFirm Professional Edition licenses may use
+ * this file in accordance with the libFirm Commercial License.
+ * Agreement provided with the Software.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+ * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+/**
+ * @file
+ * @brief ia32 architecture variants
+ * @author Michael Beck, Matthias Braun
+ * @version $Id: bearch_ia32_t.h 16363 2007-10-25 23:27:07Z beck $
+ */
+#ifndef FIRM_BE_IA32_ARCHITECTURE_H
+#define FIRM_BE_IA32_ARCHITECTURE_H
+
+/**
+ * Code generator settings that depend on the selected architecture and on
+ * the backend options. The single instance is ia32_cg_config.
+ */
+typedef struct {
+ /** use leave in function epilogue */
+ unsigned use_leave:1;
+ /** use inc, dec instead of add, 1 and add, -1 */
+ unsigned use_incdec:1;
+ /** use sse2 instructions (instead of x87) */
+ unsigned use_sse2:1;
+ /** use ffreep instead of fpop */
+ unsigned use_ffreep:1;
+ /** use ftst where possible */
+ unsigned use_ftst:1;
+ /** use femms to pop all float registers */
+ unsigned use_femms:1;
+ /** use emms to pop all float registers */
+ unsigned use_emms:1;
+ /** use the fucomi instruction */
+ unsigned use_fucomi:1;
+ /** use cmovXX instructions */
+ unsigned use_cmov:1;
+ /** optimize calling convention where possible */
+ unsigned optimize_cc:1;
+ /**
+ * disrespect current floating point rounding mode at entry and exit of
+ * functions (this is ok for programs that don't explicitly change the
+ * rounding mode)
+ */
+ unsigned use_unsafe_floatconv:1;
+ /** function alignment, given as the exponent of a power of two in bytes
+ * (the emitter derives the maximum skip as (1 << alignment) - 1) */
+ unsigned function_alignment;
+ /** alignment for labels (which are expected to be frequent jump targets),
+ * given as an exponent like function_alignment */
+ unsigned label_alignment;
+ /** if a block's execfreq is this factor higher than its predecessor's then
+ * align the block's label (a value <= 0 switches off label alignment) */
+ double label_alignment_factor;
+} ia32_code_gen_config_t;
+
+/** the code generator configuration, computed by ia32_setup_cg_config() */
+extern ia32_code_gen_config_t ia32_cg_config;
+
+/** clears ia32_cg_config and registers the architecture options */
+void ia32_init_architecture(void);
+/** computes ia32_cg_config from the current values of the options */
+void ia32_setup_cg_config(void);
+
+/**
+ * Returns the estimated cost of a simple instruction of the given kind;
+ * tv is the constant operand.
+ * NOTE(review): insn_kind and tarval are not declared by this header, it
+ * relies on the including file to provide them -- confirm.
+ */
+int ia32_evaluate_insn(insn_kind kind, tarval *tv);
+
+#endif
#include "ia32_nodes_attr.h"
#include "ia32_new_nodes.h"
#include "ia32_map_regs.h"
+#include "ia32_architecture.h"
#include "bearch_ia32_t.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/**
* Emits gas alignment directives for Functions depended on cpu architecture.
*/
-static void ia32_emit_align_func(cpu_support cpu)
+static void ia32_emit_align_func(void)
{
- unsigned align;
- unsigned maximum_skip;
+ unsigned align = ia32_cg_config.function_alignment;
+ unsigned maximum_skip = (1 << align) - 1;
- switch (cpu) {
- case arch_i386:
- align = 2;
- break;
- case arch_i486:
- align = 4;
- break;
- case arch_k6:
- align = 5;
- break;
- default:
- align = 4;
- }
- maximum_skip = (1 << align) - 1;
ia32_emit_alignment(align, maximum_skip);
}
/**
* Emits gas alignment directives for Labels depended on cpu architecture.
*/
-static void ia32_emit_align_label(cpu_support cpu)
+static void ia32_emit_align_label(void)
{
- unsigned align; unsigned maximum_skip;
-
- switch (cpu) {
- case arch_i386:
- align = 2;
- break;
- case arch_i486:
- align = 4;
- break;
- case arch_k6:
- align = 5;
- break;
- default:
- align = 4;
- }
- maximum_skip = (1 << align) - 1;
+ unsigned align = ia32_cg_config.label_alignment;
+ unsigned maximum_skip = (1 << align) - 1;
ia32_emit_alignment(align, maximum_skip);
}
double block_freq;
double prev_freq = 0; /**< execfreq of the fallthrough block */
double jmp_freq = 0; /**< execfreq of all non-fallthrough blocks */
- cpu_support cpu = isa->opt_arch;
int i, n_cfgpreds;
if(exec_freq == NULL)
return 0;
- if(cpu == arch_i386 || cpu == arch_i486)
+ if(ia32_cg_config.label_alignment_factor <= 0)
return 0;
block_freq = get_block_execfreq(exec_freq, block);
jmp_freq /= prev_freq;
- switch (cpu) {
- case arch_athlon:
- case arch_athlon_xp:
- case arch_k6:
- return jmp_freq > 3;
- default:
- return jmp_freq > 2;
- }
+ return jmp_freq > ia32_cg_config.label_alignment_factor;
}
static void ia32_emit_block_header(ir_node *block, ir_node *prev)
if (should_align_block(block, prev)) {
assert(need_label);
- ia32_emit_align_label(isa->opt_arch);
+ ia32_emit_align_label();
}
if(need_label) {
{
ir_entity *irg_ent = get_irg_entity(irg);
const char *irg_name = get_entity_ld_name(irg_ent);
- cpu_support cpu = isa->opt_arch;
const be_irg_t *birg = cg->birg;
/* write the begin line (used by scripts processing the assembler... */
be_gas_emit_switch_section(GAS_SECTION_TEXT);
be_dbg_method_begin(birg->main_env->db_handle, irg_ent, be_abi_get_stack_layout(birg->abi));
- ia32_emit_align_func(cpu);
+ ia32_emit_align_func();
if (get_entity_visibility(irg_ent) == visibility_external_visible) {
be_emit_cstring(".global ");
be_emit_string(irg_name);
if(get_ia32_op_type(irn) != ia32_Normal)
return;
- noreg = ia32_new_NoReg_gp(cg);
- noreg_fp = ia32_new_NoReg_fp(cg);
- nomem = new_rd_NoMem(cg->irg);
- in1 = get_irn_n(irn, n_ia32_binary_left);
- in2 = get_irn_n(irn, n_ia32_binary_right);
- in1_reg = arch_get_irn_register(cg->arch_env, in1);
- in2_reg = arch_get_irn_register(cg->arch_env, in2);
- out_reg = get_ia32_out_reg(irn, 0);
+ noreg = ia32_new_NoReg_gp(cg);
+ noreg_fp = ia32_new_NoReg_xmm(cg);
+ nomem = new_rd_NoMem(cg->irg);
+ in1 = get_irn_n(irn, n_ia32_binary_left);
+ in2 = get_irn_n(irn, n_ia32_binary_right);
+ in1_reg = arch_get_irn_register(cg->arch_env, in1);
+ in2_reg = arch_get_irn_register(cg->arch_env, in2);
+ out_reg = get_ia32_out_reg(irn, 0);
assert(get_irn_mode(irn) != mode_T);
#include "ia32_fpu.h"
#include "ia32_new_nodes.h"
+#include "ia32_architecture.h"
#include "gen_ia32_regalloc_if.h"
#include "ircons.h"
ir_node *spill = NULL;
/* we don't spill the fpcw in unsafe mode */
- if(cg->opt & IA32_OPT_UNSAFE_FLOATCONV) {
+ if(ia32_cg_config.use_unsafe_floatconv) {
ir_graph *irg = get_irn_irg(state);
ir_node *block = get_nodes_block(state);
if(force == 1 || !is_ia32_ChangeCW(state)) {
ir_node *noreg = ia32_new_NoReg_gp(cg);
ir_node *reload = NULL;
- if(cg->opt & IA32_OPT_UNSAFE_FLOATCONV) {
+ if(ia32_cg_config.use_unsafe_floatconv) {
if(fpcw_round == NULL) {
create_fpcw_entities();
}
#include "ia32_map_regs.h"
#include "ia32_new_nodes.h"
+#include "ia32_architecture.h"
#include "gen_ia32_regalloc_if.h"
#include "bearch_ia32_t.h"
#include "../benodesets.h"
/**
* Returns the register for parameter nr.
*/
-const arch_register_t *ia32_get_RegParam_reg(ia32_code_gen_t *cg, unsigned cc,
- size_t nr, const ir_mode *mode)
+const arch_register_t *ia32_get_RegParam_reg(unsigned cc, size_t nr,
+ const ir_mode *mode)
{
if(! (cc & cc_reg_param))
return NULL;
if(mode_is_float(mode)) {
- if(!USE_SSE2(cg))
+ if(!ia32_cg_config.use_sse2)
return NULL;
if(nr >= MAXNUM_SSE_ARGS)
return NULL;
/**
* Returns the register for parameter nr.
*/
-const arch_register_t *ia32_get_RegParam_reg(ia32_code_gen_t *cg, unsigned cc,
- size_t nr, const ir_mode *mode);
+const arch_register_t *ia32_get_RegParam_reg(unsigned cc, size_t nr,
+ const ir_mode *mode);
static INLINE int is_unknown_reg(const arch_register_t *reg)
{
#include "ia32_transform.h"
#include "ia32_dbg_stat.h"
#include "ia32_util.h"
+#include "ia32_architecture.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
}
make_add_immediate:
- if(cg->isa->opt & IA32_OPT_INCDEC) {
+ if(ia32_cg_config.use_incdec) {
if(is_am_one(node)) {
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
#include "ia32_optimize.h"
#include "ia32_util.h"
#include "ia32_address_mode.h"
+#include "ia32_architecture.h"
#include "gen_ia32_regalloc_if.h"
static ia32_code_gen_t *env_cg = NULL;
static ir_node *initial_fpcw = NULL;
static heights_t *heights = NULL;
-static transform_config_t transform_config;
extern ir_op *get_op_Mulh(void);
ir_node *load;
ir_entity *floatent;
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
if (is_Const_null(node)) {
load = new_rd_ia32_xZero(dbgi, irg, block);
set_ia32_ls_mode(load, mode);
ir_node *noreg = ia32_new_NoReg_gp(env_cg);
ir_node *nomem = new_NoMem();
- if (USE_SSE2(env_cg))
+ if (ia32_cg_config.use_sse2)
cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
else
cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
ia32_address_mode_t am;
if (mode_is_float(mode)) {
- if (USE_SSE2(env_cg))
+ if (ia32_cg_config.use_sse2)
return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
match_commutative | match_am);
else
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
- if (USE_SSE2(env_cg))
+ if (ia32_cg_config.use_sse2)
return gen_binop(node, op1, op2, new_rd_ia32_xMul,
match_commutative | match_am);
else
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
- if (USE_SSE2(env_cg))
+ if (ia32_cg_config.use_sse2)
return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
else
return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
ir_node *op1 = get_Quot_left(node);
ir_node *op2 = get_Quot_right(node);
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
} else {
return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
if (mode_is_float(mode)) {
ir_node *new_op = be_transform_node(op);
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
/* TODO: non-optimal... if we have many xXors, then we should
* rather create a load for the const and use that instead of
* several AM nodes... */
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
- ir_node *noreg_fp = ia32_new_NoReg_fp(env_cg);
ir_node *nomem = new_NoMem();
ir_node *new_op;
ir_node *new_node;
if (mode_is_float(mode)) {
new_op = be_transform_node(op);
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
+ ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
nomem, new_op, noreg_fp);
}
if (mode_is_float(mode)) {
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
mode);
res_mode = mode_xmm;
val = get_Conv_op(val);
}
new_val = be_transform_node(val);
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
addr.index, addr.mem, new_val);
} else {
ir_node *new_right;
ir_node *new_node;
- if(transform_config.use_fucomi) {
+ if(ia32_cg_config.use_fucomi) {
new_right = be_transform_node(right);
new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
new_right, 0);
set_ia32_commutative(new_node);
SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
} else {
- if(transform_config.use_ftst && is_Const_null(right)) {
+ if(ia32_cg_config.use_ftst && is_Const_null(right)) {
new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
0);
} else {
int cmp_unsigned;
if(mode_is_float(cmp_mode)) {
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
return create_Ucomi(node);
} else {
return create_Fucom(node);
ia32_address_mode_t am;
ia32_address_t *addr;
- assert(transform_config.use_cmov);
+ assert(ia32_cg_config.use_cmov);
assert(mode_needs_gp_reg(get_irn_mode(val_true)));
addr = &am.addr;
if (src_mode == tgt_mode) {
if (get_Conv_strict(node)) {
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
/* when we are in SSE mode, we can kill all strict no-op conversion */
return be_transform_node(op);
}
}
/* ... to float */
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
nomem, new_op);
} else {
/* ... to int */
DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
nomem, new_op);
set_ia32_ls_mode(res, src_mode);
if (mode_is_float(tgt_mode)) {
/* ... to float */
DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
new_op = be_transform_node(op);
res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
nomem, new_op);
int pn_ret_val, pn_ret_mem, arity, i;
assert(ret_val != NULL);
- if (be_Return_get_n_rets(node) < 1 || ! USE_SSE2(env_cg)) {
+ if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
return be_duplicate_node(node);
}
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
return ia32_new_Unknown_xmm(env_cg);
} else {
/* Unknown nodes are buggy in x87 sim, use zero for now... */
/* all integer operations are on 32bit registers now */
mode = mode_Iu;
} else if(mode_is_float(mode)) {
- if (USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2) {
mode = mode_xmm;
} else {
mode = mode_vfp;
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *val = get_irn_n(node, 1);
ir_node *new_val = be_transform_node(val);
- ia32_code_gen_t *cg = env_cg;
ir_node *res = NULL;
ir_graph *irg = current_ir_graph;
dbg_info *dbgi;
ir_node *noreg, *new_ptr, *new_mem;
ir_node *ptr, *mem;
- if (USE_SSE2(cg)) {
+ if (ia32_cg_config.use_sse2) {
return new_val;
}
new_mem = be_transform_node(mem);
ptr = get_irn_n(node, 0);
new_ptr = be_transform_node(ptr);
- noreg = ia32_new_NoReg_gp(cg);
+ noreg = ia32_new_NoReg_gp(env_cg);
dbgi = get_irn_dbg_info(node);
/* Store x87 -> MEM */
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *val = get_irn_n(node, 1);
ir_node *new_val = be_transform_node(val);
- ia32_code_gen_t *cg = env_cg;
ir_graph *irg = current_ir_graph;
ir_node *res = NULL;
ir_entity *fent = get_ia32_frame_ent(node);
ir_node *ptr, *mem;
dbg_info *dbgi;
- if (! USE_SSE2(cg)) {
+ if (! ia32_cg_config.use_sse2) {
/* SSE unit is not used -> skip this node. */
return new_val;
}
new_ptr = be_transform_node(ptr);
mem = get_irn_n(node, 2);
new_mem = be_transform_node(mem);
- noreg = ia32_new_NoReg_gp(cg);
+ noreg = ia32_new_NoReg_gp(env_cg);
dbgi = get_irn_dbg_info(node);
/* Store SSE -> MEM */
pn_ia32_xLoad_M);
}
}
- if (USE_SSE2(env_cg) && proj >= pn_be_Call_first_res
- && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)
- && USE_SSE2(env_cg)) {
+ if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
+ && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
ir_node *fstp;
ir_node *frame = get_irg_frame(irg);
ir_node *noreg = ia32_new_NoReg_gp(env_cg);
void ia32_transform_graph(ia32_code_gen_t *cg) {
int cse_last;
ir_graph *irg = cg->irg;
- int opt_arch = cg->isa->opt_arch;
- int arch = cg->isa->arch;
/* TODO: look at cpu and fill transform config in with that... */
- transform_config.use_incdec = 1;
- transform_config.use_sse2 = 0;
- transform_config.use_ffreep = ARCH_ATHLON(opt_arch);
- transform_config.use_ftst = 0;
- transform_config.use_femms = ARCH_ATHLON(opt_arch) && ARCH_MMX(arch) && ARCH_AMD(arch);
- transform_config.use_fucomi = 1;
- transform_config.use_cmov = IS_P6_ARCH(arch);
-
register_transformers();
env_cg = cg;
initial_fpcw = NULL;
#include "firm_config.h"
#include "bearch_ia32_t.h"
-typedef struct {
- /** use inc, dec instead of add ,1 and add, -1 */
- unsigned use_incdec:1;
- /** use sse2 instructions */
- unsigned use_sse2:1;
- /** use ffreep instead of fpop */
- unsigned use_ffreep:1;
- /** use ftst where possible */
- unsigned use_ftst:1;
- /** use femms to pop all float registers */
- unsigned use_femms:1;
- /** use the fucomi instruction */
- unsigned use_fucomi:1;
- /** use cmovXX instructions */
- unsigned use_cmov:1;
-} transform_config_t;
-
/**
* Transform firm nodes to x86 assembler nodes
*/
#include "gen_ia32_new_nodes.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_x87.h"
+#include "ia32_architecture.h"
#define N_x87_REGS 8
*/
static ir_node *x87_create_fpop(x87_state *state, ir_node *n, int num)
{
- ir_node *fpop = NULL;
+ ir_node *fpop = NULL;
ia32_x87_attr_t *attr;
- int cpu = state->sim->isa->opt_arch;
assert(num > 0);
while (num > 0) {
x87_pop(state);
- if (ARCH_ATHLON(cpu))
+ if (ia32_cg_config.use_ffreep)
fpop = new_rd_ia32_ffreep(NULL, get_irn_irg(n), get_nodes_block(n));
else
fpop = new_rd_ia32_fpop(NULL, get_irn_irg(n), get_nodes_block(n));
DEBUG_ONLY(x87_dump_stack(state));
if (kill_mask != 0 && live == 0) {
- int cpu = sim->isa->arch;
-
/* special case: kill all registers */
- if (ARCH_ATHLON(sim->isa->opt_arch) && ARCH_MMX(cpu)) {
- if (ARCH_AMD(cpu)) {
+ if (ia32_cg_config.use_femms || ia32_cg_config.use_emms) {
+ if (ia32_cg_config.use_femms) {
/* use FEMMS on AMD processors to clear all */
keep = new_rd_ia32_femms(NULL, get_irn_irg(block), block);
} else {