From: Michael Beck Date: Wed, 14 May 2008 02:18:29 +0000 (+0000) Subject: - don't use inc/dec on core2 X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=b678beb76378c541ac15b40dab86a844e81f59bb;p=libfirm - don't use inc/dec on core2 - add full support for SSE3 vfisttp instruction [r19607] --- diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c index 7044b51bd..ab6b3ebf0 100644 --- a/ir/be/ia32/bearch_ia32.c +++ b/ir/be/ia32/bearch_ia32.c @@ -1456,6 +1456,7 @@ static void ia32_collect_frame_entity_nodes(ir_node *node, void *data) is_ia32_xStoreSimple(node) || is_ia32_vfst(node) || is_ia32_vfist(node) || + is_ia32_vfisttp(node) || is_ia32_FnstCW(node)); #endif } diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index 2f4c5c500..b5aac2526 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -483,7 +483,7 @@ void ia32_setup_cg_config(void) ia32_cg_config.use_leave = FLAGS(opt_arch, arch_i386 | arch_all_amd | arch_core2); /* P4s don't like inc/decs because they only partially write the flags register which produces false dependencies */ - ia32_cg_config.use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode) || opt_size; + ia32_cg_config.use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_core2 | arch_geode) || opt_size; ia32_cg_config.use_sse2 = use_sse2; ia32_cg_config.use_ffreep = FLAGS(opt_arch, arch_athlon_plus); ia32_cg_config.use_ftst = !FLAGS(arch, arch_feature_p6_insn); @@ -511,6 +511,7 @@ void ia32_setup_cg_config(void) ia32_cg_config.use_mov_0 = FLAGS(opt_arch, arch_k6) && !opt_size; ia32_cg_config.use_pad_return = FLAGS(opt_arch, arch_athlon_plus | arch_core2 | arch_generic32) && !opt_size; ia32_cg_config.use_bt = FLAGS(opt_arch, arch_core2 | arch_athlon_plus) || opt_size; + ia32_cg_config.use_fisttp = FLAGS(opt_arch & arch, arch_feature_sse3); ia32_cg_config.optimize_cc = opt_cc; ia32_cg_config.use_unsafe_floatconv = opt_unsafe_floatconv; diff --git 
a/ir/be/ia32/ia32_architecture.h b/ir/be/ia32/ia32_architecture.h index 4712afc79..89fb4adab 100644 --- a/ir/be/ia32/ia32_architecture.h +++ b/ir/be/ia32/ia32_architecture.h @@ -68,6 +68,8 @@ typedef struct { unsigned use_pad_return:1; /** use the bt instruction */ unsigned use_bt:1; + /** use fisttp instruction (requires SSE3) */ + unsigned use_fisttp:1; /** optimize calling convention where possible */ unsigned optimize_cc:1; /** diff --git a/ir/be/ia32/ia32_spec.pl b/ir/be/ia32/ia32_spec.pl index adce37925..26775aa65 100644 --- a/ir/be/ia32/ia32_spec.pl +++ b/ir/be/ia32/ia32_spec.pl @@ -2366,7 +2366,7 @@ fisttp => { state => "exc_pinned", rd_constructor => "NONE", reg_req => { }, - emit => '. fist%M %AM', + emit => '. fisttp%M %AM', mode => "mode_M", attr_type => "ia32_x87_attr_t", latency => 2, diff --git a/ir/be/ia32/ia32_transform.c b/ir/be/ia32/ia32_transform.c index a8f7b645a..a381cbc4f 100644 --- a/ir/be/ia32/ia32_transform.c +++ b/ir/be/ia32/ia32_transform.c @@ -2443,6 +2443,29 @@ static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns) { return new_node; } +/** + * Generate a vfist or vfisttp instruction. + */ +static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, + ir_node *mem, ir_node *val) +{ + ir_node *new_node; + + if (ia32_cg_config.use_fisttp) { + /* Note: fisttp ALWAYS pops the tos. We have to ensure here that the value is copied + if other users exist */ + const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp]; + val = be_new_Copy(reg_class, irg, block, val); + + new_node = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val); + } else { + ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg); + + /* do a fist */ + new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode); + } + return new_node; +} /** * Transforms a normal Store. 
* @@ -2500,7 +2523,6 @@ static ir_node *gen_normal_Store(ir_node *node) addr.index, addr.mem, new_val, mode); } } else if (is_float_to_int32_conv(val)) { - ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg); val = get_Conv_op(val); /* convs (and strict-convs) before stores are unnecessary if the mode @@ -2508,10 +2530,8 @@ static ir_node *gen_normal_Store(ir_node *node) while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) { val = get_Conv_op(val); } - new_val = be_transform_node(val); - - new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base, - addr.index, addr.mem, new_val, trunc_mode); + new_val = be_transform_node(val); + new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val); } else { new_val = create_immediate_or_transform(val, 0); assert(mode != mode_b); @@ -3000,14 +3020,10 @@ static ir_node *gen_x87_fp_to_gp(ir_node *node) { ir_graph *irg = current_ir_graph; dbg_info *dbgi = get_irn_dbg_info(node); ir_node *noreg = ia32_new_NoReg_gp(cg); - ir_node *trunc_mode = ia32_new_Fpu_truncate(cg); ir_mode *mode = get_irn_mode(node); ir_node *fist, *load; - /* do a fist */ - fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, - new_NoMem(), new_op, trunc_mode); - + fist = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op); set_irn_pinned(fist, op_pin_state_floats); set_ia32_use_frame(fist); set_ia32_op_type(fist, ia32_AddrModeD); @@ -4383,13 +4399,10 @@ static ir_node *gen_ia32_l_vfist(ir_node *node) { dbg_info *dbgi = get_irn_dbg_info(node); ir_node *noreg = ia32_new_NoReg_gp(env_cg); ir_mode *mode = get_ia32_ls_mode(node); - ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg); ir_node *new_op; long am_offs; - new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem, - new_val, trunc_mode); - + new_op = gen_vfist(dbgi, irg, block, new_ptr, noreg, new_mem, new_val); am_offs = get_ia32_am_offs_int(node); add_ia32_am_offs_int(new_op, am_offs); @@ -4569,13 +4582,8 @@ 
static ir_node *gen_ia32_l_FloattoLL(ir_node *node) { ir_node *nomem = new_NoMem(); ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val); ir_node *new_val = be_transform_node(val); - ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg); - - ir_node *fist; - /* do a fist */ - fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, - trunc_mode); + ir_node *fist = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val); SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node)); set_ia32_use_frame(fist); set_ia32_op_type(fist, ia32_AddrModeD);