xStore, xLoad should have base latency 0
[libfirm] / ir / be / ia32 / ia32_transform.c
index 1c9d64e..56fce24 100644 (file)
@@ -181,12 +181,12 @@ static ir_type *get_prim_type(pmap *types, ir_mode *mode)
 }
 
 /**
- * Get an entity that is initialized with a tarval
+ * Get an atomic entity that is initialized with a tarval
  */
-static ir_entity *get_entity_for_tv(ia32_code_gen_t *cg, ir_node *cnst)
+static ir_entity *ia32_get_entity_for_tv(ia32_isa_t *isa, ir_node *cnst)
 {
        tarval *tv    = get_Const_tarval(cnst);
-       pmap_entry *e = pmap_find(cg->isa->tv_ent, tv);
+       pmap_entry *e = pmap_find(isa->tv_ent, tv);
        ir_entity *res;
        ir_graph *rem;
 
@@ -194,7 +194,7 @@ static ir_entity *get_entity_for_tv(ia32_code_gen_t *cg, ir_node *cnst)
                ir_mode *mode = get_irn_mode(cnst);
                ir_type *tp = get_Const_type(cnst);
                if (tp == firm_unknown_type)
-                       tp = get_prim_type(cg->isa->types, mode);
+                       tp = get_prim_type(isa->types, mode);
 
                res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
 
@@ -210,7 +210,7 @@ static ir_entity *get_entity_for_tv(ia32_code_gen_t *cg, ir_node *cnst)
                set_atomic_ent_value(res, new_Const_type(tv, tp));
                current_ir_graph = rem;
 
-               pmap_insert(cg->isa->tv_ent, tv, res);
+               pmap_insert(isa->tv_ent, tv, res);
        } else {
                res = e->value;
        }
@@ -264,10 +264,24 @@ static ir_node *gen_Const(ir_node *node) {
                ir_node   *nomem = new_NoMem();
                ir_node   *load;
                ir_entity *floatent;
+               cnst_classify_t clss = classify_Const(node);
 
-               if (! USE_SSE2(env_cg)) {
-                       cnst_classify_t clss = classify_Const(node);
+               if (USE_SSE2(env_cg)) {
+                       if (clss == CNST_NULL) {
+                               load = new_rd_ia32_xZero(dbgi, irg, block);
+                               set_ia32_ls_mode(load, mode);
+                               res  = load;
+                       } else {
+                               floatent = ia32_get_entity_for_tv(env_cg->isa, node);
 
+                               load     = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
+                                                                                        mode);
+                               set_ia32_op_type(load, ia32_AddrModeS);
+                               set_ia32_am_sc(load, floatent);
+                               set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
+                               res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
+                       }
+               } else {
                        if (clss == CNST_NULL) {
                                load = new_rd_ia32_vfldz(dbgi, irg, block);
                                res  = load;
@@ -275,7 +289,7 @@ static ir_node *gen_Const(ir_node *node) {
                                load = new_rd_ia32_vfld1(dbgi, irg, block);
                                res  = load;
                        } else {
-                               floatent = get_entity_for_tv(env_cg, node);
+                               floatent = ia32_get_entity_for_tv(env_cg->isa, node);
 
                                load     = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
                                set_ia32_op_type(load, ia32_AddrModeS);
@@ -284,16 +298,6 @@ static ir_node *gen_Const(ir_node *node) {
                                res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
                        }
                        set_ia32_ls_mode(load, mode);
-               } else {
-                       floatent = get_entity_for_tv(env_cg, node);
-
-                       load     = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
-                                                    mode);
-                       set_ia32_op_type(load, ia32_AddrModeS);
-                       set_ia32_am_sc(load, floatent);
-                       set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
-
-                       res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
                }
 
                SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
@@ -407,6 +411,9 @@ ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
                //mode = mode_xmm;
                tv  = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
                tp  = new_type_primitive(new_id_from_str(tp_name), mode);
+               /* these constants are loaded as part of an instruction, so they must be aligned
+                  to 128 bits. */
+               set_type_alignment_bytes(tp, 16);
                ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
 
                set_entity_ld_ident(ent, get_entity_ident(ent));
@@ -547,7 +554,7 @@ static void set_am_attributes(ir_node *node, ia32_address_mode_t *am)
 
 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
                             ir_node *op1, ir_node *op2, int commutative,
-                            int use_am_and_immediates)
+                            int use_am_and_immediates, int use_am)
 {
        ia32_address_t *addr     = &am->addr;
        ir_node        *noreg_gp = ia32_new_NoReg_gp(env_cg);
@@ -557,13 +564,13 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
        memset(am, 0, sizeof(am[0]));
 
        new_op2 = try_create_Immediate(op2, 0);
-       if(new_op2 == NULL && use_source_address_mode(block, op2, op1)) {
+       if(new_op2 == NULL && use_am && use_source_address_mode(block, op2, op1)) {
                build_address(am, op2);
                new_op1     = be_transform_node(op1);
                new_op2     = noreg_gp;
                am->op_type = ia32_AddrModeS;
        } else if(commutative && (new_op2 == NULL || use_am_and_immediates) &&
-                     use_source_address_mode(block, op1, op2)) {
+                     use_am && use_source_address_mode(block, op1, op2)) {
                build_address(am, op1);
                if(new_op2 != NULL) {
                        new_op1 = noreg_gp;
@@ -634,7 +641,7 @@ static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
        ia32_address_mode_t  am;
        ia32_address_t      *addr = &am.addr;
 
-       match_arguments(&am, src_block, op1, op2, commutative, 0);
+       match_arguments(&am, src_block, op1, op2, commutative, 0, 1);
 
        new_node = func(dbgi, irg, block, addr->base, addr->index, am.new_op1,
                        am.new_op2, addr->mem);
@@ -1871,7 +1878,7 @@ static ir_node *gen_Store(ir_node *node) {
        if(new_op != NULL)
                return new_op;
 
-       /* construct load address */
+       /* construct store address */
        memset(&addr, 0, sizeof(addr));
        ia32_create_address_mode(&addr, ptr, 0);
        base  = addr.base;
@@ -1924,7 +1931,8 @@ static ir_node *gen_Store(ir_node *node) {
 }
 
 static ir_node *try_create_TestJmp(ir_node *block, dbg_info *dbgi, long pnc,
-                                   ir_node *cmp_left, ir_node *cmp_right)
+                                   ir_node *cmp_left, ir_node *cmp_right,
+                                   int use_am)
 {
        ir_node  *arg_left;
        ir_node  *arg_right;
@@ -1951,7 +1959,7 @@ static ir_node *try_create_TestJmp(ir_node *block, dbg_info *dbgi, long pnc,
                mode = mode_Iu;
 
        assert(get_mode_size_bits(mode) <= 32);
-       match_arguments(&am, block, arg_left, arg_right, 1, 1);
+       match_arguments(&am, block, arg_left, arg_right, 1, 1, use_am);
        if(am.flipped)
                pnc = get_inversed_pnc(pnc);
 
@@ -2034,6 +2042,7 @@ static ir_node *gen_Cond(ir_node *node) {
        ir_node  *new_cmp_b;
        ir_mode  *cmp_mode;
        long      pnc;
+       int       use_am;
 
        if (sel_mode != mode_b) {
                return create_Switch(node);
@@ -2042,11 +2051,14 @@ static ir_node *gen_Cond(ir_node *node) {
        if(!is_Proj(sel) || !is_Cmp(get_Proj_pred(sel))) {
                /* it's some mode_b value but not a direct comparison -> create a
                 * testjmp */
-               res = try_create_TestJmp(block, dbgi, pn_Cmp_Lg, sel, NULL);
+               res = try_create_TestJmp(block, dbgi, pn_Cmp_Lg, sel, NULL, 1);
                SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
                return res;
        }
 
+       /* address mode only makes sense when we're the only user of the cmp */
+       use_am   = get_irn_n_edges(node) <= 1;
+
        cmp      = get_Proj_pred(sel);
        cmp_a    = get_Cmp_left(cmp);
        cmp_b    = get_Cmp_right(cmp);
@@ -2057,7 +2069,7 @@ static ir_node *gen_Cond(ir_node *node) {
        }
 
        if(mode_needs_gp_reg(cmp_mode)) {
-               res = try_create_TestJmp(block, dbgi, pnc, cmp_a, cmp_b);
+               res = try_create_TestJmp(block, dbgi, pnc, cmp_a, cmp_b, use_am);
                if(res != NULL) {
                        SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
                        return res;
@@ -2079,7 +2091,7 @@ static ir_node *gen_Cond(ir_node *node) {
        } else {
                ia32_address_mode_t  am;
                ia32_address_t      *addr = &am.addr;
-               match_arguments(&am, src_block, cmp_a, cmp_b, 1, 1);
+               match_arguments(&am, src_block, cmp_a, cmp_b, 1, 1, use_am);
                if(am.flipped)
                        pnc = get_inversed_pnc(pnc);
 
@@ -2161,7 +2173,7 @@ ir_node *gen_be_Copy(ir_node *node)
 
 
 static ir_node *create_set(long pnc, ir_node *cmp_left, ir_node *cmp_right,
-                           dbg_info *dbgi, ir_node *block)
+                           dbg_info *dbgi, ir_node *block, int use_am)
 {
        ir_graph *irg       = current_ir_graph;
        ir_node  *new_block = be_transform_node(block);
@@ -2193,7 +2205,7 @@ static ir_node *create_set(long pnc, ir_node *cmp_left, ir_node *cmp_right,
 
                assert(get_mode_size_bits(mode) <= 32);
 
-               match_arguments(&am, block, arg_left, arg_right, 1, 1);
+               match_arguments(&am, block, arg_left, arg_right, 1, 1, use_am);
                if(am.flipped)
                        pnc = get_inversed_pnc(pnc);
 
@@ -2220,7 +2232,7 @@ static ir_node *create_set(long pnc, ir_node *cmp_left, ir_node *cmp_right,
        mode = get_irn_mode(cmp_left);
        assert(get_mode_size_bits(mode) <= 32);
 
-       match_arguments(&am, block, cmp_left, cmp_right, 1, 1);
+       match_arguments(&am, block, cmp_left, cmp_right, 1, 1, use_am);
        if(am.flipped)
                pnc = get_inversed_pnc(pnc);
 
@@ -2372,10 +2384,10 @@ static ir_node *gen_Psi(ir_node *node) {
        }
 
        if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
-               new_op = create_set(pnc, cmp_left, cmp_right, dbgi, block);
+               new_op = create_set(pnc, cmp_left, cmp_right, dbgi, block, 1);
        } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
                pnc = get_negated_pnc(pnc, cmp_mode);
-               new_op = create_set(pnc, cmp_left, cmp_right, dbgi, block);
+               new_op = create_set(pnc, cmp_left, cmp_right, dbgi, block, 1);
        } else {
                new_op = create_cmov(pnc, cmp_left, cmp_right, psi_true, psi_default,
                                     dbgi, block);
@@ -3726,13 +3738,26 @@ static ir_node *gen_ia32_l_IMul(ir_node *node) {
        ir_node *muls = new_rd_ia32_IMul1OP(dbgi, irg, block, noreg, noreg, new_left,
                                        new_right, new_NoMem());
        clear_ia32_commutative(muls);
-       set_ia32_am_support(muls, ia32_am_Source, ia32_am_binary);
 
        SET_IA32_ORIG_NODE(muls, ia32_get_old_node_name(env_cg, node));
 
        return muls;
 }
 
+static ir_node *gen_ia32_Add64Bit(ir_node *node)
+{
+       ir_node  *a_l    = be_transform_node(get_irn_n(node, 0));
+       ir_node  *a_h    = be_transform_node(get_irn_n(node, 1));
+       ir_node  *b_l    = create_immediate_or_transform(get_irn_n(node, 2), 0);
+       ir_node  *b_h    = create_immediate_or_transform(get_irn_n(node, 3), 0);
+       ir_node  *block  = be_transform_node(get_nodes_block(node));
+       dbg_info *dbgi   = get_irn_dbg_info(node);
+       ir_graph *irg    = current_ir_graph;
+       ir_node  *new_op = new_rd_ia32_Add64Bit(dbgi, irg, block, a_l, a_h, b_l, b_h);
+       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
+       return new_op;
+}
+
 /**
  * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
  * op1 - target to be shifted
@@ -3748,8 +3773,8 @@ static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *op1,
        ir_graph *irg       = current_ir_graph;
        dbg_info *dbgi      = get_irn_dbg_info(node);
        ir_node  *new_op1   = be_transform_node(op1);
-       ir_node  *new_op2   = create_immediate_or_transform(op2, 'I');
-       ir_node  *new_count = be_transform_node(count);
+       ir_node  *new_op2   = be_transform_node(op2);
+       ir_node  *new_count = create_immediate_or_transform(count, 'I');
 
        /* TODO proper AM support */
 
@@ -4278,6 +4303,7 @@ static ir_node *gen_Proj_Cmp(ir_node *node)
        dbg_info *dbgi      = get_irn_dbg_info(cmp);
        ir_node  *block     = get_nodes_block(node);
        ir_node  *res;
+       int       use_am;
 
        assert(!mode_is_float(cmp_mode));
 
@@ -4285,7 +4311,12 @@ static ir_node *gen_Proj_Cmp(ir_node *node)
                pnc |= ia32_pn_Cmp_Unsigned;
        }
 
-       res = create_set(pnc, cmp_left, cmp_right, dbgi, block);
+       /**
+        * address mode only makes sense when we'll be the only node using the cmp
+        */
+       use_am = get_irn_n_edges(cmp) <= 1;
+
+       res = create_set(pnc, cmp_left, cmp_right, dbgi, block, use_am);
        SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, cmp));
 
        return res;
@@ -4408,6 +4439,7 @@ static void register_transformers(void)
        GEN(IJmp);
 
        /* transform ops from intrinsic lowering */
+       GEN(ia32_Add64Bit);
        GEN(ia32_l_Add);
        GEN(ia32_l_Adc);
        GEN(ia32_l_Sub);