- add a flag to allow 2 users when matching address mode: needed for Psi nodes
authorMichael Beck <beck@ipd.info.uni-karlsruhe.de>
Wed, 21 May 2008 15:11:46 +0000 (15:11 +0000)
committerMichael Beck <beck@ipd.info.uni-karlsruhe.de>
Wed, 21 May 2008 15:11:46 +0000 (15:11 +0000)
- create code for unsigned Doz (a - b < 0 ? 0 : a - b)

[r19704]

ir/be/ia32/bearch_ia32.c
ir/be/ia32/ia32_nodes_attr.h
ir/be/ia32/ia32_spec.pl
ir/be/ia32/ia32_transform.c

index 2f7a488..2f39e9f 100644 (file)
@@ -2085,18 +2085,19 @@ static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
                }
 
                if (ia32_cg_config.use_sse2 && cmp != NULL) {
+                       pn_Cmp pn   = get_Proj_proj(sel);
+                       ir_node *cl = get_Cmp_left(cmp);
+                       ir_node *cr = get_Cmp_right(cmp);
+
                        /* check the Phi nodes: no 64bit and no floating point cmov */
                        for (phi = phi_list; phi; phi = get_irn_link(phi)) {
                                ir_mode *mode = get_irn_mode(phi);
 
                                if (mode_is_float(mode)) {
                                        /* check for Min, Max */
-                                       ir_node *t  = get_Phi_pred(phi, i);
-                                       ir_node *f  = get_Phi_pred(phi, j);
-                                       pn_Cmp pn   = get_Proj_proj(sel);
-                                       ir_node *cl = get_Cmp_left(cmp);
-                                       ir_node *cr = get_Cmp_right(cmp);
-                                       int res     = 0;
+                                       ir_node *t = get_Phi_pred(phi, i);
+                                       ir_node *f = get_Phi_pred(phi, j);
+                                       int res    = 0;
 
                                        /* SSE2 supports Min & Max */
                                        if (pn == pn_Cmp_Lt || pn == pn_Cmp_Le || pn == pn_Cmp_Ge || pn == pn_Cmp_Gt) {
@@ -2126,6 +2127,9 @@ static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
 
                return 1;
        } else {
+               ir_node *cl, *cr;
+               pn_Cmp  pn;
+
                /* No cmov, only some special cases */
                if (! is_Proj(sel))
                        return 0;
@@ -2136,16 +2140,63 @@ static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
                left     = get_Cmp_left(cmp);
                cmp_mode = get_irn_mode(left);
 
-               /* no floating point and no 64bit yet */
+               /* Now some supported cases here */
+               pn = get_Proj_proj(sel);
+               cl = get_Cmp_left(cmp);
+               cr = get_Cmp_right(cmp);
+
                for (phi = phi_list; phi; phi = get_irn_link(phi)) {
                        ir_mode *mode = get_irn_mode(phi);
+                       int res = 0;
+                       ir_node *t, *f;
+
+                       t = get_Phi_pred(phi, i);
+                       f = get_Phi_pred(phi, j);
 
+                       /* no floating point and no 64bit yet */
                        if (mode_is_float(mode) || get_mode_size_bits(mode) > 32)
                                return 0;
-               }
-               /* Add checks for some supported cases here */
 
-               return 0;
+                       if (is_Const(t) && is_Const(f)) {
+                               if ((is_Const_null(t) && is_Const_one(f)) || (is_Const_one(t) && is_Const_null(f))) {
+                                       /* always support Psi(x, C1, C2) */
+                                       res = 1;
+                               }
+                       } else if (pn == pn_Cmp_Lt || pn == pn_Cmp_Le || pn == pn_Cmp_Ge || pn == pn_Cmp_Gt) {
+                               if (0) {
+#if 0
+                               } else if (cl == t && cr == f) {
+                                       /* Psi(a <=/>= b, a, b) => Min, Max */
+                                       res = 1;
+                               } else if (cl == f && cr == t) {
+                                       /* Psi(a <=/>= b, b, a) => Max, Min */
+                                       res = 1;
+#endif
+                               } else if ((pn & pn_Cmp_Gt) && !mode_is_signed(mode) &&
+                                          is_Const(f) && is_Const_null(f) && is_Sub(t) &&
+                                          get_Sub_left(t) == cl && get_Sub_right(t) == cr) {
+                                       /* Psi(a >=u b, a - b, 0) unsigned Doz */
+                                       res = 1;
+                               } else if ((pn & pn_Cmp_Lt) && !mode_is_signed(mode) &&
+                                          is_Const(t) && is_Const_null(t) && is_Sub(f) &&
+                                          get_Sub_left(f) == cl && get_Sub_right(f) == cr) {
+                                       /* Psi(a <=u b, 0, a - b) unsigned Doz */
+                                       res = 1;
+                               } else if (is_Const(cr) && is_Const_null(cr)) {
+                                       if (cl == t && is_Minus(f) && get_Minus_op(f) == cl) {
+                                               /* Psi(a <=/>= 0 ? a : -a) Nabs/Abs */
+                                               res = 1;
+                                       } else if (cl == f && is_Minus(t) && get_Minus_op(t) == cl) {
+                                               /* Psi(a <=/>= 0 ? -a : a) Abs/Nabs */
+                                               res = 1;
+                                       }
+                               }
+                       }
+                       if (! res)
+                               return 0;
+               }
+               /* all checks passed */
+               return 1;
        }
        return 0;
 }
index 2857697..6b854f1 100644 (file)
@@ -71,6 +71,7 @@ typedef enum {
        match_dest_am           = 1 << 9,
        match_try_am            = 1 << 10,  /**< only try to produce AM node, don't
                                                 do anything if AM isn't possible */
+       match_two_users         = 1 << 11   /**< the instruction uses a load two times ... */
 } match_flags_t;
 
 typedef struct ia32_op_attr_t ia32_op_attr_t;
index 3272a66..612aa96 100644 (file)
@@ -332,6 +332,7 @@ Asm => {
        modified_flags => 1,
 },
 
+# "allocates" a free register
 ProduceVal => {
        op_flags  => "c",
        irn_flags => "R",
index e9d62b5..6edace1 100644 (file)
@@ -581,7 +581,7 @@ const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
  * input here, for unary operations use NULL).
  */
 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
-                                        ir_node *other, ir_node *other2)
+                                        ir_node *other, ir_node *other2, match_flags_t flags)
 {
        ir_node *load;
        long     pn;
@@ -612,7 +612,7 @@ static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
        if (get_nodes_block(load) != block)
                return 0;
        /* we only use address mode if we're the only user of the load */
-       if (get_irn_n_edges(node) > 1)
+       if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
                return 0;
        /* in some edge cases with address mode we might reach the load normally
         * and through some AM sequence, if it is already materialized then we
@@ -847,7 +847,7 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
 
        noreg_gp = ia32_new_NoReg_gp(env_cg);
        if (new_op2 == NULL &&
-           use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
+           use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
                build_address(am, op2);
                new_op1     = (op1 == NULL ? NULL : be_transform_node(op1));
                if (mode_is_float(mode)) {
@@ -858,7 +858,7 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
                am->op_type = ia32_AddrModeS;
        } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
                       use_am &&
-                      ia32_use_source_address_mode(block, op1, op2, other_op)) {
+                      ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
                ir_node *noreg;
                build_address(am, op1);
 
@@ -930,6 +930,7 @@ static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
 /**
  * Construct a standard binary operation, set AM and immediate if required.
  *
+ * @param node  The original node for which the binop is created
  * @param op1   The first operand
  * @param op2   The second operand
  * @param func  The node constructor function
@@ -2955,8 +2956,9 @@ static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
        return new_node;
 }
 
-
-
+/**
+ * Creates a ia32 Setcc instruction.
+ */
 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
                                  ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
                                  int ins_permuted)
@@ -2971,7 +2973,7 @@ static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
 
        /* we might need to conv the result up */
-       if(get_mode_size_bits(mode) > 8) {
+       if (get_mode_size_bits(mode) > 8) {
                new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
                                                    nomem, new_node, mode_Bu);
                SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
@@ -2980,6 +2982,41 @@ static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
        return new_node;
 }
 
+/**
+ * Create instruction for an unsigned Difference or Zero.
+ */
+static ir_node *create_Doz(ir_node *psi, ir_node *new_block, ir_node *a, ir_node *b) {
+       ir_graph *irg   = current_ir_graph;
+       ir_mode  *mode  = get_irn_mode(psi);
+       ir_node  *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
+       dbg_info *dbgi;
+
+       new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
+               match_mode_neutral | match_am | match_immediate | match_two_users);
+
+       block = get_nodes_block(new_node);
+
+       if (is_Proj(new_node)) {
+               sub = get_Proj_pred(new_node);
+               assert(is_ia32_Sub(sub));
+       } else {
+               sub = new_node;
+               set_irn_mode(sub, mode_T);
+               new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
+       }
+       eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
+
+       dbgi   = get_irn_dbg_info(psi);
+       noreg  = ia32_new_NoReg_gp(env_cg);
+       tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
+       nomem  = new_NoMem();
+       sbb    = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
+
+       new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
+       set_ia32_commutative(new_node);
+       return new_node;
+}
+
 /**
  * Transforms a Psi node into CMov.
  *
@@ -2994,58 +3031,76 @@ static ir_node *gen_Psi(ir_node *node)
        ir_node  *psi_default = get_Psi_default(node);
        ir_node  *cond        = get_Psi_cond(node, 0);
        ir_mode  *mode        = get_irn_mode(node);
-       ir_node  *flags       = NULL;
-       ir_node  *new_node;
-       pn_Cmp    pnc;
+       ir_node  *cmp         = get_Proj_pred(cond);
+       ir_node  *cmp_left    = get_Cmp_left(cmp);
+       ir_node  *cmp_right   = get_Cmp_right(cmp);
+       pn_Cmp   pnc          = get_Proj_proj(cond);
 
        assert(get_Psi_n_conds(node) == 1);
        assert(get_irn_mode(cond) == mode_b);
 
+       /* Note: a Psi node uses a Load two times IFF it's used in the compare AND in the result */
        if (mode_is_float(mode)) {
-               ir_node *cmp       = get_Proj_pred(cond);
-               ir_node *cmp_left  = get_Cmp_left(cmp);
-               ir_node *cmp_right = get_Cmp_right(cmp);
-               pn_Cmp  pnc        = get_Proj_proj(cond);
-
                if (ia32_cg_config.use_sse2) {
                        if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
                                if (cmp_left == psi_true && cmp_right == psi_default) {
                                        /* psi(a <= b, a, b) => MIN */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
-                                        match_commutative | match_am);
+                                        match_commutative | match_am | match_two_users);
                                } else if (cmp_left == psi_default && cmp_right == psi_true) {
                                        /* psi(a <= b, b, a) => MAX */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
-                                        match_commutative | match_am);
+                                        match_commutative | match_am | match_two_users);
                                }
                        } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
                                if (cmp_left == psi_true && cmp_right == psi_default) {
                                        /* psi(a >= b, a, b) => MAX */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
-                                        match_commutative | match_am);
+                                        match_commutative | match_am | match_two_users);
                                } else if (cmp_left == psi_default && cmp_right == psi_true) {
                                        /* psi(a >= b, b, a) => MIN */
                                        return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
-                                        match_commutative | match_am);
+                                        match_commutative | match_am | match_two_users);
                                }
                        }
                }
                panic("cannot transform floating point Psi");
 
        } else {
+               ir_node *flags;
+               ir_node *new_node;
+
                assert(mode_needs_gp_reg(mode));
 
+               /* check for unsigned Doz first */
+               if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
+                   is_Const_0(psi_default) && is_Sub(psi_true) &&
+                   get_Sub_left(psi_true) == cmp_left && get_Sub_right(psi_true) == cmp_right) {
+                       /* Psi(a >=u b, a - b, 0) unsigned Doz */
+                       return create_Doz(node, new_block, cmp_left, cmp_right);
+               } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
+                                  is_Const_0(psi_true) && is_Sub(psi_default) &&
+                                  get_Sub_left(psi_default) == cmp_left && get_Sub_right(psi_default) == cmp_right) {
+                       /* Psi(a <=u b, 0, a - b) unsigned Doz */
+                       return create_Doz(node, new_block, cmp_left, cmp_right);
+               }
+
                flags = get_flags_node(cond, &pnc);
 
-               if (is_Const_1(psi_true) && is_Const_0(psi_default)) {
-                       new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
-               } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
-                       new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
+               if (is_Const(psi_true) && is_Const(psi_default)) {
+                       /* both are const, good */
+                       if (is_Const_1(psi_true) && is_Const_0(psi_default)) {
+                               new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
+                       } else if (is_Const_0(psi_true) && is_Const_1(psi_default)) {
+                               new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
+                       } else {
+                               /* Not that simple. */
+                       }
                } else {
                        new_node = create_CMov(node, cond, flags, pnc);
                }
+               return new_node;
        }
-       return new_node;
 }