X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fir%2Firopt.c;h=ca58b2ef4eeb06915ab19343d0987be77cbe1dc5;hb=a08e6f04aa3669cff094f94a9484c7c2bb1314d0;hp=dc2907678b653dc484a8ed23820cb7374a3afeef;hpb=630162b2084a7ef60e7593824d1552bc785e94eb;p=libfirm diff --git a/ir/ir/iropt.c b/ir/ir/iropt.c index dc2907678..ca58b2ef4 100644 --- a/ir/ir/iropt.c +++ b/ir/ir/iropt.c @@ -352,7 +352,7 @@ static ir_tarval *computed_value_Not(const ir_node *n) } /* computed_value_Not */ /** - * Tests wether a shift shifts more bits than available in the mode + * Tests whether a shift shifts more bits than available in the mode */ static bool is_oversize_shift(const ir_node *n) { @@ -957,7 +957,7 @@ static ir_node *equivalent_node_Sub(ir_node *n) /** - * Optimize an "self-inverse unary op", ie op(op(n)) = n. + * Optimize an "self-inverse unary op", i.e. op(op(n)) = n. * * @todo * -(-a) == a, but might overflow two times. @@ -2181,6 +2181,7 @@ restart: } DBG_OPT_ALGSIM0(oldn, n, FS_OPT_SUB_TO_ADD); return n; +#if 0 } else if (is_Mul(b)) { /* a - (b * C) -> a + (b * -C) */ ir_node *m_right = get_Mul_right(b); if (is_Const(m_right)) { @@ -2199,6 +2200,7 @@ restart: return n; } } +#endif } /* Beware of Sub(P, P) which cannot be optimized into a simple Minus ... 
*/ @@ -2663,7 +2665,7 @@ make_tuple: /* skip a potential Pin */ mem = skip_Pin(mem); - turn_into_tuple(n, pn_Div_max); + turn_into_tuple(n, pn_Div_max+1); set_Tuple_pred(n, pn_Div_M, mem); set_Tuple_pred(n, pn_Div_X_regular, new_r_Jmp(blk)); set_Tuple_pred(n, pn_Div_X_except, new_r_Bad(irg, mode_X)); @@ -2755,7 +2757,7 @@ make_tuple: /* skip a potential Pin */ mem = skip_Pin(mem); - turn_into_tuple(n, pn_Mod_max); + turn_into_tuple(n, pn_Mod_max+1); set_Tuple_pred(n, pn_Mod_M, mem); set_Tuple_pred(n, pn_Mod_X_regular, new_r_Jmp(blk)); set_Tuple_pred(n, pn_Mod_X_except, new_r_Bad(irg, mode_X)); @@ -2789,7 +2791,7 @@ static ir_node *transform_node_Cond(ir_node *n) Replace it by a tuple (Bad, Jmp) or (Jmp, Bad) */ ir_node *blk = get_nodes_block(n); jmp = new_r_Jmp(blk); - turn_into_tuple(n, pn_Cond_max); + turn_into_tuple(n, pn_Cond_max+1); if (ta == tarval_b_true) { set_Tuple_pred(n, pn_Cond_false, new_r_Bad(irg, mode_X)); set_Tuple_pred(n, pn_Cond_true, jmp); @@ -2930,15 +2932,109 @@ static bool is_shiftop(const ir_node *n) return is_Shl(n) || is_Shr(n) || is_Shrs(n) || is_Rotl(n); } +/** + * normalisation: (x & c1) >> c2 to (x >> c2) & (c1 >> c2) + * (we can use: + * - and, or, xor instead of & + * - Shl, Shr, Shrs, rotl instead of >> + * (with a special case for Or/Xor + Shrs) + * + * This normalisation is good for things like x-(x&y) esp. in 186.crafty. 
+ */ +static ir_node *transform_node_shift_bitop(ir_node *n) +{ + ir_graph *irg = get_irn_irg(n); + ir_node *right = get_binop_right(n); + ir_mode *mode = get_irn_mode(n); + ir_node *left; + ir_node *bitop_left; + ir_node *bitop_right; + ir_op *op_left; + ir_node *block; + dbg_info *dbgi; + ir_node *new_shift; + ir_node *new_bitop; + ir_node *new_const; + ir_tarval *tv1; + ir_tarval *tv2; + ir_tarval *tv_shift; + + if (is_irg_state(irg, IR_GRAPH_STATE_NORMALISATION2)) + return n; + + assert(is_Shrs(n) || is_Shr(n) || is_Shl(n) || is_Rotl(n)); + + if (!is_Const(right)) + return n; + + left = get_binop_left(n); + op_left = get_irn_op(left); + if (op_left != op_And && op_left != op_Or && op_left != op_Eor) + return n; + + /* doing it with Shrs is not legal if the Or/Eor affects the topmost bit */ + if (is_Shrs(n) && (op_left == op_Or || op_left == op_Eor)) { + /* TODO: test if sign bit is affected */ + return n; + } + + bitop_right = get_binop_right(left); + if (!is_Const(bitop_right)) + return n; + + bitop_left = get_binop_left(left); + + block = get_nodes_block(n); + dbgi = get_irn_dbg_info(n); + tv1 = get_Const_tarval(bitop_right); + tv2 = get_Const_tarval(right); + + assert(get_tarval_mode(tv1) == mode); + + if (is_Shl(n)) { + new_shift = new_rd_Shl(dbgi, block, bitop_left, right, mode); + tv_shift = tarval_shl(tv1, tv2); + } else if (is_Shr(n)) { + new_shift = new_rd_Shr(dbgi, block, bitop_left, right, mode); + tv_shift = tarval_shr(tv1, tv2); + } else if (is_Shrs(n)) { + new_shift = new_rd_Shrs(dbgi, block, bitop_left, right, mode); + tv_shift = tarval_shrs(tv1, tv2); + } else { + assert(is_Rotl(n)); + new_shift = new_rd_Rotl(dbgi, block, bitop_left, right, mode); + tv_shift = tarval_rotl(tv1, tv2); + } + + assert(get_tarval_mode(tv_shift) == mode); + irg = get_irn_irg(n); + new_const = new_r_Const(irg, tv_shift); + + if (op_left == op_And) { + new_bitop = new_rd_And(dbgi, block, new_shift, new_const, mode); + } else if (op_left == op_Or) { + new_bitop = 
new_rd_Or(dbgi, block, new_shift, new_const, mode); + } else { + assert(op_left == op_Eor); + new_bitop = new_rd_Eor(dbgi, block, new_shift, new_const, mode); + } + + return new_bitop; +} + /** * normalisation: (x >> c1) & c2 to (x & (c2<<c1)) >> c1 * (we can use: * - and, or, xor instead of & * - Shl, Shr, Shrs, rotl instead of >> * (with a special case for Or/Xor + Shrs) + * + * This normalisation is usually good for the backend since << C can often be + * matched as address-mode. */ static ir_node *transform_node_bitop_shift(ir_node *n) { + ir_graph *irg = get_irn_irg(n); ir_node *left = get_binop_left(n); ir_node *right = get_binop_right(n); ir_mode *mode = get_irn_mode(n); @@ -2947,7 +3043,6 @@ static ir_node *transform_node_bitop_shift(ir_node *n) ir_node *block; dbg_info *dbg_bitop; dbg_info *dbg_shift; - ir_graph *irg; ir_node *new_bitop; ir_node *new_shift; ir_node *new_const; @@ -2955,6 +3050,9 @@ static ir_node *transform_node_bitop_shift(ir_node *n) ir_tarval *tv2; ir_tarval *tv_bitop; + if (!is_irg_state(irg, IR_GRAPH_STATE_NORMALISATION2)) + return n; + assert(is_And(n) || is_Or(n) || is_Eor(n)); if (!is_Const(right) || !is_shiftop(left)) return n; @@ -3766,6 +3864,7 @@ static ir_node *transform_node_Cmp(ir_node *n) ir_graph *irg = get_irn_irg(n); left = op_left; right = new_r_Const(irg, new_tv); + mode = get_irn_mode(left); changed = true; DBG_OPT_ALGSIM0(n, n, FS_OPT_CMP_CONV); } @@ -5048,7 +5147,7 @@ typedef ir_node*(*new_shift_func)(dbg_info *dbgi, ir_node *block, * then we can use that to minimize the value of Add(x, const) or * Sub(Const, x). In particular this often avoids 1 instruction in some * backends for the Shift(x, Sub(Const, y)) case because it can be replaced - * by Shift(x, Minus(y)) which doesnt't need an explicit Const constructed. + * by Shift(x, Minus(y)) which does not need an explicit Const constructed. 
*/ static ir_node *transform_node_shift_modulo(ir_node *n, new_shift_func new_shift) @@ -5135,6 +5234,8 @@ static ir_node *transform_node_Shr(ir_node *n) n = transform_node_shift_modulo(n, new_rd_Shr); if (is_Shr(n)) n = transform_node_shl_shr(n); + if (is_Shr(n)) + n = transform_node_shift_bitop(n); return n; } /* transform_node_Shr */ @@ -5164,6 +5265,8 @@ static ir_node *transform_node_Shrs(ir_node *n) if (is_Shrs(n)) n = transform_node_shift_modulo(n, new_rd_Shrs); + if (is_Shrs(n)) + n = transform_node_shift_bitop(n); return n; } /* transform_node_Shrs */ @@ -5185,6 +5288,8 @@ static ir_node *transform_node_Shl(ir_node *n) n = transform_node_shift_modulo(n, new_rd_Shl); if (is_Shl(n)) n = transform_node_shl_shr(n); + if (is_Shl(n)) + n = transform_node_shift_bitop(n); return n; } /* transform_node_Shl */ @@ -5202,6 +5307,9 @@ static ir_node *transform_node_Rotl(ir_node *n) HANDLE_BINOP_PHI((eval_func) tarval_rotl, a, b, c, mode); n = transform_node_shift(n); + if (is_Rotl(n)) + n = transform_node_shift_bitop(n); + return n; } /* transform_node_Rotl */ @@ -5276,7 +5384,7 @@ static ir_node *transform_node_End(ir_node *n) /* no need to keep Bad */ if (is_Bad(ka)) continue; - /* dont keep unreachable code */ + /* do not keep unreachable code */ block = is_Block(ka) ? 
ka : get_nodes_block(ka); if (is_block_unreachable(block)) continue; @@ -5306,16 +5414,115 @@ int ir_is_negated_value(const ir_node *a, const ir_node *b) return false; } +static const ir_node *skip_upconv(const ir_node *node) +{ + while (is_Conv(node)) { + ir_mode *mode = get_irn_mode(node); + const ir_node *op = get_Conv_op(node); + ir_mode *op_mode = get_irn_mode(op); + if (!smaller_mode(op_mode, mode)) + break; + node = op; + } + return node; +} + +int ir_mux_is_abs(const ir_node *sel, const ir_node *mux_true, + const ir_node *mux_false) +{ + ir_node *cmp_left; + ir_node *cmp_right; + ir_mode *mode; + ir_relation relation; + + if (!is_Cmp(sel)) + return 0; + + /** + * Note further that these optimization work even for floating point + * with NaN's because -NaN == NaN. + * However, if +0 and -0 is handled differently, we cannot use the Abs/-Abs + * transformations. + */ + mode = get_irn_mode(mux_true); + if (mode_honor_signed_zeros(mode)) + return 0; + + /* must be <, <=, >=, > */ + relation = get_Cmp_relation(sel); + if ((relation & ir_relation_less_greater) == 0) + return 0; + + if (!ir_is_negated_value(mux_true, mux_false)) + return 0; + + mux_true = skip_upconv(mux_true); + mux_false = skip_upconv(mux_false); + + /* must be x cmp 0 */ + cmp_right = get_Cmp_right(sel); + if (!is_Const(cmp_right) || !is_Const_null(cmp_right)) + return 0; + + cmp_left = get_Cmp_left(sel); + if (cmp_left == mux_false) { + if (relation & ir_relation_less) { + return 1; + } else { + assert(relation & ir_relation_greater); + return -1; + } + } else if (cmp_left == mux_true) { + if (relation & ir_relation_less) { + return -1; + } else { + assert(relation & ir_relation_greater); + return 1; + } + } + + return 0; +} + +ir_node *ir_get_abs_op(const ir_node *sel, ir_node *mux_true, + ir_node *mux_false) +{ + ir_node *cmp_left = get_Cmp_left(sel); + return cmp_left == skip_upconv(mux_false) ? mux_false : mux_true; +} + /** * Optimize a Mux into some simpler cases. 
*/ static ir_node *transform_node_Mux(ir_node *n) { - ir_node *oldn = n, *sel = get_Mux_sel(n); - ir_mode *mode = get_irn_mode(n); - ir_node *t = get_Mux_true(n); - ir_node *f = get_Mux_false(n); - ir_graph *irg = get_irn_irg(n); + ir_node *oldn = n; + ir_node *sel = get_Mux_sel(n); + ir_mode *mode = get_irn_mode(n); + ir_node *t = get_Mux_true(n); + ir_node *f = get_Mux_false(n); + ir_graph *irg = get_irn_irg(n); + + /* implement integer abs: abs(x) = x^(x >>s 31) - (x >>s 31) */ + if (get_mode_arithmetic(mode) == irma_twos_complement) { + int abs = ir_mux_is_abs(sel, t, f); + if (abs != 0) { + dbg_info *dbgi = get_irn_dbg_info(n); + ir_node *block = get_nodes_block(n); + ir_node *op = ir_get_abs_op(sel, t, f); + int bits = get_mode_size_bits(mode); + ir_node *shiftconst = new_r_Const_long(irg, mode_Iu, bits-1); + ir_node *sext = new_rd_Shrs(dbgi, block, op, shiftconst, mode); + ir_node *xorn = new_rd_Eor(dbgi, block, op, sext, mode); + ir_node *res; + if (abs > 0) { + res = new_rd_Sub(dbgi, block, xorn, sext, mode); + } else { + res = new_rd_Sub(dbgi, block, sext, xorn, mode); + } + return res; + } + } if (is_irg_state(irg, IR_GRAPH_STATE_KEEP_MUX)) return n; @@ -5626,7 +5833,7 @@ static ir_node *transform_node_Load(ir_node *n) ir_node *bad = new_r_Bad(irg, mode_X); ir_mode *mode = get_Load_mode(n); ir_node *res = new_r_Proj(pred_load, mode, pn_Load_res); - ir_node *in[pn_Load_max] = { mem, jmp, bad, res }; + ir_node *in[pn_Load_max+1] = { mem, res, jmp, bad }; ir_node *tuple = new_r_Tuple(block, ARRAY_SIZE(in), in); return tuple; } @@ -5646,7 +5853,7 @@ static ir_node *transform_node_Load(ir_node *n) ir_graph *irg = get_irn_irg(n); ir_node *bad = new_r_Bad(irg, mode_X); ir_node *res = value; - ir_node *in[pn_Load_max] = { mem, jmp, bad, res }; + ir_node *in[pn_Load_max+1] = { mem, res, jmp, bad }; ir_node *tuple = new_r_Tuple(block, ARRAY_SIZE(in), in); return tuple; } @@ -6109,10 +6316,23 @@ int identities_cmp(const void *elt, const void *key) /* for pinned 
nodes, the block inputs must be equal */ if (get_irn_n(a, -1) != get_irn_n(b, -1)) return 1; - } else if (! get_opt_global_cse()) { - /* for block-local CSE both nodes must be in the same Block */ - if (get_nodes_block(a) != get_nodes_block(b)) - return 1; + } else { + ir_node *block_a = get_nodes_block(a); + ir_node *block_b = get_nodes_block(b); + if (! get_opt_global_cse()) { + /* for block-local CSE both nodes must be in the same Block */ + if (block_a != block_b) + return 1; + } else { + /* The optimistic approach would be to do nothing here. + * However doing GCSE optimistically produces a lot of partially dead code which appears + * to be worse in practice than the missed opportunities. + * So we use a very conservative variant here and only CSE if 1 value dominates the + * other. */ + if (!block_dominates(block_a, block_b) + && !block_dominates(block_b, block_a)) + return 1; + } } /* compare a->in[0..ins] with b->in[0..ins] */