X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fir%2Firopt.c;h=dcbe3baec97dc1f3a70cc1cec51775c1a04d78af;hb=637542932dc27dcdfc7def09b58d9d5d4c34fb77;hp=31e2b21127904bf36b2b5d443de4b0db8007e2e9;hpb=4b76022c2d15da980982500340da6887007bd9b7;p=libfirm diff --git a/ir/ir/iropt.c b/ir/ir/iropt.c index 31e2b2112..dcbe3baec 100644 --- a/ir/ir/iropt.c +++ b/ir/ir/iropt.c @@ -6,7 +6,7 @@ * Modified by: Goetz Lindenmaier * Created: * CVS-ID: $Id$ - * Copyright: (c) 1998-2003 Universität Karlsruhe + * Copyright: (c) 1998-2005 Universität Karlsruhe * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. */ @@ -26,6 +26,7 @@ # include "irnode_t.h" # include "irgraph_t.h" +# include "iredges_t.h" # include "irmode_t.h" # include "iropt_t.h" # include "ircons_t.h" @@ -38,6 +39,8 @@ # include "irhooks.h" # include "irarch.h" # include "hashptr.h" +# include "archop.h" +# include "opt_polymorphy.h" /* Make types visible to allow most efficient access */ # include "entity_t.h" @@ -58,7 +61,7 @@ follow_Id (ir_node *n) */ static tarval *computed_value_Const(ir_node *n) { - return get_Const_tarval(n); + return get_Const_tarval(n); } /** @@ -436,20 +439,22 @@ static tarval *computed_value_Proj(ir_node *n) ab = get_Cmp_right(a); proj_nr = get_Proj_proj(n); - if (aa == ab && !mode_is_float(get_irn_mode(aa))) { /* 1.: */ + if (aa == ab && ( + !mode_is_float(get_irn_mode(aa)) || proj_nr == pn_Cmp_Lt || proj_nr == pn_Cmp_Gt) + ) { /* 1.: */ /* BEWARE: a == a is NOT always True for floating Point!!! */ /* This is a trick with the bits used for encoding the Cmp Proj numbers, the following statement is not the same: - return new_tarval_from_long (proj_nr == Eq, mode_b) */ - return new_tarval_from_long (proj_nr & Eq, mode_b); + return new_tarval_from_long (proj_nr == pn_Cmp_Eq, mode_b) */ + return new_tarval_from_long (proj_nr & pn_Cmp_Eq, mode_b); } else { tarval *taa = value_of(aa); tarval *tab = value_of(ab); if ((taa != tarval_bad) && (tab != tarval_bad)) { /* 2.: */ /* strange checks... */ - pnc_number flags = tarval_cmp (taa, tab); - if (flags != False) { + pn_Cmp flags = tarval_cmp (taa, tab); + if (flags != pn_Cmp_False) { return new_tarval_from_long (proj_nr & flags, mode_b); } } else { /* check for 3.: */ @@ -477,7 +482,7 @@ static tarval *computed_value_Proj(ir_node *n) && (mode_is_reference(get_irn_mode(ab))) && (get_irn_op(aba) == op_Alloc))) /* 3.: */ - return new_tarval_from_long (proj_nr & Ne, mode_b); + return new_tarval_from_long (proj_nr & pn_Cmp_Ne, mode_b); } } break; @@ -599,9 +604,26 @@ different_identity (ir_node *a, ir_node *b) } #endif +/** + * Returns a equivalent block for another block. + * If the block has only one predecessor, this is + * the equivalent one. If the only predecessor of a block is + * the block itself, this is a dead block. + * + * If both predecessors of a block are the branches of a binary + * Cond, the equivalent block is Cond's block. + * + * If all predecessors of a block are bad or lies in a dead + * block, the current block is dead as well. + * + * Note, that blocks are NEVER turned into Bad's, instead + * the dead_block flag is set. So, never test for is_Bad(block), + * always use is_dead_Block(block). + */ static ir_node *equivalent_node_Block(ir_node *n) { ir_node *oldn = n; + int n_preds = get_Block_n_cfgpreds(n); /* The Block constructor does not call optimize, but mature_immBlock calls the optimization. 
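+     (Example: a block whose only control flow predecessor is a Jmp from block B
+      is equivalent to B, control simply falls through.  If that Jmp comes from the
+      block itself, the block is a dead self loop; as noted in the comment above it
+      is only flagged dead, never turned into a Bad.)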
*/ @@ -613,8 +635,7 @@ static ir_node *equivalent_node_Block(ir_node *n) This should be true, as the block is matured before optimize is called. But what about Phi-cycles with the Phi0/Id that could not be resolved? Remaining Phi nodes are just Ids. */ - if ((get_Block_n_cfgpreds(n) == 1) && - (get_irn_op(get_Block_cfgpred(n, 0)) == op_Jmp)) { + if ((n_preds == 1) && (get_irn_op(get_Block_cfgpred(n, 0)) == op_Jmp)) { ir_node *predblock = get_nodes_block(get_Block_cfgpred(n, 0)); if (predblock == oldn) { /* Jmp jumps into the block it is in -- deal self cycle. */ @@ -625,7 +646,7 @@ static ir_node *equivalent_node_Block(ir_node *n) DBG_OPT_STG(oldn, n); } } - else if ((get_Block_n_cfgpreds(n) == 1) && + else if ((n_preds == 1) && (get_irn_op(skip_Proj(get_Block_cfgpred(n, 0))) == op_Cond)) { ir_node *predblock = get_nodes_block(get_Block_cfgpred(n, 0)); if (predblock == oldn) { @@ -634,7 +655,7 @@ static ir_node *equivalent_node_Block(ir_node *n) DBG_OPT_DEAD(oldn, n); } } - else if ((get_Block_n_cfgpreds(n) == 2) && + else if ((n_preds == 2) && (get_opt_control_flow_weak_simplification())) { /* Test whether Cond jumps twice to this block @@@ we could do this also with two loops finding two preds from several ones. */ @@ -766,6 +787,10 @@ static ir_node *equivalent_node_left_zero(ir_node *n) /** * Er, a "symmetic unop", ie op(op(n)) = n. + * + * @fixme -(-a) == a, but might overflow two times. + * We handle it anyway here but the better way would be a + * flag. This would be needed for Pascal for instance. */ static ir_node *equivalent_node_symmetric_unop(ir_node *n) { @@ -936,7 +961,7 @@ static ir_node *equivalent_node_Conv(ir_node *n) /** * A Cast may be removed if the type of the previous node - * is already to type of the Cast. + * is already the type of the Cast. */ static ir_node *equivalent_node_Cast(ir_node *n) { ir_node *pred = get_Cast_op(n); @@ -1082,17 +1107,96 @@ static ir_node *equivalent_node_Id(ir_node *n) */ static ir_node *equivalent_node_Mux(ir_node *n) { - ir_node *sel = get_Mux_sel(n); + ir_node *oldn = n, *sel = get_Mux_sel(n); tarval *ts = value_of(sel); - if (ts == get_tarval_b_true()) - return get_Mux_true(n); - else if (ts == get_tarval_b_false()) - return get_Mux_false(n); + /* Mux(true, f, t) == t */ + if (ts == get_tarval_b_true()) { + n = get_Mux_true(n); + DBG_OPT_ALGSIM0(oldn, n); + } + /* Mux(false, f, t) == f */ + else if (ts == get_tarval_b_false()) { + n = get_Mux_false(n); + DBG_OPT_ALGSIM0(oldn, n); + } + /* Mux(v, x, x) == x */ + else if (get_Mux_false(n) == get_Mux_true(n)) { + n = get_Mux_true(n); + DBG_OPT_ALGSIM0(oldn, n); + } + else if (get_irn_op(sel) == op_Proj && !mode_honor_signed_zeros(get_irn_mode(n))) { + ir_node *cmp = get_Proj_pred(sel); + long proj_nr = get_Proj_proj(sel); + ir_node *b = get_Mux_false(n); + ir_node *a = get_Mux_true(n); + + /* + * Note: normalization puts the constant on the right site, + * so we check only one case. + * + * Note further that these optimization work even for floating point + * with NaN's because -NaN == NaN. + * However, if +0 and -0 is handled differently, we cannot use the first one. 
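+   * (Concretely: replacing Mux(a == 0, -a, a) by -a would map a == +0 to -0
+   * and a == -0 to +0, which is why the surrounding test requires
+   * !mode_honor_signed_zeros() for the mode of n.)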
+ */ + if (get_irn_op(cmp) == op_Cmp && get_Cmp_left(cmp) == a) { + if (classify_Const(get_Cmp_right(cmp)) == CNST_NULL) { + /* Mux(a CMP 0, X, a) */ + if (get_irn_op(b) == op_Minus && get_Minus_op(b) == a) { + /* Mux(a CMP 0, -a, a) */ + if (proj_nr == pn_Cmp_Eq) { + /* Mux(a == 0, -a, a) ==> -a */ + n = b; + DBG_OPT_ALGSIM0(oldn, n); + } + else if (proj_nr == pn_Cmp_Lg || proj_nr == pn_Cmp_Ne) { + /* Mux(a != 0, -a, a) ==> a */ + n = a; + DBG_OPT_ALGSIM0(oldn, n); + } + } + else if (classify_Const(b) == CNST_NULL) { + /* Mux(a CMP 0, 0, a) */ + if (proj_nr == pn_Cmp_Lg || proj_nr == pn_Cmp_Ne) { + /* Mux(a != 0, 0, a) ==> a */ + n = a; + DBG_OPT_ALGSIM0(oldn, n); + } + else if (proj_nr == pn_Cmp_Eq) { + /* Mux(a == 0, 0, a) ==> 0 */ + n = b; + DBG_OPT_ALGSIM0(oldn, n); + } + } + } + } + } return n; } +/** + * Optimize -a CMP -b into b CMP a. + * This works only for for modes where unary Minus + * cannot Overflow. + * Note that two-complement integers can Overflow + * so it will NOT work. + */ +static ir_node *equivalent_node_Cmp(ir_node *n) +{ + ir_node *left = get_Cmp_left(n); + ir_node *right = get_Cmp_right(n); + + if (get_irn_op(left) == op_Minus && get_irn_op(right) == op_Minus && + !mode_overflow_on_unary_Minus(get_irn_mode(left))) { + left = get_Minus_op(left); + right = get_Minus_op(right); + set_Cmp_left(n, right); + set_Cmp_right(n, left); + } + return n; +} + /** * equivalent_node() returns a node equivalent to input n. It skips all nodes that * perform no actual computation, as, e.g., the Id nodes. It does not create @@ -1142,6 +1246,7 @@ static ir_op *firm_set_default_equivalent_node(ir_op *op) CASE(Proj); CASE(Id); CASE(Mux); + CASE(Cmp); default: op->equivalent_node = NULL; } @@ -1185,7 +1290,8 @@ optimize_preds(ir_node *n) { /** * Transform AddP(P, ConvIs(Iu)), AddP(P, ConvIu(Is)) and - * SubP(P, ConvIs(Iu)), SubP(P, ConvIu(Is)) if possible. + * SubP(P, ConvIs(Iu)), SubP(P, ConvIu(Is)). + * If possible, remove the Conv's. */ static ir_node *transform_node_AddSub(ir_node *n) { @@ -1245,14 +1351,70 @@ static ir_node *transform_node_AddSub(ir_node *n) return n; } -#define transform_node_Add transform_node_AddSub -#define transform_node_Sub transform_node_AddSub +/** + * Do the AddSub optimization, then Transform Add(a,a) into Mul(a, 2) + * if the mode is integer or float. + * Reassociation might fold this further. + */ +static ir_node *transform_node_Add(ir_node *n) +{ + ir_mode *mode; + ir_node *oldn = n; + + n = transform_node_AddSub(n); + + mode = get_irn_mode(n); + if (mode_is_num(mode)) { + ir_node *a = get_Add_left(n); + + if (a == get_Add_right(n)) { + ir_node *block = get_nodes_block(n); + + n = new_rd_Mul( + get_irn_dbg_info(n), + current_ir_graph, + block, + a, + new_r_Const_long(current_ir_graph, block, mode, 2), + mode); + DBG_OPT_ALGSIM0(oldn, n); + } + } + return n; +} + +/** + * Do the AddSub optimization, then Transform Sub(0,a) into Minus(a). 
+ */ +static ir_node *transform_node_Sub(ir_node *n) +{ + ir_mode *mode; + ir_node *oldn = n; + + n = transform_node_AddSub(n); + + mode = get_irn_mode(n); + if (mode_is_num(mode) && (classify_Const(get_Sub_left(n)) == CNST_NULL)) { + n = new_rd_Minus( + get_irn_dbg_info(n), + current_ir_graph, + get_nodes_block(n), + get_Sub_right(n), + mode); + DBG_OPT_ALGSIM0(oldn, n); + } + + return n; +} /** Do architecture dependend optimizations on Mul nodes */ static ir_node *transform_node_Mul(ir_node *n) { return arch_dep_replace_mul_with_shifts(n); } +/** + * transform a Div Node + */ static ir_node *transform_node_Div(ir_node *n) { tarval *tv = value_of(n); @@ -1260,8 +1422,11 @@ static ir_node *transform_node_Div(ir_node *n) /* BEWARE: it is NOT possible to optimize a/a to 1, as this may cause a exception */ - if (tv != tarval_bad) + if (tv != tarval_bad) { value = new_Const(get_tarval_mode(tv), tv); + + DBG_OPT_CSTEVAL(n, value); + } else /* Try architecture dependand optimization */ value = arch_dep_replace_div_by_const(n); @@ -1277,6 +1442,9 @@ static ir_node *transform_node_Div(ir_node *n) return n; } +/** + * transform a Mod node + */ static ir_node *transform_node_Mod(ir_node *n) { tarval *tv = value_of(n); @@ -1284,8 +1452,11 @@ static ir_node *transform_node_Mod(ir_node *n) /* BEWARE: it is NOT possible to optimize a%a to 0, as this may cause a exception */ - if (tv != tarval_bad) + if (tv != tarval_bad) { value = new_Const(get_tarval_mode(tv), tv); + + DBG_OPT_CSTEVAL(n, value); + } else /* Try architecture dependand optimization */ value = arch_dep_replace_mod_by_const(n); @@ -1301,6 +1472,9 @@ static ir_node *transform_node_Mod(ir_node *n) return n; } +/** + * transform a DivMod node + */ static ir_node *transform_node_DivMod(ir_node *n) { int evaluated = 0; @@ -1320,7 +1494,10 @@ static ir_node *transform_node_DivMod(ir_node *n) if (tb == get_mode_one(get_tarval_mode(tb))) { b = new_Const (mode, get_mode_null(mode)); evaluated = 1; - } else if (ta != tarval_bad) { + + DBG_OPT_CSTEVAL(n, b); + } + else if (ta != tarval_bad) { tarval *resa, *resb; resa = tarval_div (ta, tb); if (resa == tarval_bad) return n; /* Causes exception!!! Model by replacing through @@ -1330,6 +1507,9 @@ static ir_node *transform_node_DivMod(ir_node *n) a = new_Const (mode, resa); b = new_Const (mode, resb); evaluated = 1; + + DBG_OPT_CSTEVAL(n, a); + DBG_OPT_CSTEVAL(n, b); } else { /* Try architecture dependand optimization */ arch_dep_replace_divmod_by_const(&a, &b, n); @@ -1354,6 +1534,9 @@ static ir_node *transform_node_DivMod(ir_node *n) return n; } +/** + * transform a Cond node + */ static ir_node *transform_node_Cond(ir_node *n) { /* Replace the Cond by a Jmp if it branches on a constant @@ -1411,6 +1594,7 @@ static ir_node *transform_node_Cond(ir_node *n) */ static ir_node *transform_node_Eor(ir_node *n) { + ir_node *oldn = n; ir_node *a = get_Eor_left(n); ir_node *b = get_Eor_right(n); @@ -1418,16 +1602,22 @@ static ir_node *transform_node_Eor(ir_node *n) && (get_irn_op(a) == op_Proj) && (get_irn_mode(a) == mode_b) && (classify_tarval (value_of(b)) == TV_CLASSIFY_ONE) - && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) + && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) { /* The Eor negates a Cmp. The Cmp has the negated result anyways! 
*/ n = new_r_Proj(current_ir_graph, get_nodes_block(n), get_Proj_pred(a), mode_b, get_negated_pnc(get_Proj_proj(a))); + + DBG_OPT_ALGSIM0(oldn, n); + } else if ((get_irn_mode(n) == mode_b) - && (classify_tarval (value_of(b)) == TV_CLASSIFY_ONE)) + && (classify_tarval (value_of(b)) == TV_CLASSIFY_ONE)) { /* The Eor is a Not. Replace it by a Not. */ /* ????!!!Extend to bitfield 1111111. */ n = new_r_Not(current_ir_graph, get_nodes_block(n), a, mode_b); + DBG_OPT_ALGSIM0(oldn, n); + } + return n; } @@ -1436,15 +1626,18 @@ static ir_node *transform_node_Eor(ir_node *n) */ static ir_node *transform_node_Not(ir_node *n) { + ir_node *oldn = n; ir_node *a = get_Not_op(n); if ( (get_irn_mode(n) == mode_b) && (get_irn_op(a) == op_Proj) && (get_irn_mode(a) == mode_b) - && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) + && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) { /* We negate a Cmp. The Cmp has the negated result anyways! */ n = new_r_Proj(current_ir_graph, get_nodes_block(n), get_Proj_pred(a), mode_b, get_negated_pnc(get_Proj_proj(a))); + DBG_OPT_ALGSIM0(oldn, n); + } return n; } @@ -1453,26 +1646,33 @@ static ir_node *transform_node_Not(ir_node *n) * Transform a Cast of a Const into a new Const */ static ir_node *transform_node_Cast(ir_node *n) { + ir_node *oldn = n; ir_node *pred = get_Cast_op(n); - type *tp = get_irn_type(pred); + type *tp = get_irn_type(n); if (get_irn_op(pred) == op_Const && get_Const_type(pred) != tp) { n = new_rd_Const_type(NULL, current_ir_graph, get_nodes_block(pred), get_irn_mode(pred), get_Const_tarval(pred), tp); + DBG_OPT_CSTEVAL(oldn, n); } else if ((get_irn_op(pred) == op_SymConst) && (get_SymConst_value_type(pred) != tp)) { n = new_rd_SymConst_type(NULL, current_ir_graph, get_nodes_block(pred), get_SymConst_symbol(pred), get_SymConst_kind(pred), tp); + DBG_OPT_CSTEVAL(oldn, n); } + return n; } /** - * Transform a Div/Mod/DivMod with a non-zero constant. Must be - * done here instead of equivalent node because it creates new - * nodes. + * Does all optimizations on nodes that must be done on it's Proj's + * because of creating new nodes. + * + * Transform a Div/Mod/DivMod with a non-zero constant. * Removes the exceptions and routes the memory to the NoMem node. * - * Further, it optimizes jump tables by removing all impossible cases. + * Optimizes jump tables by removing all impossible cases. + * + * Normalizes and optimizes Cmp nodes. */ static ir_node *transform_node_Proj(ir_node *proj) { @@ -1572,6 +1772,177 @@ static ir_node *transform_node_Proj(ir_node *proj) } return proj; + case iro_Cmp: + if (get_opt_reassociation()) { + ir_node *left = get_Cmp_left(n); + ir_node *right = get_Cmp_right(n); + ir_node *c = NULL; + tarval *tv = NULL; + int changed = 0; + ir_mode *mode = NULL; + + proj_nr = get_Proj_proj(proj); + + /* + * First step: normalize the compare op + * by placing the constant on the right site + * or moving the lower address node to the left. + * We ignore the case that both are constants, then + * this compare should be optimized away. + */ + if (get_irn_op(right) == op_Const) + c = right; + else if (get_irn_op(left) == op_Const) { + c = left; + left = right; + right = c; + + proj_nr = get_swapped_pnc(proj_nr); + changed |= 1; + } + else if (left > right) { + ir_node *t = left; + + left = right; + right = t; + + proj_nr = get_swapped_pnc(proj_nr); + changed |= 1; + } + + /* + * Second step: Try to reduce the magnitude + * of a constant. This may help to generate better code + * later and may help to normalize more compares. 
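+       * For example, with c = 1: a < 1 becomes a <= 0 and a >= 1 becomes a > 0,
+       * turning the test into a compare against zero.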
+ * Of course this is only possible for integer values. + */ + if (c) { + mode = get_irn_mode(c); + tv = get_Const_tarval(c); + + if (tv != tarval_bad) { + /* the following optimization is possibe on modes without Overflow + * on Unary Minus or on == and !=: + * -a CMP c ==> a swap(CMP) -c + * + * Beware: for two-complement Overflow may occur, so only == and != can + * be optimized, see this: + * -MININT < 0 =/=> MININT > 0 !!! + */ + if (get_opt_constant_folding() && get_irn_op(left) == op_Minus && + (!mode_overflow_on_unary_Minus(mode) || + (mode_is_int(mode) && (proj_nr == pn_Cmp_Eq || proj_nr == pn_Cmp_Lg)))) { + left = get_Minus_op(left); + tv = tarval_sub(get_tarval_null(mode), tv); + + proj_nr = get_swapped_pnc(proj_nr); + changed |= 2; + } + + /* for integer modes, we have more */ + if (mode_is_int(mode)) { + /* Ne includes Unordered which is not possible on integers. + * However, frontends often use this wrong, so fix it here */ + if (proj_nr == pn_Cmp_Ne) { + proj_nr = pn_Cmp_Lg; + set_Proj_proj(proj, proj_nr); + } + + /* c > 0 : a < c ==> a <= (c-1) a >= c ==> a > (c-1) */ + if ((proj_nr == pn_Cmp_Lt || proj_nr == pn_Cmp_Ge) && + tarval_cmp(tv, get_tarval_null(mode)) == pn_Cmp_Gt) { + tv = tarval_sub(tv, get_tarval_one(mode)); + + proj_nr ^= pn_Cmp_Eq; + changed |= 2; + } + /* c < 0 : a > c ==> a >= (c+1) a <= c ==> a < (c+1) */ + else if ((proj_nr == pn_Cmp_Gt || proj_nr == pn_Cmp_Le) && + tarval_cmp(tv, get_tarval_null(mode)) == pn_Cmp_Lt) { + tv = tarval_add(tv, get_tarval_one(mode)); + + proj_nr ^= pn_Cmp_Eq; + changed |= 2; + } + + /* the following reassociations work only for == and != */ + + /* a-b == 0 ==> a == b, a-b != 0 ==> a != b */ + if (classify_tarval(tv) == TV_CLASSIFY_NULL && get_irn_op(left) == op_Sub) { + if (proj_nr == pn_Cmp_Eq || proj_nr == pn_Cmp_Lg) { + right = get_Sub_right(left); + left = get_Sub_left(left); + + tv = value_of(right); + changed = 1; + } + } + + if ((tv != tarval_bad) && (proj_nr == pn_Cmp_Eq || proj_nr == pn_Cmp_Lg)) { + ir_op *op = get_irn_op(left); + + /* a-c1 == c2 ==> a == c2+c1, a-c1 != c2 ==> a != c2+c1 */ + if (op == op_Sub) { + ir_node *c1 = get_Sub_right(left); + tarval *tv2 = value_of(c1); + + if (tv2 != tarval_bad) { + tv2 = tarval_add(tv, value_of(c1)); + + if (tv2 != tarval_bad) { + left = get_Sub_left(left); + tv = tv2; + changed = 2; + } + } + } + /* a+c1 == c2 ==> a == c2-c1, a+c1 != c2 ==> a != c2-c1 */ + else if (op == op_Add) { + ir_node *a_l = get_Add_left(left); + ir_node *a_r = get_Add_right(left); + ir_node *a; + tarval *tv2; + + if (get_irn_op(a_l) == op_Const) { + a = a_r; + tv2 = value_of(a_l); + } + else { + a = a_l; + tv2 = value_of(a_r); + } + + if (tv2 != tarval_bad) { + tv2 = tarval_sub(tv, tv2); + + if (tv2 != tarval_bad) { + left = a; + tv = tv2; + changed = 2; + } + } + } + } + } + } + } + + if (changed) { + ir_node *block = get_nodes_block(n); + + if (changed & 2) /* need a new Const */ + right = new_Const(mode, tv); + + /* create a new compare */ + n = new_rd_Cmp(get_irn_dbg_info(n), current_ir_graph, block, + left, right); + + set_Proj_pred(proj, n); + set_Proj_proj(proj, proj_nr); + } + } + return proj; + case iro_Tuple: /* should not happen, but if it does will be optimized away */ break; @@ -1616,7 +1987,7 @@ static void get_comm_Binop_Ops(ir_node *binop, ir_node **a, ir_node **c) * AND c1 * OR */ -static ir_node *transform_node_Or(ir_node *or) +static ir_node *transform_node_Or_bf_store(ir_node *or) { ir_node *and, *c1; ir_node *or_l, *c2; @@ -1683,7 +2054,129 @@ static ir_node 
*transform_node_Or(ir_node *or) set_Or_right(or, new_const); /* check for more */ - return transform_node_Or(or); + return transform_node_Or_bf_store(or); +} + +/** + * Optimize an Or(shl(x, c), shr(x, bits - c)) into a Rot + */ +static ir_node *transform_node_Or_Rot(ir_node *or) +{ + ir_mode *mode = get_irn_mode(or); + ir_node *shl, *shr, *block; + ir_node *irn, *x, *c1, *c2, *v, *sub, *n; + tarval *tv1, *tv2; + + if (! mode_is_int(mode)) + return or; + + shl = get_binop_left(or); + shr = get_binop_right(or); + + if (get_irn_op(shl) == op_Shr) { + if (get_irn_op(shr) != op_Shl) + return or; + + irn = shl; + shl = shr; + shr = irn; + } + else if (get_irn_op(shl) != op_Shl) + return or; + else if (get_irn_op(shr) != op_Shr) + return or; + + x = get_Shl_left(shl); + if (x != get_Shr_left(shr)) + return or; + + c1 = get_Shl_right(shl); + c2 = get_Shr_right(shr); + if (get_irn_op(c1) == op_Const && get_irn_op(c2) == op_Const) { + tv1 = get_Const_tarval(c1); + if (! tarval_is_long(tv1)) + return or; + + tv2 = get_Const_tarval(c2); + if (! tarval_is_long(tv2)) + return or; + + if (get_tarval_long(tv1) + get_tarval_long(tv2) + != get_mode_size_bits(mode)) + return or; + + /* yet, condition met */ + block = get_nodes_block(or); + + n = new_r_Rot(current_ir_graph, block, x, c1, mode); + + DBG_OPT_ALGSIM1(or, shl, shr, n); + return n; + } + else if (get_irn_op(c1) == op_Sub) { + v = c2; + sub = c1; + + if (get_Sub_right(sub) != v) + return or; + + c1 = get_Sub_left(sub); + if (get_irn_op(c1) != op_Const) + return or; + + tv1 = get_Const_tarval(c1); + if (! tarval_is_long(tv1)) + return or; + + if (get_tarval_long(tv1) != get_mode_size_bits(mode)) + return or; + + /* yet, condition met */ + block = get_nodes_block(or); + + /* a Rot right is not supported, so use a rot left */ + n = new_r_Rot(current_ir_graph, block, x, sub, mode); + + DBG_OPT_ALGSIM0(or, n); + return n; + } + else if (get_irn_op(c2) == op_Sub) { + v = c1; + sub = c2; + + c1 = get_Sub_left(sub); + if (get_irn_op(c1) != op_Const) + return or; + + tv1 = get_Const_tarval(c1); + if (! 
tarval_is_long(tv1)) + return or; + + if (get_tarval_long(tv1) != get_mode_size_bits(mode)) + return or; + + /* yet, condition met */ + block = get_nodes_block(or); + + /* a Rot Left */ + n = new_r_Rot(current_ir_graph, block, x, v, mode); + + DBG_OPT_ALGSIM0(or, n); + return n; + } + + return or; +} + +/** + * Optimize an Or + */ +static ir_node *transform_node_Or(ir_node *or) +{ + or = transform_node_Or_bf_store(or); + or = transform_node_Or_Rot(or); + + return or; } /* forward */ @@ -1692,9 +2185,9 @@ static ir_node *transform_node(ir_node *n); /** * Optimize (a >> c1) >> c2), works for Shr, Shrs, Shl */ -static ir_node * transform_node_shift(ir_node *n) +static ir_node *transform_node_shift(ir_node *n) { - ir_node *left; + ir_node *left, *right; tarval *tv1, *tv2, *res; ir_mode *mode; int modulo_shf, flag; @@ -1705,7 +2198,8 @@ static ir_node * transform_node_shift(ir_node *n) if (get_irn_op(left) != get_irn_op(n)) return n; - tv1 = value_of(get_binop_right(n)); + right = get_binop_right(n); + tv1 = value_of(right); if (tv1 == tarval_bad) return n; @@ -1724,7 +2218,7 @@ static ir_node * transform_node_shift(ir_node *n) if (modulo_shf > 0) { tarval *modulo = new_tarval_from_long(modulo_shf, get_tarval_mode(res)); - if (tarval_cmp(res, modulo) & Lt) + if (tarval_cmp(res, modulo) & pn_Cmp_Lt) flag = 1; } else @@ -1739,16 +2233,23 @@ static ir_node * transform_node_shift(ir_node *n) irn = new_ir_node(NULL, current_ir_graph, block, get_irn_op(n), mode, 2, in); + DBG_OPT_ALGSIM0(n, irn); + return transform_node(irn); } return n; } -static ir_node * transform_node_End(ir_node *n) { +#define transform_node_Shr transform_node_shift +#define transform_node_Shrs transform_node_shift +#define transform_node_Shl transform_node_shift + +/** + * Remove dead blocks in keepalive list. We do not generate a new End node. + */ +static ir_node *transform_node_End(ir_node *n) { int i, n_keepalives = get_End_n_keepalives(n); - /* Remove dead blocks in keepalive list. - We do not generate a new End node. */ for (i = 0; i < n_keepalives; ++i) { ir_node *ka = get_End_keepalive(n, i); if (is_Block(ka) && is_Block_dead(ka)) @@ -1757,6 +2258,139 @@ static ir_node * transform_node_End(ir_node *n) { return n; } +/** + * Optimize a Mux into some simplier cases. + */ +static ir_node *transform_node_Mux(ir_node *n) +{ + ir_node *oldn = n, *sel = get_Mux_sel(n); + ir_mode *mode = get_irn_mode(n); + + if (get_irn_op(sel) == op_Proj && !mode_honor_signed_zeros(mode)) { + ir_node *cmp = get_Proj_pred(sel); + long proj_nr = get_Proj_proj(sel); + ir_node *f = get_Mux_false(n); + ir_node *t = get_Mux_true(n); + + if (get_irn_op(cmp) == op_Cmp && classify_Const(get_Cmp_right(cmp)) == CNST_NULL) { + ir_node *block = get_nodes_block(n); + + /* + * Note: normalization puts the constant on the right site, + * so we check only one case. + * + * Note further that these optimization work even for floating point + * with NaN's because -NaN == NaN. + * However, if +0 and -0 is handled differently, we cannot use the first one. 
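+     * (Reminder on the notation used below: Mux(sel, f, t) yields t when sel
+     * is true, so "Mux(a >=/> 0, -a, a)" stands for "a >= 0 ? a : -a", i.e. Abs(a).)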
+ */ + if (get_irn_op(f) == op_Minus && + get_Minus_op(f) == t && + get_Cmp_left(cmp) == t) { + + if (proj_nr == pn_Cmp_Ge || proj_nr == pn_Cmp_Gt) { + /* Mux(a >=/> 0, -a, a) ==> Abs(a) */ + n = new_rd_Abs(get_irn_dbg_info(n), + current_ir_graph, + block, + t, mode); + DBG_OPT_ALGSIM1(oldn, cmp, sel, n); + return n; + } + else if (proj_nr == pn_Cmp_Le || proj_nr == pn_Cmp_Lt) { + /* Mux(a <=/< 0, -a, a) ==> Minus(Abs(a)) */ + n = new_rd_Abs(get_irn_dbg_info(n), + current_ir_graph, + block, + t, mode); + n = new_rd_Minus(get_irn_dbg_info(n), + current_ir_graph, + block, + n, mode); + + DBG_OPT_ALGSIM1(oldn, cmp, sel, n); + return n; + } + } + else if (get_irn_op(t) == op_Minus && + get_Minus_op(t) == f && + get_Cmp_left(cmp) == f) { + + if (proj_nr == pn_Cmp_Le || proj_nr == pn_Cmp_Lt) { + /* Mux(a <=/< 0, a, -a) ==> Abs(a) */ + n = new_rd_Abs(get_irn_dbg_info(n), + current_ir_graph, + block, + f, mode); + DBG_OPT_ALGSIM1(oldn, cmp, sel, n); + return n; + } + else if (proj_nr == pn_Cmp_Ge || proj_nr == pn_Cmp_Gt) { + /* Mux(a >=/> 0, a, -a) ==> Minus(Abs(a)) */ + n = new_rd_Abs(get_irn_dbg_info(n), + current_ir_graph, + block, + f, mode); + n = new_rd_Minus(get_irn_dbg_info(n), + current_ir_graph, + block, + n, mode); + + DBG_OPT_ALGSIM1(oldn, cmp, sel, n); + return n; + } + } + + if (mode_is_int(mode) && mode_is_signed(mode) && + get_mode_arithmetic(mode) == irma_twos_complement) { + ir_node *x = get_Cmp_left(cmp); + + /* the following optimization works only with signed integer two-complement mode */ + + if (mode == get_irn_mode(x)) { + /* + * FIXME: this restriction is two rigid, as it would still + * work if mode(x) = Hs and mode == Is, but at least it removes + * all wrong cases. + */ + if ((proj_nr == pn_Cmp_Lt || proj_nr == pn_Cmp_Le) && + classify_Const(t) == CNST_ALL_ONE && + classify_Const(f) == CNST_NULL) { + /* + * Mux(x:T Shrs(x, sizeof_bits(T) - 1) + * Conditions: + * T must be signed. + */ + n = new_rd_Shrs(get_irn_dbg_info(n), + current_ir_graph, block, x, + new_r_Const_long(current_ir_graph, block, mode_Iu, + get_mode_size_bits(mode) - 1), + mode); + DBG_OPT_ALGSIM1(oldn, cmp, sel, n); + return n; + } + else if ((proj_nr == pn_Cmp_Gt || proj_nr == pn_Cmp_Ge) && + classify_Const(t) == CNST_ONE && + classify_Const(f) == CNST_NULL) { + /* + * Mux(x:T >/>= 0, 0, 1) -> Shr(-x, sizeof_bits(T) - 1) + * Conditions: + * T must be signed. + */ + n = new_rd_Shr(get_irn_dbg_info(n), + current_ir_graph, block, + new_r_Minus(current_ir_graph, block, x, mode), + new_r_Const_long(current_ir_graph, block, mode_Iu, + get_mode_size_bits(mode) - 1), + mode); + DBG_OPT_ALGSIM1(oldn, cmp, sel, n); + return n; + } + } + } + } + } + return arch_transform_node_Mux(n); +} /** * Tries several [inplace] [optimizing] transformations and returns an @@ -1793,15 +2427,15 @@ static ir_op *firm_set_default_transform_node(ir_op *op) CASE(Not); CASE(Cast); CASE(Proj); + CASE(Sel); CASE(Or); + CASE(Shr); + CASE(Shrs); + CASE(Shl); CASE(End); - case iro_Shr: - case iro_Shrs: - case iro_Shl: - op->transform_node = transform_node_shift; - break; + CASE(Mux); default: - op->transform_node = NULL; + op->transform_node = NULL; } return op; @@ -2181,94 +2815,103 @@ gigo (ir_node *node) ir_node * optimize_node (ir_node *n) { - tarval *tv; - ir_node *oldn = n; - opcode iro = get_irn_opcode(n); - - type *old_tp = get_irn_type(n); - { - int i, arity = get_irn_arity(n); - for (i = 0; i < arity && !old_tp; ++i) - old_tp = get_irn_type(get_irn_n(n, i)); - } - - /* Allways optimize Phi nodes: part of the construction. 
*/ - if ((!get_opt_optimize()) && (iro != iro_Phi)) return n; - - /* constant expression evaluation / constant folding */ - if (get_opt_constant_folding()) { - /* constants can not be evaluated */ - if (iro != iro_Const) { - /* try to evaluate */ - tv = computed_value(n); - if ((get_irn_mode(n) != mode_T) && (tv != tarval_bad)) { - /* - * we MUST copy the node here temporary, because it's still needed - * for DBG_OPT_CSTEVAL - */ - int node_size = offsetof(ir_node, attr) + n->op->attr_size; - oldn = alloca(node_size); - - memcpy(oldn, n, node_size); - CLONE_ARR_A(ir_node *, oldn->in, n->in); - - /* ARG, copy the in array, we need it for statistics */ - memcpy(oldn->in, n->in, ARR_LEN(n->in) * sizeof(n->in[0])); - - /* evaluation was successful -- replace the node. */ - obstack_free (current_ir_graph->obst, n); - n = new_Const (get_tarval_mode (tv), tv); - if (old_tp && get_type_mode(old_tp) == get_tarval_mode (tv)) - set_Const_type(n, old_tp); - DBG_OPT_CSTEVAL(oldn, n); - return n; - } - } - } - - /* remove unnecessary nodes */ - if (get_opt_constant_folding() || - (iro == iro_Phi) || /* always optimize these nodes. */ - (iro == iro_Id) || - (iro == iro_Proj) || - (iro == iro_Block) ) /* Flags tested local. */ - n = equivalent_node (n); - - optimize_preds(n); /* do node specific optimizations of nodes predecessors. */ - - /** common subexpression elimination **/ - /* Checks whether n is already available. */ - /* The block input is used to distinguish different subexpressions. Right - now all nodes are op_pin_state_pinned to blocks, i.e., the cse only finds common - subexpressions within a block. */ - if (get_opt_cse()) - n = identify_cons (current_ir_graph->value_table, n); - - if (n != oldn) { - /* We found an existing, better node, so we can deallocate the old node. */ - obstack_free (current_ir_graph->obst, oldn); - - return n; - } - - /* Some more constant expression evaluation that does not allow to - free the node. */ - iro = get_irn_opcode(n); - if (get_opt_constant_folding() || - (iro == iro_Cond) || - (iro == iro_Proj)) /* Flags tested local. */ - n = transform_node (n); - - /* Remove nodes with dead (Bad) input. - Run always for transformation induced Bads. */ - n = gigo (n); - - /* Now we have a legal, useful node. Enter it in hash table for cse */ - if (get_opt_cse() && (get_irn_opcode(n) != iro_Block)) { - n = identify_remember (current_ir_graph->value_table, n); - } - - return n; + tarval *tv; + ir_node *oldn = n; + opcode iro = get_irn_opcode(n); + + type *old_tp = get_irn_type(n); + { + int i, arity = get_irn_arity(n); + for (i = 0; i < arity && !old_tp; ++i) + old_tp = get_irn_type(get_irn_n(n, i)); + } + + /* Always optimize Phi nodes: part of the construction. 
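+     (Phi nodes are optimized even with optimization switched off because SSA
+      construction relies on it: leftover Phi0/Id placeholders would otherwise
+      stay in the graph, see the comment in equivalent_node_Block above.)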
*/ + if ((!get_opt_optimize()) && (iro != iro_Phi)) return n; + + /* constant expression evaluation / constant folding */ + if (get_opt_constant_folding()) { + /* constants can not be evaluated */ + if (iro != iro_Const) { + /* try to evaluate */ + tv = computed_value(n); + if ((get_irn_mode(n) != mode_T) && (tv != tarval_bad)) { + ir_node *nw; + + /* + * we MUST copy the node here temporary, because it's still needed + * for DBG_OPT_CSTEVAL + */ + int node_size = offsetof(ir_node, attr) + n->op->attr_size; + oldn = alloca(node_size); + + memcpy(oldn, n, node_size); + CLONE_ARR_A(ir_node *, oldn->in, n->in); + + /* ARG, copy the in array, we need it for statistics */ + memcpy(oldn->in, n->in, ARR_LEN(n->in) * sizeof(n->in[0])); + + + edges_node_deleted(n, current_ir_graph); + + /* evaluation was successful -- replace the node. */ + obstack_free (current_ir_graph->obst, n); + nw = new_Const (get_tarval_mode (tv), tv); + + if (old_tp && get_type_mode(old_tp) == get_tarval_mode (tv)) + set_Const_type(nw, old_tp); + DBG_OPT_CSTEVAL(oldn, nw); + return nw; + } + } + } + + /* remove unnecessary nodes */ + if (get_opt_constant_folding() || + (iro == iro_Phi) || /* always optimize these nodes. */ + (iro == iro_Id) || + (iro == iro_Proj) || + (iro == iro_Block) ) /* Flags tested local. */ + n = equivalent_node (n); + + optimize_preds(n); /* do node specific optimizations of nodes predecessors. */ + + /** common subexpression elimination **/ + /* Checks whether n is already available. */ + /* The block input is used to distinguish different subexpressions. Right + now all nodes are op_pin_state_pinned to blocks, i.e., the cse only finds common + subexpressions within a block. */ + if (get_opt_cse()) + n = identify_cons (current_ir_graph->value_table, n); + + if (n != oldn) { + edges_node_deleted(oldn, current_ir_graph); + + /* We found an existing, better node, so we can deallocate the old node. */ + obstack_free (current_ir_graph->obst, oldn); + + return n; + } + + /* Some more constant expression evaluation that does not allow to + free the node. */ + iro = get_irn_opcode(n); + if (get_opt_constant_folding() || + (iro == iro_Cond) || + (iro == iro_Proj) || + (iro == iro_Sel)) /* Flags tested local. */ + n = transform_node (n); + + /* Remove nodes with dead (Bad) input. + Run always for transformation induced Bads. */ + n = gigo (n); + + /* Now we have a legal, useful node. Enter it in hash table for cse */ + if (get_opt_cse() && (get_irn_opcode(n) != iro_Block)) { + n = identify_remember (current_ir_graph->value_table, n); + } + + return n; } @@ -2342,7 +2985,8 @@ optimize_in_place_2 (ir_node *n) iro = get_irn_opcode(n); if (get_opt_constant_folding() || (iro == iro_Cond) || - (iro == iro_Proj)) /* Flags tested local. */ + (iro == iro_Proj) || + (iro == iro_Sel)) /* Flags tested local. */ n = transform_node (n); /* Remove nodes with dead (Bad) input. @@ -2391,6 +3035,7 @@ ir_op *firm_set_default_operations(ir_op *op) op = firm_set_default_equivalent_node(op); op = firm_set_default_transform_node(op); op = firm_set_default_node_cmp_attr(op); + op = firm_set_default_get_type(op); return op; }
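For reference, a minimal standalone sketch (not part of the patch, names are illustrative) of the bit-level identity that the new transform_node_Or_Rot relies on: for an N-bit unsigned value, (x << c) | (x >> (N - c)) is a left rotation by c, which is why an Or of a matching Shl/Shr pair can be folded into a single Rot node.

    #include <stdint.h>
    #include <assert.h>

    /* Rotate left by c, written exactly as the Shl/Shr/Or pattern that
       transform_node_Or_Rot matches (0 < c < 32 to avoid undefined shifts). */
    static uint32_t rot_left32(uint32_t x, unsigned c)
    {
        return (x << c) | (x >> (32u - c));
    }

    int main(void)
    {
        /* the Or of the two shifts behaves like a single rotation */
        assert(rot_left32(0x80000001u, 1) == 0x00000003u);
        assert(rot_left32(0x12345678u, 8) == 0x34567812u);
        return 0;
    }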