From: Michael Beck Date: Wed, 25 Jul 2007 14:25:42 +0000 (+0000) Subject: added ieee754 exact flag which allows more cases to be optimized X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=8cc52245e109070a5e9094803f74cc6432a44b1a;p=libfirm added ieee754 exact flag which allows more cases to be optimized [r15334] --- diff --git a/include/libfirm/tv.h b/include/libfirm/tv.h index f41a42cb0..8a407c25d 100644 --- a/include/libfirm/tv.h +++ b/include/libfirm/tv.h @@ -609,6 +609,11 @@ int tarval_ieee754_get_exponent(tarval *tv); */ unsigned tarval_ieee754_set_immediate_precision(unsigned bits); +/** + * Returns non-zero if the result of the last IEEE-754 operation was exact. + */ +unsigned tarval_ieee754_get_exact(void); + /** * Enable/Disable floating point constant folding. */ diff --git a/ir/ir/iropt.c b/ir/ir/iropt.c index 609eb5901..5a329c8f8 100644 --- a/ir/ir/iropt.c +++ b/ir/ir/iropt.c @@ -2345,22 +2345,21 @@ static ir_node *transform_node_Quot(ir_node *n) { if (is_Const(b)) { tarval *tv = get_Const_tarval(b); - if (tarval_ieee754_zero_mantissa(tv)) { - tv = tarval_quo(get_mode_one(mode), tv); - if (tv != tarval_bad) { - ir_node *blk = get_irn_n(n, -1); - ir_node *c = new_r_Const(current_ir_graph, blk, mode, tv); - ir_node *a = get_Quot_left(n); - ir_node *m = new_rd_Mul(get_irn_dbg_info(n), current_ir_graph, blk, a, c, mode); - ir_node *mem = get_Quot_mem(n); - - turn_into_tuple(n, pn_Quot_max); - set_Tuple_pred(n, pn_Quot_M, mem); - set_Tuple_pred(n, pn_Quot_X_regular, new_r_Jmp(current_ir_graph, blk)); - set_Tuple_pred(n, pn_Quot_X_except, new_r_Bad(current_ir_graph)); - set_Tuple_pred(n, pn_Quot_res, m); - DBG_OPT_ALGSIM1(oldn, a, b, m, FS_OPT_FP_INV_MUL); - } + tv = tarval_quo(get_mode_one(mode), tv); + + if (tv != tarval_bad && tarval_ieee754_get_exact()) { + ir_node *blk = get_irn_n(n, -1); + ir_node *c = new_r_Const(current_ir_graph, blk, mode, tv); + ir_node *a = get_Quot_left(n); + ir_node *m = new_rd_Mul(get_irn_dbg_info(n), current_ir_graph, blk, a, c, mode); + ir_node *mem = get_Quot_mem(n); + + turn_into_tuple(n, pn_Quot_max); + set_Tuple_pred(n, pn_Quot_M, mem); + set_Tuple_pred(n, pn_Quot_X_regular, new_r_Jmp(current_ir_graph, blk)); + set_Tuple_pred(n, pn_Quot_X_except, new_r_Bad(current_ir_graph)); + set_Tuple_pred(n, pn_Quot_res, m); + DBG_OPT_ALGSIM1(oldn, a, b, m, FS_OPT_FP_INV_MUL); } } } diff --git a/ir/tv/fltcalc.c b/ir/tv/fltcalc.c index 4e498eacf..d9f7345ad 100644 --- a/ir/tv/fltcalc.c +++ b/ir/tv/fltcalc.c @@ -154,6 +154,9 @@ static int calc_buffer_size; static int value_size; static int max_precision; +/** Exact flag. */ +static int fc_exact = 1; + #if 0 static void fail_char(const char *str, unsigned int len, int pos) { if (*(str+pos)) @@ -221,7 +224,13 @@ static void *pack(const fp_value *int_float, void *packed) { return packed; } -static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { +/** + * Normalize a fp_value. + * + * @return non-zero if result is exact + */ +static int normalize(const fp_value *in_val, fp_value *out_val, int sticky) { + int exact = 1; int hsb; char lsb, guard, round, round_dir = 0; char *temp = alloca(value_size); @@ -236,7 +245,7 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { out_val->desc.clss = NORMAL; - /* mantissa all zeros, so zero exponent (because of explicit one)*/ + /* mantissa all zeros, so zero exponent (because of explicit one) */ if (hsb == 2 + in_val->desc.mantissa_size) { sc_val_from_ulong(0, _exp(out_val)); hsb = -1; @@ -250,8 +259,10 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { _shift_right(_mant(in_val), temp, _mant(out_val)); /* remember if some bits were shifted away */ - if (!sticky) sticky = sc_had_carry(); - + if (sc_had_carry()) { + exact = 0; + sticky = 1; + } sc_add(_exp(in_val), temp, _exp(out_val)); } else if (hsb > -1) { /* shift left */ @@ -271,7 +282,10 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { sc_sub(temp, _exp(out_val), NULL); _shift_right(_mant(out_val), sc_get_buffer(), _mant(out_val)); - if (!sticky) sticky = sc_had_carry(); + if (sc_had_carry()) { + exact = 0; + sticky = 1; + } /* denormalized means exponent of zero */ sc_val_from_ulong(0, _exp(out_val)); @@ -317,6 +331,7 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { if (lsb != 0) { sc_val_from_long(lsb, temp); sc_add(_mant(out_val), temp, _mant(out_val)); + exact = 0; } /* could have rounded down to zero */ @@ -328,7 +343,8 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { if ((out_val->desc.clss != SUBNORMAL) && (hsb < -1)) { sc_val_from_ulong(1, temp); _shift_right(_mant(out_val), temp, _mant(out_val)); - + if (exact && sc_had_carry()) + exact = 0; sc_add(_exp(out_val), temp, _exp(out_val)); } else if ((out_val->desc.clss == SUBNORMAL) && (hsb == -1)) { /* overflow caused the mantissa to be normal again, @@ -389,6 +405,7 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) { } } } + return exact; } /** @@ -417,6 +434,8 @@ static void _fadd(const fp_value *a, const fp_value *b, fp_value *result) { char sign, res_sign; char sticky; + fc_exact = 1; + handle_NAN(a, b, result); /* make sure result has a descriptor */ @@ -487,6 +506,7 @@ static void _fadd(const fp_value *a, const fp_value *b, fp_value *result) { _shift_right(_mant(b), exp_diff, temp); sticky = sc_had_carry(); + fc_exact &= !sticky; if (sticky && sign) { /* if subtracting a little more than the represented value or adding a little @@ -516,16 +536,19 @@ static void _fadd(const fp_value *a, const fp_value *b, fp_value *result) { /* resulting exponent is the bigger one */ memmove(_exp(result), _exp(a), value_size); - normalize(result, result, sticky); + fc_exact &= normalize(result, result, sticky); } /** * calculate a * b */ static void _fmul(const fp_value *a, const fp_value *b, fp_value *result) { + int sticky; char *temp; char res_sign; + fc_exact = 1; + handle_NAN(a, b, result); temp = alloca(value_size); @@ -592,17 +615,22 @@ static void _fmul(const fp_value *a, const fp_value *b, fp_value *result) { sc_val_from_ulong(2 + result->desc.mantissa_size, temp); _shift_right(_mant(result), temp, _mant(result)); + sticky = sc_had_carry(); + fc_exact &= !sticky; - normalize(result, result, sc_had_carry()); + fc_exact &= normalize(result, result, sticky); } /** * calculate a / b */ static void _fdiv(const fp_value *a, const fp_value *b, fp_value *result) { + int sticky; char *temp, *dividend; char res_sign; + fc_exact = 1; + handle_NAN(a, b, result); temp = alloca(value_size); @@ -683,9 +711,11 @@ static void _fdiv(const fp_value *a, const fp_value *b, fp_value *result) { sc_val_from_ulong(1, divisor); _shift_right(_mant(b), divisor, divisor); sc_div(dividend, divisor, _mant(result)); + sticky = sc_had_carry(); + fc_exact &= !sticky; } - normalize(result, result, sc_had_carry()); + fc_exact &= normalize(result, result, sticky); } #if 0 @@ -747,6 +777,9 @@ static void _trunc(const fp_value *a, fp_value *result) { int exp_bias, exp_val; char *temp; + /* fixme: can be exact */ + fc_exact = 0; + temp = alloca(value_size); if (a != result) @@ -1600,3 +1633,7 @@ unsigned fc_set_immediate_precision(unsigned bits) { immediate_prec = bits; return old; } + +int fc_is_exact(void) { + return fc_exact; +} diff --git a/ir/tv/fltcalc.h b/ir/tv/fltcalc.h index 9a939be61..9d77031bd 100644 --- a/ir/tv/fltcalc.h +++ b/ir/tv/fltcalc.h @@ -256,6 +256,11 @@ unsigned char fc_sub_bits(const fp_value *val, unsigned num_bit, unsigned byte_o */ unsigned fc_set_immediate_precision(unsigned bits); +/** + * Returns non-zero if the result of the last operation was exact. + */ +int fc_is_exact(void); + void init_fltcalc(int precision); void finish_fltcalc(void); diff --git a/ir/tv/strcalc.c b/ir/tv/strcalc.c index e893bc679..698483e3f 100644 --- a/ir/tv/strcalc.c +++ b/ir/tv/strcalc.c @@ -1579,9 +1579,9 @@ void sc_mul(const void *value1, const void *value2, void *buffer) { } } -int sc_div(const void *value1, const void *value2, void *buffer) { +void sc_div(const void *value1, const void *value2, void *buffer) { /* temp buffer holding unused result of divmod */ - char *mod_res = alloca(calc_buffer_size); + char *unused_res = alloca(calc_buffer_size); CLEAR_BUFFER(calc_buffer); carry_flag = 0; @@ -1589,14 +1589,13 @@ int sc_div(const void *value1, const void *value2, void *buffer) { DEBUGPRINTF_COMPUTATION(("%s / ", sc_print_hex(value1))); DEBUGPRINTF_COMPUTATION(("%s -> ", sc_print_hex(value2))); - _divmod(value1, value2, calc_buffer, mod_res); + _divmod(value1, value2, calc_buffer, unused_res); DEBUGPRINTF_COMPUTATION(("%s\n", sc_print_hex(calc_buffer))); if ((buffer != NULL) && (buffer != calc_buffer)) { memcpy(buffer, calc_buffer, calc_buffer_size); } - return sc_is_zero(mod_res); } void sc_mod(const void *value1, const void *value2, void *buffer) { diff --git a/ir/tv/strcalc.h b/ir/tv/strcalc.h index cd0c002d2..dedcfbf8c 100644 --- a/ir/tv/strcalc.h +++ b/ir/tv/strcalc.h @@ -129,10 +129,8 @@ void sc_mul(const void *value1, const void *value2, void *buffer); /** * buffer = value1 / value2 - * - * @return non-zero if the remainder is null. */ -int sc_div(const void *value1, const void *value2, void *buffer); +void sc_div(const void *value1, const void *value2, void *buffer); /** * buffer = value1 % value2 diff --git a/ir/tv/tv.c b/ir/tv/tv.c index 770681b20..2daab3ace 100644 --- a/ir/tv/tv.c +++ b/ir/tv/tv.c @@ -1580,6 +1580,11 @@ unsigned tarval_ieee754_set_immediate_precision(unsigned bits) { return fc_set_immediate_precision(bits); } +/* Returns non-zero if the result of the last IEEE-754 operation was exact. */ +unsigned tarval_ieee754_get_exact(void) { + return fc_is_exact(); +} + /* * Sets the overflow mode for integer operations. */