From: Michael Beck <beck@ipd.info.uni-karlsruhe.de>
Date: Wed, 25 Jul 2007 14:25:42 +0000 (+0000)
Subject: added ieee754 exact flag which allows more cases to be optimized
X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=8cc52245e109070a5e9094803f74cc6432a44b1a;p=libfirm

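Added an IEEE-754 exact flag, which allows more cases to be optimized: fltcalc now records whether the last floating-point operation was exact, so a Quot by a constant can be turned into a Mul whenever the reciprocal is computed exactly.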

[r15334]
---

diff --git a/include/libfirm/tv.h b/include/libfirm/tv.h
index f41a42cb0..8a407c25d 100644
--- a/include/libfirm/tv.h
+++ b/include/libfirm/tv.h
@@ -609,6 +609,11 @@ int tarval_ieee754_get_exponent(tarval *tv);
  */
 unsigned tarval_ieee754_set_immediate_precision(unsigned bits);
 
+/**
+ *  Returns non-zero if the result of the last IEEE-754 operation was exact.
+ */
+unsigned tarval_ieee754_get_exact(void);
+
 /**
  * Enable/Disable floating point constant folding.
  */
diff --git a/ir/ir/iropt.c b/ir/ir/iropt.c
index 609eb5901..5a329c8f8 100644
--- a/ir/ir/iropt.c
+++ b/ir/ir/iropt.c
@@ -2345,22 +2345,21 @@ static ir_node *transform_node_Quot(ir_node *n) {
 		if (is_Const(b)) {
 			tarval *tv = get_Const_tarval(b);
 
-			if (tarval_ieee754_zero_mantissa(tv)) {
-				tv = tarval_quo(get_mode_one(mode), tv);
-				if (tv != tarval_bad) {
-					ir_node *blk = get_irn_n(n, -1);
-					ir_node *c = new_r_Const(current_ir_graph, blk, mode, tv);
-					ir_node *a = get_Quot_left(n);
-					ir_node *m = new_rd_Mul(get_irn_dbg_info(n), current_ir_graph, blk, a, c, mode);
-					ir_node *mem = get_Quot_mem(n);
-
-					turn_into_tuple(n, pn_Quot_max);
-					set_Tuple_pred(n, pn_Quot_M, mem);
-					set_Tuple_pred(n, pn_Quot_X_regular, new_r_Jmp(current_ir_graph, blk));
-					set_Tuple_pred(n, pn_Quot_X_except,  new_r_Bad(current_ir_graph));
-					set_Tuple_pred(n, pn_Quot_res, m);
-					DBG_OPT_ALGSIM1(oldn, a, b, m, FS_OPT_FP_INV_MUL);
-				}
+			tv = tarval_quo(get_mode_one(mode), tv);
+
+			if (tv != tarval_bad && tarval_ieee754_get_exact()) {
+				ir_node *blk = get_irn_n(n, -1);
+				ir_node *c = new_r_Const(current_ir_graph, blk, mode, tv);
+				ir_node *a = get_Quot_left(n);
+				ir_node *m = new_rd_Mul(get_irn_dbg_info(n), current_ir_graph, blk, a, c, mode);
+				ir_node *mem = get_Quot_mem(n);
+
+				turn_into_tuple(n, pn_Quot_max);
+				set_Tuple_pred(n, pn_Quot_M, mem);
+				set_Tuple_pred(n, pn_Quot_X_regular, new_r_Jmp(current_ir_graph, blk));
+				set_Tuple_pred(n, pn_Quot_X_except,  new_r_Bad(current_ir_graph));
+				set_Tuple_pred(n, pn_Quot_res, m);
+				DBG_OPT_ALGSIM1(oldn, a, b, m, FS_OPT_FP_INV_MUL);
 			}
 		}
 	}
diff --git a/ir/tv/fltcalc.c b/ir/tv/fltcalc.c
index 4e498eacf..d9f7345ad 100644
--- a/ir/tv/fltcalc.c
+++ b/ir/tv/fltcalc.c
@@ -154,6 +154,9 @@ static int calc_buffer_size;
 static int value_size;
 static int max_precision;
 
+/** Exact flag: non-zero if the result of the last operation was exact (read via fc_is_exact()). */
+static int fc_exact = 1;
+
 #if 0
 static void fail_char(const char *str, unsigned int len, int pos) {
 	if (*(str+pos))
@@ -221,7 +224,13 @@ static void *pack(const fp_value *int_float, void *packed) {
 	return packed;
 }
 
-static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
+/**
+ * Normalize a fp_value.
+ *
+ * @return non-zero if result is exact
+ */
+static int normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
+	int exact = 1;
 	int hsb;
 	char lsb, guard, round, round_dir = 0;
 	char *temp = alloca(value_size);
@@ -236,7 +245,7 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
 
 	out_val->desc.clss = NORMAL;
 
-	/* mantissa all zeros, so zero exponent (because of explicit one)*/
+	/* mantissa all zeros, so zero exponent (because of explicit one) */
 	if (hsb == 2 + in_val->desc.mantissa_size)   {
 		sc_val_from_ulong(0, _exp(out_val));
 		hsb = -1;
@@ -250,8 +259,10 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
 		_shift_right(_mant(in_val), temp, _mant(out_val));
 
 		/* remember if some bits were shifted away */
-		if (!sticky) sticky = sc_had_carry();
-
+		if (sc_had_carry()) {
+			exact = 0;
+			sticky = 1;
+		}
 		sc_add(_exp(in_val), temp, _exp(out_val));
 	} else if (hsb > -1) {
 		/* shift left */
@@ -271,7 +282,10 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
 		sc_sub(temp, _exp(out_val), NULL);
 
 		_shift_right(_mant(out_val), sc_get_buffer(), _mant(out_val));
-		if (!sticky) sticky = sc_had_carry();
+		if (sc_had_carry()) {
+			exact  = 0;
+			sticky = 1;
+		}
 		/* denormalized means exponent of zero */
 		sc_val_from_ulong(0, _exp(out_val));
 
@@ -317,6 +331,7 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
 	if (lsb != 0) {
 		sc_val_from_long(lsb, temp);
 		sc_add(_mant(out_val), temp, _mant(out_val));
+		exact = 0;
 	}
 
 	/* could have rounded down to zero */
@@ -328,7 +343,8 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
 	if ((out_val->desc.clss != SUBNORMAL) && (hsb < -1)) {
 		sc_val_from_ulong(1, temp);
 		_shift_right(_mant(out_val), temp, _mant(out_val));
-
+		if (exact && sc_had_carry())
+			exact = 0;
 		sc_add(_exp(out_val), temp, _exp(out_val));
 	} else if ((out_val->desc.clss == SUBNORMAL) && (hsb == -1)) {
 		/* overflow caused the mantissa to be normal again,
@@ -389,6 +405,7 @@ static void normalize(const fp_value *in_val, fp_value *out_val, int sticky) {
 			}
 		}
 	}
+	return exact;
 }
 
 /**
@@ -417,6 +434,8 @@ static void _fadd(const fp_value *a, const fp_value *b, fp_value *result) {
 	char sign, res_sign;
 	char sticky;
 
+	fc_exact = 1;
+
 	handle_NAN(a, b, result);
 
 	/* make sure result has a descriptor */
@@ -487,6 +506,7 @@ static void _fadd(const fp_value *a, const fp_value *b, fp_value *result) {
 
 	_shift_right(_mant(b), exp_diff, temp);
 	sticky = sc_had_carry();
+	fc_exact &= !sticky;
 
 	if (sticky && sign) {
 		/* if subtracting a little more than the represented value or adding a little
@@ -516,16 +536,19 @@ static void _fadd(const fp_value *a, const fp_value *b, fp_value *result) {
 	/* resulting exponent is the bigger one */
 	memmove(_exp(result), _exp(a), value_size);
 
-	normalize(result, result, sticky);
+	fc_exact &= normalize(result, result, sticky);
 }
 
 /**
  * calculate a * b
  */
 static void _fmul(const fp_value *a, const fp_value *b, fp_value *result) {
+	int sticky;
 	char *temp;
 	char res_sign;
 
+	fc_exact = 1;
+
 	handle_NAN(a, b, result);
 
 	temp = alloca(value_size);
@@ -592,17 +615,22 @@ static void _fmul(const fp_value *a, const fp_value *b, fp_value *result) {
 	sc_val_from_ulong(2 + result->desc.mantissa_size, temp);
 
 	_shift_right(_mant(result), temp, _mant(result));
+	sticky = sc_had_carry();
+	fc_exact &= !sticky;
 
-	normalize(result, result, sc_had_carry());
+	fc_exact &= normalize(result, result, sticky);
 }
 
 /**
  * calculate a / b
  */
 static void _fdiv(const fp_value *a, const fp_value *b, fp_value *result) {
+	int sticky;
 	char *temp, *dividend;
 	char res_sign;
 
+	fc_exact = 1;
+
 	handle_NAN(a, b, result);
 
 	temp = alloca(value_size);
@@ -683,9 +711,11 @@ static void _fdiv(const fp_value *a, const fp_value *b, fp_value *result) {
 		sc_val_from_ulong(1, divisor);
 		_shift_right(_mant(b), divisor, divisor);
 		sc_div(dividend, divisor, _mant(result));
+		sticky = sc_had_carry();
+		fc_exact &= !sticky;
 	}
 
-	normalize(result, result, sc_had_carry());
+	fc_exact &= normalize(result, result, sticky);
 }
 
 #if 0
@@ -747,6 +777,9 @@ static void _trunc(const fp_value *a, fp_value *result) {
 	int exp_bias, exp_val;
 	char *temp;
 
+	/* FIXME: truncation can be exact; until that is detected, conservatively report inexact */
+	fc_exact = 0;
+
 	temp = alloca(value_size);
 
 	if (a != result)
@@ -1600,3 +1633,7 @@ unsigned fc_set_immediate_precision(unsigned bits) {
 	immediate_prec = bits;
 	return old;
 }
+
+int fc_is_exact(void) {
+	return fc_exact; /* non-zero if the last operation's result was exact */
+}
diff --git a/ir/tv/fltcalc.h b/ir/tv/fltcalc.h
index 9a939be61..9d77031bd 100644
--- a/ir/tv/fltcalc.h
+++ b/ir/tv/fltcalc.h
@@ -256,6 +256,11 @@ unsigned char fc_sub_bits(const fp_value *val, unsigned num_bit, unsigned byte_o
  */
 unsigned fc_set_immediate_precision(unsigned bits);
 
+/**
+ * Returns non-zero if the result of the last operation was exact.
+ */
+int fc_is_exact(void);
+
 void init_fltcalc(int precision);
 void finish_fltcalc(void);
 
diff --git a/ir/tv/strcalc.c b/ir/tv/strcalc.c
index e893bc679..698483e3f 100644
--- a/ir/tv/strcalc.c
+++ b/ir/tv/strcalc.c
@@ -1579,9 +1579,9 @@ void sc_mul(const void *value1, const void *value2, void *buffer) {
 	}
 }
 
-int sc_div(const void *value1, const void *value2, void *buffer) {
+void sc_div(const void *value1, const void *value2, void *buffer) {
 	/* temp buffer holding unused result of divmod */
-	char *mod_res = alloca(calc_buffer_size);
+	char *unused_res = alloca(calc_buffer_size);
 
 	CLEAR_BUFFER(calc_buffer);
 	carry_flag = 0;
@@ -1589,14 +1589,13 @@ int sc_div(const void *value1, const void *value2, void *buffer) {
 	DEBUGPRINTF_COMPUTATION(("%s / ", sc_print_hex(value1)));
 	DEBUGPRINTF_COMPUTATION(("%s -> ", sc_print_hex(value2)));
 
-	_divmod(value1, value2, calc_buffer, mod_res);
+	_divmod(value1, value2, calc_buffer, unused_res);
 
 	DEBUGPRINTF_COMPUTATION(("%s\n", sc_print_hex(calc_buffer)));
 
 	if ((buffer != NULL) && (buffer != calc_buffer)) {
 		memcpy(buffer, calc_buffer, calc_buffer_size);
 	}
-	return sc_is_zero(mod_res);
 }
 
 void sc_mod(const void *value1, const void *value2, void *buffer) {
diff --git a/ir/tv/strcalc.h b/ir/tv/strcalc.h
index cd0c002d2..dedcfbf8c 100644
--- a/ir/tv/strcalc.h
+++ b/ir/tv/strcalc.h
@@ -129,10 +129,8 @@ void sc_mul(const void *value1, const void *value2, void *buffer);
 
 /**
  * buffer = value1 / value2
- *
- * @return non-zero if the remainder is null.
  */
-int sc_div(const void *value1, const void *value2, void *buffer);
+void sc_div(const void *value1, const void *value2, void *buffer);
 
 /**
  * buffer = value1 % value2
diff --git a/ir/tv/tv.c b/ir/tv/tv.c
index 770681b20..2daab3ace 100644
--- a/ir/tv/tv.c
+++ b/ir/tv/tv.c
@@ -1580,6 +1580,11 @@ unsigned tarval_ieee754_set_immediate_precision(unsigned bits) {
 	return fc_set_immediate_precision(bits);
 }
 
+/* Returns non-zero if the result of the last IEEE-754 operation was exact; forwards to fc_is_exact(). */
+unsigned tarval_ieee754_get_exact(void) {
+	return fc_is_exact();
+}
+
 /*
  * Sets the overflow mode for integer operations.
  */