+/* Overlay of a host 'long double' with 32-bit words, used to inspect or
+ * build the native bit pattern.  Word order follows host endianness; the
+ * 'mid' word only exists when long double is wider than 8 bytes (e.g. the
+ * x87 extended format).
+ * NOTE(review): reading 'val' after writing 'd' is type punning through a
+ * union -- the usual C idiom here, but layout is implementation-defined. */
+typedef union {
+ struct {
+#ifdef WORDS_BIGENDIAN
+ uint32_t high;
+#else
+ uint32_t low;
+#endif
+#ifndef SIZEOF_LONG_DOUBLE_8
+ uint32_t mid;
+#endif
+#ifdef WORDS_BIGENDIAN
+ uint32_t low;
+#else
+ uint32_t high;
+#endif
+ } val;
+ volatile long double d; /* volatile: presumably to defeat excess-precision caching -- TODO confirm */
+} value_t;
+
+/* Zero an fp_value-sized scratch buffer (calc_buffer_size bytes). */
+#define CLEAR_BUFFER(buffer) memset(buffer, 0, calc_buffer_size)
+
+/* our floating point value */
+struct fp_value {
+ ieee_descriptor_t desc; /* format descriptor: exponent/mantissa sizes, class, explicit_one */
+ char sign; /* 0 = positive, 1 = negative (see uses in normalize()) */
+ char value[1]; /* exp[value_size] + mant[value_size] */
+ /* ^ pre-C99 "struct hack": the struct is over-allocated so that
+ * value[] holds two strcalc numbers back-to-back. */
+};
+
+/* Accessors for the two strcalc numbers inside value[]: the exponent
+ * occupies the first value_size bytes, the mantissa the next. */
+#define _exp(a) &((a)->value[0])
+#define _mant(a) &((a)->value[value_size])
+
+/* Copy the implicit strcalc result buffer into x. */
+#define _save_result(x) memcpy((x), sc_get_buffer(), value_size)
+/* Logical shifts over the full working width (value_size*4 bits --
+ * presumably 4 bits per strcalc digit; verify against strcalc). */
+#define _shift_right(x, y, res) sc_shr((x), (y), value_size*4, 0, (res))
+#define _shift_left(x, y, res) sc_shl((x), (y), value_size*4, 0, (res))
+
+
+/* Debug/trace printf wrappers; call with double parentheses, e.g.
+ * DEBUGPRINTF(("x=%d\n", x)).  Compiled to a no-op unless enabled. */
+#ifdef FLTCALC_DEBUG
+# define DEBUGPRINTF(x) printf x
+#else
+# define DEBUGPRINTF(x) ((void)0)
+#endif
+
+#ifdef FLTCALC_TRACE_CALC
+# define TRACEPRINTF(x) printf x
+#else
+# define TRACEPRINTF(x) ((void)0)
+#endif
+
+/** The immediate precision. */
+static unsigned immediate_prec = 0;
+
+/** A temporary calculation buffer. */
+static fp_value *calc_buffer = NULL;
+
+/** Current rounding mode. */
+static fc_rounding_mode_t rounding_mode;
+
+static int calc_buffer_size; /* byte size of one complete fp_value (used for whole-value memcpy) */
+static int value_size; /* byte size of one strcalc number (exponent or mantissa part) */
+static int max_precision; /* maximum supported precision -- presumably in bits; TODO confirm */
+
+/** Exact flag: set by the arithmetic routines, non-zero if the last result was exact. */
+static int fc_exact = 1;
+
+/** pack machine-like */
+/**
+ * Pack an fp_value into machine (IEEE) bit layout: sign | exponent | mantissa.
+ * NaN and Inf inputs are first replaced by canonical bit patterns of the
+ * same format.  Returns 'packed' (a strcalc number).
+ */
+static void *pack(const fp_value *int_float, void *packed)
+{
+ char *shift_val;
+ char *temp;
+ fp_value *val_buffer;
+ int pos;
+
+ temp = (char*) alloca(value_size);
+ shift_val = (char*) alloca(value_size);
+
+ /* NOTE(review): NAN/INF here are value_class_t constants; beware a
+ * collision with the math.h NAN macro -- confirm header ordering. */
+ switch ((value_class_t)int_float->desc.clss) {
+ case NAN:
+ /* replace any NaN by the canonical quiet NaN of this format */
+ val_buffer = (fp_value*) alloca(calc_buffer_size);
+ fc_get_qnan(&int_float->desc, val_buffer);
+ int_float = val_buffer;
+ break;
+
+ case INF:
+ /* replace Inf by the canonical +Inf pattern, keeping the sign */
+ val_buffer = (fp_value*) alloca(calc_buffer_size);
+ fc_get_plusinf(&int_float->desc, val_buffer);
+ val_buffer->sign = int_float->sign;
+ int_float = val_buffer;
+ break;
+
+ default:
+ break;
+ }
+ assert(int_float->desc.explicit_one <= 1);
+
+ /* pack sign: move it to the left after exponent AND mantissa */
+ sc_val_from_ulong(int_float->sign, temp);
+
+ pos = int_float->desc.exponent_size + int_float->desc.mantissa_size + int_float->desc.explicit_one;
+ /* NULL destination: the result lands in the internal strcalc buffer,
+ * read back below via sc_get_buffer() */
+ sc_val_from_ulong(pos, NULL);
+ _shift_left(temp, sc_get_buffer(), packed);
+
+ /* pack exponent: move it to the left after mantissa */
+ pos = int_float->desc.mantissa_size + int_float->desc.explicit_one;
+ sc_val_from_ulong(pos, shift_val);
+ _shift_left(_exp(int_float), shift_val, temp);
+
+ /* combine sign|exponent */
+ sc_or(temp, packed, packed);
+
+ /* extract mantissa */
+ /* remove rounding bits */
+ sc_val_from_ulong(ROUNDING_BITS, shift_val);
+ _shift_right(_mant(int_float), shift_val, temp);
+
+ /* remove leading 1 (or 0 if denormalized); 'pos' still holds the
+ * stored-mantissa width from the exponent packing above */
+ sc_max_from_bits(pos, 0, shift_val); /* all mantissa bits are 1's */
+ sc_and(temp, shift_val, temp);
+
+ /* combine sign|exponent|mantissa */
+ sc_or(temp, packed, packed);
+
+ return packed;
+}
+
+/**
+ * Normalize a fp_value.
+ *
+ * @return non-zero if result is exact
+ */
+/* Normalize in_val into out_val (aliasing allowed) and apply the current
+ * rounding mode.  'sticky' carries in any precision already lost by the
+ * caller.  Returns non-zero if the result is exact. */
+static int normalize(const fp_value *in_val, fp_value *out_val, int sticky)
+{
+ int exact = 1;
+ int hsb;
+ char lsb, guard, round, round_dir = 0;
+ char *temp = (char*) alloca(value_size);
+
+ /* save rounding bits at the end */
+ /* hsb = distance of the highest set bit from its normalized position;
+ * negative -> mantissa too big (shift right), positive -> shift left */
+ hsb = ROUNDING_BITS + in_val->desc.mantissa_size - sc_get_highest_set_bit(_mant(in_val)) - 1;
+
+ if (in_val != out_val) {
+ out_val->sign = in_val->sign;
+ out_val->desc = in_val->desc;
+ }
+
+ out_val->desc.clss = NORMAL;
+
+ /* mantissa all zeros, so zero exponent (because of explicit one) */
+ if (hsb == ROUNDING_BITS + in_val->desc.mantissa_size) {
+ sc_val_from_ulong(0, _exp(out_val));
+ hsb = -1;
+ }
+
+ /* shift the first 1 into the left of the radix point (i.e. hsb == -1) */
+ if (hsb < -1) {
+ /* shift right */
+ sc_val_from_ulong(-hsb-1, temp);
+
+ _shift_right(_mant(in_val), temp, _mant(out_val));
+
+ /* remember if some bits were shifted away */
+ if (sc_had_carry()) {
+ exact = 0;
+ sticky = 1;
+ }
+ /* right shift of the mantissa must be compensated in the exponent */
+ sc_add(_exp(in_val), temp, _exp(out_val));
+ } else if (hsb > -1) {
+ /* shift left */
+ sc_val_from_ulong(hsb+1, temp);
+
+ _shift_left(_mant(in_val), temp, _mant(out_val));
+
+ sc_sub(_exp(in_val), temp, _exp(out_val));
+ }
+
+ /* check for exponent underflow */
+ if (sc_is_negative(_exp(out_val)) || sc_is_zero(_exp(out_val))) {
+ DEBUGPRINTF(("Exponent underflow!\n"));
+ /* exponent underflow */
+ /* shift the mantissa right to have a zero exponent */
+ sc_val_from_ulong(1, temp);
+ /* NULL destination: 1 - exp lands in the internal strcalc buffer,
+ * used as the shift amount right below */
+ sc_sub(temp, _exp(out_val), NULL);
+
+ _shift_right(_mant(out_val), sc_get_buffer(), _mant(out_val));
+ if (sc_had_carry()) {
+ exact = 0;
+ sticky = 1;
+ }
+ /* denormalized means exponent of zero */
+ sc_val_from_ulong(0, _exp(out_val));
+
+ out_val->desc.clss = SUBNORMAL;
+ }
+
+ /* perform rounding by adding a value that clears the guard bit and the round bit
+ * and either causes a carry to round up or not */
+ /* get the last 3 bits of the value: bit2 = mantissa lsb, bit1 = guard, bit0 = round */
+ lsb = sc_sub_bits(_mant(out_val), out_val->desc.mantissa_size + ROUNDING_BITS, 0) & 0x7;
+ guard = (lsb&0x2)>>1;
+ round = lsb&0x1;
+
+ switch (rounding_mode) {
+ case FC_TONEAREST:
+ /* round to nearest representable value, if in doubt choose the version
+ * with lsb == 0 */
+ round_dir = guard && (sticky || round || lsb>>2);
+ break;
+ case FC_TOPOSITIVE:
+ /* if positive: round to one if the exact value is bigger, else to zero */
+ round_dir = (!out_val->sign && (guard || round || sticky));
+ break;
+ case FC_TONEGATIVE:
+ /* if negative: round to one if the exact value is bigger, else to zero */
+ round_dir = (out_val->sign && (guard || round || sticky));
+ break;
+ case FC_TOZERO:
+ /* always round to 0 (chopping mode) */
+ round_dir = 0;
+ break;
+ }
+ DEBUGPRINTF(("Rounding (s%d, l%d, g%d, r%d, s%d) %s\n", out_val->sign, lsb>>2, guard, round, sticky, (round_dir)?"up":"down"));
+
+ if (round_dir == 1) {
+ /* round up: compute 4 - (guard<<1 | round), i.e. the amount that
+ * clears guard and round and carries +1 into the mantissa lsb */
+ guard = (round^guard)<<1;
+ lsb = !(round || guard)<<2 | guard | round;
+ } else {
+ /* round down: subtract the rounding bits, clearing them */
+ lsb = -((guard<<1) | round);
+ }
+
+ /* add the rounded value */
+ if (lsb != 0) {
+ sc_val_from_long(lsb, temp);
+ sc_add(_mant(out_val), temp, _mant(out_val));
+ exact = 0;
+ }
+
+ /* could have rounded down to zero */
+ if (sc_is_zero(_mant(out_val)) && (out_val->desc.clss == SUBNORMAL))
+ out_val->desc.clss = ZERO;
+
+ /* check for rounding overflow */
+ hsb = ROUNDING_BITS + out_val->desc.mantissa_size - sc_get_highest_set_bit(_mant(out_val)) - 1;
+ if ((out_val->desc.clss != SUBNORMAL) && (hsb < -1)) {
+ /* the carry of the rounding pushed the msb one position up */
+ sc_val_from_ulong(1, temp);
+ _shift_right(_mant(out_val), temp, _mant(out_val));
+ if (exact && sc_had_carry())
+ exact = 0;
+ sc_add(_exp(out_val), temp, _exp(out_val));
+ } else if ((out_val->desc.clss == SUBNORMAL) && (hsb == -1)) {
+ /* overflow caused the mantissa to be normal again,
+ * so adapt the exponent accordingly */
+ sc_val_from_ulong(1, temp);
+ sc_add(_exp(out_val), temp, _exp(out_val));
+
+ out_val->desc.clss = NORMAL;
+ }
+ /* no further rounding is needed, because rounding overflow means
+ * the carry of the original rounding was propagated all the way
+ * up to the bit left of the radix point. This implies the bits
+ * to the right are all zeros (rounding is +1) */
+
+ /* check for exponent overflow */
+ sc_val_from_ulong((1 << out_val->desc.exponent_size) - 1, temp);
+ if (sc_comp(_exp(out_val), temp) != -1) {
+ DEBUGPRINTF(("Exponent overflow!\n"));
+ /* exponent overflow, reaction depends on rounding method:
+ *
+ * mode | sign of value | result
+ *--------------------------------------------------------------
+ * TO_NEAREST | + | +inf
+ * | - | -inf
+ *--------------------------------------------------------------
+ * TO_POSITIVE | + | +inf
+ * | - | smallest representable value
+ *--------------------------------------------------------------
+ * TO_NEGATIVE | + | largest representable value
+ * | - | -inf
+ *--------------------------------------------------------------
+ * TO_ZERO | + | largest representable value
+ * | - | smallest representable value
+ *--------------------------------------------------------------*/
+ if (out_val->sign == 0) {
+ /* value is positive */
+ switch (rounding_mode) {
+ case FC_TONEAREST:
+ case FC_TOPOSITIVE:
+ out_val->desc.clss = INF;
+ break;
+
+ case FC_TONEGATIVE:
+ case FC_TOZERO:
+ fc_get_max(&out_val->desc, out_val);
+ }
+ } else {
+ /* value is negative */
+ switch (rounding_mode) {
+ case FC_TONEAREST:
+ case FC_TONEGATIVE:
+ out_val->desc.clss = INF;
+ break;
+
+ case FC_TOPOSITIVE:
+ case FC_TOZERO:
+ fc_get_min(&out_val->desc, out_val);
+ }
+ }
+ }
+ return exact;
+}
+
+/**
+ * Operations involving NaN's must return NaN.
+ * They are NOT exact.
+ */
+/* If either operand is NaN, copy it to 'result', clear fc_exact and return
+ * from the calling function ('a' takes precedence over 'b'). */
+#define handle_NAN(a, b, result) \
+do { \
+ if (a->desc.clss == NAN) { \
+ if (a != result) memcpy(result, a, calc_buffer_size); \
+ fc_exact = 0; \
+ return; \
+ } \
+ if (b->desc.clss == NAN) { \
+ if (b != result) memcpy(result, b, calc_buffer_size); \
+ fc_exact = 0; \
+ return; \
+ } \
+}while (0)
+
+
+/**
+ * calculate a + b, where a is the value with the bigger exponent
+ */
+/* Compute result = a + b.  Precondition (see caller contract): a is the
+ * operand with the bigger exponent, so exp(a) - exp(b) >= 0.  Aliasing of
+ * result with a or b is allowed.  Sets fc_exact. */
+static void _fadd(const fp_value *a, const fp_value *b, fp_value *result)
+{
+ char *temp;
+ char *exp_diff;
+
+ char sign, res_sign;
+ char sticky;
+
+ fc_exact = 1;
+
+ handle_NAN(a, b, result);
+
+ /* make sure result has a descriptor */
+ if (result != a && result != b)
+ result->desc = a->desc;
+
+ /* determine if this is an addition or subtraction */
+ sign = a->sign ^ b->sign;
+
+ /* produce NaN on inf - inf */
+ if (sign && (a->desc.clss == INF) && (b->desc.clss == INF)) {
+ fc_exact = 0;
+ fc_get_qnan(&a->desc, result);
+ return;
+ }
+
+ temp = (char*) alloca(value_size);
+ exp_diff = (char*) alloca(value_size);
+
+ /* get exponent difference (non-negative by precondition) */
+ sc_sub(_exp(a), _exp(b), exp_diff);
+
+ /* initially set sign to be the sign of a, special treatment of subtraction
+ * when exponents are equal is required though.
+ * Also special care about the sign is needed when the mantissas are equal
+ * (+/- 0 ?) */
+ if (sign && sc_val_to_long(exp_diff) == 0) {
+ switch (sc_comp(_mant(a), _mant(b))) {
+ case 1: /* a > b */
+ res_sign = a->sign; /* abs(a) is bigger and a is negative */
+ break;
+ case 0: /* a == b */
+ /* x - x: IEEE result is -0 only when rounding toward -inf */
+ res_sign = (rounding_mode == FC_TONEGATIVE);
+ break;
+ case -1: /* a < b */
+ res_sign = b->sign; /* abs(b) is bigger and b is negative */
+ break;
+ default:
+ /* can't be reached */
+ res_sign = 0;
+ break;
+ }
+ }
+ else
+ res_sign = a->sign;
+ result->sign = res_sign;
+
+ /* sign has been taken care of, check for special cases */
+ if (a->desc.clss == ZERO || b->desc.clss == INF) {
+ /* 0 + b == b, a + inf == inf: result is b */
+ if (b != result)
+ memcpy(result, b, calc_buffer_size);
+ fc_exact = b->desc.clss == NORMAL;
+ result->sign = res_sign;
+ return;
+ }
+ if (b->desc.clss == ZERO || a->desc.clss == INF) {
+ /* a + 0 == a, inf + b == inf: result is a */
+ if (a != result)
+ memcpy(result, a, calc_buffer_size);
+ fc_exact = a->desc.clss == NORMAL;
+ result->sign = res_sign;
+ return;
+ }
+
+ /* shift the smaller value to the right to align the radix point */
+ /* subnormals have their radix point shifted to the right,
+ * take care of this first */
+ if ((b->desc.clss == SUBNORMAL) && (a->desc.clss != SUBNORMAL)) {
+ sc_val_from_ulong(1, temp);
+ sc_sub(exp_diff, temp, exp_diff);
+ }
+
+ _shift_right(_mant(b), exp_diff, temp);
+ sticky = sc_had_carry();
+ fc_exact &= !sticky;
+
+ if (sticky && sign) {
+ /* if subtracting a little more than the represented value or adding a little
+ * more than the represented value to a negative value this, in addition to the
+ * still set sticky bit, takes account of the 'little more' */
+ char *temp1 = (char*) alloca(calc_buffer_size);
+ sc_val_from_ulong(1, temp1);
+ sc_add(temp, temp1, temp);
+ }
+
+ if (sign) {
+ /* subtraction: always subtract the smaller magnitude from the bigger */
+ if (sc_comp(_mant(a), temp) == -1)
+ sc_sub(temp, _mant(a), _mant(result));
+ else
+ sc_sub(_mant(a), temp, _mant(result));
+ } else {
+ sc_add(_mant(a), temp, _mant(result));
+ }
+
+ /* _normalize expects a 'normal' radix point, adding two subnormals
+ * results in a subnormal radix point -> shifting before normalizing */
+ if ((a->desc.clss == SUBNORMAL) && (b->desc.clss == SUBNORMAL)) {
+ sc_val_from_ulong(1, NULL);
+ _shift_left(_mant(result), sc_get_buffer(), _mant(result));
+ }
+
+ /* resulting exponent is the bigger one */
+ memmove(_exp(result), _exp(a), value_size);
+
+ fc_exact &= normalize(result, result, sticky);
+}
+
+/**
+ * calculate a * b
+ */
+/* Compute result = a * b.  Aliasing of result with a or b is allowed.
+ * Sets fc_exact. */
+static void _fmul(const fp_value *a, const fp_value *b, fp_value *result)
+{
+ int sticky;
+ char *temp;
+ char res_sign;
+
+ fc_exact = 1;
+
+ handle_NAN(a, b, result);
+
+ temp = (char*) alloca(value_size);
+
+ if (result != a && result != b)
+ result->desc = a->desc;
+
+ result->sign = res_sign = a->sign ^ b->sign;
+
+ /* produce NaN on 0 * inf */
+ if (a->desc.clss == ZERO) {
+ if (b->desc.clss == INF) {
+ fc_get_qnan(&a->desc, result);
+ fc_exact = 0;
+ } else {
+ /* 0 * x == 0 (with the combined sign) */
+ if (a != result)
+ memcpy(result, a, calc_buffer_size);
+ result->sign = res_sign;
+ }
+ return;
+ }
+ if (b->desc.clss == ZERO) {
+ if (a->desc.clss == INF) {
+ fc_get_qnan(&a->desc, result);
+ fc_exact = 0;
+ } else {
+ /* x * 0 == 0 (with the combined sign) */
+ if (b != result)
+ memcpy(result, b, calc_buffer_size);
+ result->sign = res_sign;
+ }
+ return;
+ }
+
+ /* inf * x == inf (0 already handled above) */
+ if (a->desc.clss == INF) {
+ fc_exact = 0;
+ if (a != result)
+ memcpy(result, a, calc_buffer_size);
+ result->sign = res_sign;
+ return;
+ }
+ if (b->desc.clss == INF) {
+ fc_exact = 0;
+ if (b != result)
+ memcpy(result, b, calc_buffer_size);
+ result->sign = res_sign;
+ return;
+ }
+
+ /* exp = exp(a) + exp(b) - excess */
+ sc_add(_exp(a), _exp(b), _exp(result));
+
+ /* excess (bias) = 2^(exponent_size-1) - 1 */
+ sc_val_from_ulong((1 << (a->desc.exponent_size - 1)) - 1, temp);
+ sc_sub(_exp(result), temp, _exp(result));
+
+ /* mixed normal, subnormal values introduce an error of 1, correct it */
+ if ((a->desc.clss == SUBNORMAL) ^ (b->desc.clss == SUBNORMAL)) {
+ sc_val_from_ulong(1, temp);
+ sc_add(_exp(result), temp, _exp(result));
+ }
+
+ sc_mul(_mant(a), _mant(b), _mant(result));
+
+ /* realign result: after a multiplication the digits right of the radix
+ * point are the sum of the factors' digits after the radix point. As all
+ * values are normalized they both have the same amount of these digits,
+ * which has to be restored by proper shifting
+ * because of the rounding bits */
+ sc_val_from_ulong(ROUNDING_BITS + result->desc.mantissa_size, temp);
+
+ _shift_right(_mant(result), temp, _mant(result));
+ sticky = sc_had_carry();
+ fc_exact &= !sticky;
+
+ fc_exact &= normalize(result, result, sticky);
+}
+
+/**
+ * calculate a / b
+ */
+/* Compute result = a / b.  Aliasing of result with a or b is allowed.
+ * Sets fc_exact. */
+static void _fdiv(const fp_value *a, const fp_value *b, fp_value *result)
+{
+ int sticky;
+ char *temp, *dividend;
+ char res_sign;
+
+ fc_exact = 1;
+
+ handle_NAN(a, b, result);
+
+ temp = (char*) alloca(value_size);
+ dividend = (char*) alloca(value_size);
+
+ if (result != a && result != b)
+ result->desc = a->desc;
+
+ result->sign = res_sign = a->sign ^ b->sign;
+
+ /* produce NAN on 0/0 and inf/inf */
+ if (a->desc.clss == ZERO) {
+ if (b->desc.clss == ZERO) {
+ /* 0/0 -> NaN */
+ fc_get_qnan(&a->desc, result);
+ fc_exact = 0;
+ } else {
+ /* 0/x -> a */
+ if (a != result)
+ memcpy(result, a, calc_buffer_size);
+ result->sign = res_sign;
+ }
+ return;
+ }
+
+ if (b->desc.clss == INF) {
+ fc_exact = 0;
+ if (a->desc.clss == INF) {
+ /* inf/inf -> NaN */
+ fc_get_qnan(&a->desc, result);
+ } else {
+ /* x/inf -> 0 */
+ sc_val_from_ulong(0, NULL);
+ _save_result(_exp(result));
+ _save_result(_mant(result));
+ result->desc.clss = ZERO;
+ }
+ return;
+ }
+
+ if (a->desc.clss == INF) {
+ fc_exact = 0;
+ /* inf/x -> inf */
+ if (a != result)
+ memcpy(result, a, calc_buffer_size);
+ result->sign = res_sign;
+ return;
+ }
+ if (b->desc.clss == ZERO) {
+ fc_exact = 0;
+ /* division by zero */
+ if (result->sign)
+ fc_get_minusinf(&a->desc, result);
+ else
+ fc_get_plusinf(&a->desc, result);
+ return;
+ }
+
+ /* exp = exp(a) - exp(b) + excess - 1*/
+ sc_sub(_exp(a), _exp(b), _exp(result));
+ sc_val_from_ulong((1 << (a->desc.exponent_size - 1)) - 2, temp);
+ sc_add(_exp(result), temp, _exp(result));
+
+ /* mixed normal, subnormal values introduce an error of 1, correct it */
+ if ((a->desc.clss == SUBNORMAL) ^ (b->desc.clss == SUBNORMAL)) {
+ sc_val_from_ulong(1, temp);
+ sc_add(_exp(result), temp, _exp(result));
+ }
+
+ /* mant(res) = mant(a) / 1/2mant(b) */
+ /* to gain more bits of precision in the result the dividend could be
+ * shifted left, as this operation does not lose bits. This would not
+ * fit into the integer precision, but due to the rounding bits (which
+ * are always zero because the values are all normalized) the divisor
+ * can be shifted right instead to achieve the same result */
+ sc_val_from_ulong(ROUNDING_BITS + result->desc.mantissa_size, temp);
+
+ _shift_left(_mant(a), temp, dividend);
+
+ {
+ char *divisor = (char*) alloca(calc_buffer_size);
+ /* shift the divisor right by one (its rounding bits are zero) */
+ sc_val_from_ulong(1, divisor);
+ _shift_right(_mant(b), divisor, divisor);
+ sc_div(dividend, divisor, _mant(result));
+ sticky = sc_had_carry();
+ fc_exact &= !sticky;
+ }
+
+ fc_exact &= normalize(result, result, sticky);
+}
+
+#if 0
+/* Disabled: build 10^exp as an fp_value in 'result'.
+ * NOTE(review): this dead code is out of sync with the live code --
+ * 'result' is declared char* but used as an fp_value* (result->sign,
+ * result->desc, _exp(result), ...), and it calls _normalize() while the
+ * current function is named normalize().  Revise before re-enabling. */
+static void _power_of_ten(int exp, ieee_descriptor_t *desc, char *result)
+{
+ char *build;
+ char *temp;
+
+ /* positive sign */
+ result->sign = 0;
+
+ /* set new descriptor (else result is supposed to already have one) */
+ if (desc != NULL)
+ result->desc = *desc;
+
+ build = alloca(value_size);
+ temp = alloca(value_size);
+
+ /* exponent starts at the bias, i.e. value 1.0 */
+ sc_val_from_ulong((1 << (result->desc.exponent_size - 1)) - 1, _exp(result));
+
+ if (exp > 0) {
+ /* temp is value of ten now */
+ sc_val_from_ulong(10, NULL);
+ _save_result(temp);
+
+ /* build = 10^exp by repeated multiplication */
+ for (exp--; exp > 0; exp--) {
+ _save_result(build);
+ sc_mul(build, temp, NULL);
+ }
+ _save_result(build);
+
+ /* temp is amount of left shift needed to put the value left of the radix point */
+ sc_val_from_ulong(result->desc.mantissa_size + ROUNDING_BITS, temp);
+
+ _shift_left(build, temp, _mant(result));
+
+ _normalize(result, result, 0);
+ }
+}
+#endif
+
+/**
+ * Truncate the fractional part away.
+ *
+ * This does not clip to any integer range.
+ */
+/**
+ * Truncate the fractional part away (round toward zero to an integral
+ * value).  This does not clip to any integer range.  Aliasing of result
+ * with a is allowed.  Sets fc_exact (currently always 0, see fixme).
+ */
+static void _trunc(const fp_value *a, fp_value *result)
+{
+ /*
+ * When exponent == 0 all bits left of the radix point
+ * are the integral part of the value. For 15bit exp_size
+ * this would require a left shift of max. 16383 bits which
+ * is too much.
+ * But it is enough to ensure that no bit right of the radix
+ * point remains set. This restricts the interesting
+ * exponents to the interval [0, mant_size-1].
+ * Outside this interval the truncated value is either 0 or
+ * it does not have fractional parts.
+ */
+
+ int exp_bias, exp_val;
+ char *temp;
+
+ /* fixme: can be exact */
+ fc_exact = 0;
+
+ temp = (char*) alloca(value_size);
+
+ if (a != result)
+ result->desc = a->desc;
+
+ /* unbias the exponent: exp_val is the true binary exponent */
+ exp_bias = (1 << (a->desc.exponent_size - 1)) - 1;
+ exp_val = sc_val_to_long(_exp(a)) - exp_bias;
+
+ if (exp_val < 0) {
+ /* |a| < 1 -> truncates to (signed) zero */
+ sc_val_from_ulong(0, NULL);
+ _save_result(_exp(result));
+ _save_result(_mant(result));
+ result->desc.clss = ZERO;
+
+ return;
+ }
+
+ if (exp_val > (long)a->desc.mantissa_size) {
+ /* no stored bit is fractional -> value is already integral */
+ if (a != result)
+ memcpy(result, a, calc_buffer_size);
+
+ return;
+ }
+
+ /* set up a proper mask to delete all bits right of the
+ * radix point if the mantissa had been shifted until exp == 0 */
+ sc_max_from_bits(1 + exp_val, 0, temp);
+ /* NOTE(review): the '+ 2' presumably relates to ROUNDING_BITS /
+ * explicit-one handling -- confirm against the format layout */
+ sc_val_from_long(a->desc.mantissa_size - exp_val + 2, NULL);
+ _shift_left(temp, sc_get_buffer(), temp);
+
+ /* AND the mask and return the result */
+ sc_and(_mant(a), temp, _mant(result));
+
+ if (a != result) {
+ memcpy(_exp(result), _exp(a), value_size);
+ result->sign = a->sign;
+ }
+}