From ffd8ac2dd50f99c3c83d7d9d845df9874ec3e7d5 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 19 May 2013 12:13:08 +0000
Subject: [PATCH] math: fix two fma issues (only affects non-nearest rounding
 mode, x86)

1) in downward rounding fma(1,1,-1) should be -0 but it was 0 with
gcc, the code was correct but gcc does not support FENV_ACCESS ON
so it used common subexpression elimination where it shouldn't have.
now volatile memory access is used as a barrier after fesetround.

2) in directed rounding modes there is no double rounding issue
so the complicated adjustments done for nearest rounding mode are
not needed. the only exception to this rule is raising the underflow
flag: assume "small" is an exactly representible subnormal value in
double precision and "verysmall" is a much smaller value so that
	(long double)(small plus verysmall) == small
then
	(double)(small plus verysmall)
raises underflow because the result is an inexact subnormal, but
	(double)(long double)(small plus verysmall)
does not because small is not a subnormal in long double precision
and it is exact in double precision.
now this problem is fixed by checking inexact using fenv when the
result is subnormal
---
 src/math/fma.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/math/fma.c b/src/math/fma.c
index 17f1cdcc..89def795 100644
--- a/src/math/fma.c
+++ b/src/math/fma.c
@@ -119,9 +119,17 @@ double fma(double x, double y, double z)
 	} else if (ez > exy - 12) {
 		add(&hi, &lo2, xy, z);
 		if (hi == 0) {
+			/*
+			xy + z is 0, but it should be calculated with the
+			original rounding mode so the sign is correct, if the
+			compiler does not support FENV_ACCESS ON it does not
+			know about the changed rounding mode and eliminates
+			the xy + z below without the volatile memory access
+			*/
+			volatile double z_;
 			fesetround(round);
-			/* make sure that the sign of 0 is correct */
-			return (xy + z) + lo1;
+			z_ = z;
+			return (xy + z_) + lo1;
 		}
 	} else {
 		/*
@@ -135,10 +143,36 @@ double fma(double x, double y, double z)
 		hi = xy;
 		lo2 = z;
 	}
+	/*
+	the result is stored before return for correct precision and exceptions
+
+	one corner case is when the underflow flag should be raised because
+	the precise result is an inexact subnormal double, but the calculated
+	long double result is an exact subnormal double
+	(so rounding to double does not raise exceptions)
+
+	in nearest rounding mode dadd takes care of this: the last bit of the
+	result is adjusted so rounding sees an inexact value when it should
+
+	in non-nearest rounding mode fenv is used for the workaround
+	*/
 	fesetround(round);
 	if (round == FE_TONEAREST)
-		return dadd(hi, dadd(lo1, lo2));
-	return hi + (lo1 + lo2);
+		z = dadd(hi, dadd(lo1, lo2));
+	else {
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		int e = fetestexcept(FE_INEXACT);
+		feclearexcept(FE_INEXACT);
+#endif
+		z = hi + (lo1 + lo2);
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT))
+			feraiseexcept(FE_UNDERFLOW);
+		else if (e)
+			feraiseexcept(FE_INEXACT);
+#endif
+	}
+	return z;
 }
 #else
 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
-- 
2.20.1