4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
15 /* exact add, assumes exponent_x >= exponent_y */
16 static void add(long double *hi, long double *lo, long double x, long double y)
27 TODO(nsz): probably simpler mul is enough if we assume x and y are doubles
28 so last 11bits are all zeros, no subnormals etc
30 /* exact mul, assumes no over/underflow */
31 static void mul(long double *hi, long double *lo, long double x, long double y)
33 static const long double c = 1.0 + 0x1p32L;
34 long double cx, xh, xl, cy, yh, yl;
43 *lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl;
47 assume (long double)(hi+lo) == hi
48 return an adjusted hi so that rounding it to double is correct
50 static long double adjust(long double hi, long double lo)
57 if (uhi.bits.m & 0x3ff)
60 if (uhi.bits.s == ulo.bits.s)
67 static long double dadd(long double x, long double y)
73 static long double dmul(long double x, long double y)
79 static int getexp(long double x)
86 double fma(double x, double y, double z)
88 long double hi, lo1, lo2, xy;
91 /* handle +-inf,nan */
92 if (!isfinite(x) || !isfinite(y))
97 if (x == 0.0 || y == 0.0)
101 if (round == FE_TONEAREST)
106 /* exact mul and add require nearest rounding */
107 /* spurious inexact exceptions may be raised */
108 fesetround(FE_TONEAREST);
109 mul(&xy, &lo1, x, y);
113 add(&hi, &lo2, z, xy);
114 } else if (ez > exy - 12) {
115 add(&hi, &lo2, xy, z);
118 /* TODO: verify that the sign of 0 is always correct */
119 return (xy + z) + lo1;
124 the 12 extra bits (1guard, 11round+sticky) are needed so with
126 elo <= ehi - 11, and we use the last 10 bits in adjust so
128 gives correct result when rounded to double
134 if (round == FE_TONEAREST)
135 return dadd(hi, dadd(lo1, lo2));
136 return hi + (lo1 + lo2);
139 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
141 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
142 * All rights reserved.
144 * Redistribution and use in source and binary forms, with or without
145 * modification, are permitted provided that the following conditions
147 * 1. Redistributions of source code must retain the above copyright
148 * notice, this list of conditions and the following disclaimer.
149 * 2. Redistributions in binary form must reproduce the above copyright
150 * notice, this list of conditions and the following disclaimer in the
151 * documentation and/or other materials provided with the distribution.
153 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
154 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
155 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
156 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
157 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
158 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
159 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
160 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
161 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
162 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
167 * A struct dd represents a floating-point number with twice the precision
168 * of a double. We maintain the invariant that "hi" stores the 53 high-order
169 * bits of the result.
177 * Compute a+b exactly, returning the exact result in a struct dd. We assume
178 * that both a and b are finite, but make no assumptions about their relative
181 static inline struct dd dd_add(double a, double b)
188 ret.lo = (a - (ret.hi - s)) + (b - s);
193 * Compute a+b, with a small tweak: The least significant bit of the
194 * result is adjusted into a sticky bit summarizing all the bits that
195 * were lost to rounding. This adjustment negates the effects of double
196 * rounding when the result is added to another number with a higher
197 * exponent. For an explanation of round and sticky bits, see any reference
198 * on FPU design, e.g.,
200 * J. Coonen. An Implementation Guide to a Proposed Standard for
201 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
203 static inline double add_adjusted(double a, double b)
206 uint64_t hibits, lobits;
210 EXTRACT_WORD64(hibits, sum.hi);
211 if ((hibits & 1) == 0) {
212 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
213 EXTRACT_WORD64(lobits, sum.lo);
214 hibits += 1 - ((hibits ^ lobits) >> 62);
215 INSERT_WORD64(sum.hi, hibits);
222 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
223 * that the result will be subnormal, and care is taken to ensure that
224 * double rounding does not occur.
226 static inline double add_and_denormalize(double a, double b, int scale)
229 uint64_t hibits, lobits;
235 * If we are losing at least two bits of accuracy to denormalization,
236 * then the first lost bit becomes a round bit, and we adjust the
237 * lowest bit of sum.hi to make it a sticky bit summarizing all the
238 * bits in sum.lo. With the sticky bit adjusted, the hardware will
239 * break any ties in the correct direction.
241 * If we are losing only one bit to denormalization, however, we must
242 * break the ties manually.
245 EXTRACT_WORD64(hibits, sum.hi);
246 bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
247 if (bits_lost != 1 ^ (int)(hibits & 1)) {
248 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
249 EXTRACT_WORD64(lobits, sum.lo);
250 hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
251 INSERT_WORD64(sum.hi, hibits);
254 return (ldexp(sum.hi, scale));
258 * Compute a*b exactly, returning the exact result in a struct dd. We assume
259 * that both a and b are normalized, so no underflow or overflow will occur.
260 * The current rounding mode must be round-to-nearest.
262 static inline struct dd dd_mul(double a, double b)
264 static const double split = 0x1p27 + 1.0;
266 double ha, hb, la, lb, p, q;
279 q = ha * lb + la * hb;
282 ret.lo = p - ret.hi + q + la * lb;
287 * Fused multiply-add: Compute x * y + z with a single rounding error.
289 * We use scaling to avoid overflow/underflow, along with the
290 * canonical precision-doubling technique adapted from:
292 * Dekker, T. A Floating-Point Technique for Extending the
293 * Available Precision. Numer. Math. 18, 224-242 (1971).
295 * This algorithm is sensitive to the rounding precision. FPUs such
296 * as the i387 must be set in double-precision mode if variables are
297 * to be stored in FP registers in order to avoid incorrect results.
298 * This is the default on FreeBSD, but not on many other systems.
300 * Hardware instructions should be used on architectures that support it,
301 * since this implementation will likely be several times slower.
303 double fma(double x, double y, double z)
305 double xs, ys, zs, adj;
312 * Handle special cases. The order of operations and the particular
313 * return values here are crucial in handling special cases involving
314 * infinities, NaNs, overflows, and signed zeroes correctly.
316 if (!isfinite(x) || !isfinite(y))
320 if (x == 0.0 || y == 0.0)
328 oround = fegetround();
329 spread = ex + ey - ez;
332 * If x * y and z are many orders of magnitude apart, the scaling
333 * will overflow, so we handle these cases specially. Rounding
334 * modes other than FE_TONEAREST are painful.
336 if (spread < -DBL_MANT_DIG) {
338 feraiseexcept(FE_INEXACT);
342 feraiseexcept(FE_UNDERFLOW);
345 default: /* FE_TONEAREST */
349 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
352 return (nextafter(z, 0));
356 if (x > 0.0 ^ y < 0.0)
359 return (nextafter(z, -INFINITY));
363 if (x > 0.0 ^ y < 0.0)
364 return (nextafter(z, INFINITY));
370 if (spread <= DBL_MANT_DIG * 2)
371 zs = ldexp(zs, -spread);
373 zs = copysign(DBL_MIN, zs);
375 fesetround(FE_TONEAREST);
378 * Basic approach for round-to-nearest:
380 * (xy.hi, xy.lo) = x * y (exact)
381 * (r.hi, r.lo) = xy.hi + z (exact)
382 * adj = xy.lo + r.lo (inexact; low bit is sticky)
383 * result = r.hi + adj (correctly rounded)
386 r = dd_add(xy.hi, zs);
392 * When the addends cancel to 0, ensure that the result has
396 volatile double vzs = zs; /* XXX gcc CSE bug workaround */
397 return (xy.hi + vzs + ldexp(xy.lo, spread));
400 if (oround != FE_TONEAREST) {
402 * There is no need to worry about double rounding in directed
407 return (ldexp(r.hi + adj, spread));
410 adj = add_adjusted(r.lo, xy.lo);
411 if (spread + ilogb(r.hi) > -1023)
412 return (ldexp(r.hi + adj, spread));
414 return (add_and_denormalize(r.hi, adj, spread));