X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fmath%2Fexp.c;h=b764d73cfe339c22579d28388b387ba22ba7eda0;hb=c5f4b2dfea320356f69445dc1adf8f73596a3c36;hp=29bf9609906bcd46359c764d082a6f211544125c;hpb=634c3a63027aa4a693b64fae0e2f6e1635558e93;p=musl diff --git a/src/math/exp.c b/src/math/exp.c index 29bf9609..b764d73c 100644 --- a/src/math/exp.c +++ b/src/math/exp.c @@ -1,156 +1,134 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */ /* - * ==================================================== - * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * Double-precision e^x function. * - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* exp(x) - * Returns the exponential of x. - * - * Method - * 1. Argument reduction: - * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. - * Given x, find r and integer k such that - * - * x = k*ln2 + r, |r| <= 0.5*ln2. - * - * Here r will be represented as r = hi-lo for better - * accuracy. - * - * 2. Approximation of exp(r) by a special rational function on - * the interval [0,0.34658]: - * Write - * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... - * We use a special Remes algorithm on [0,0.34658] to generate - * a polynomial of degree 5 to approximate R. The maximum error - * of this polynomial approximation is bounded by 2**-59. In - * other words, - * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 - * (where z=r*r, and the values of P1 to P5 are listed below) - * and - * | 5 | -59 - * | 2.0+P1*z+...+P5*z - R(z) | <= 2 - * | | - * The computation of exp(r) thus becomes - * 2*r - * exp(r) = 1 + ------- - * R - r - * r*R1(r) - * = 1 + r + ----------- (for better accuracy) - * 2 - R1(r) - * where - * 2 4 10 - * R1(r) = r - (P1*r + P2*r + ... + P5*r ). - * - * 3. Scale back to obtain exp(x): - * From step 1, we have - * exp(x) = 2^k * exp(r) - * - * Special cases: - * exp(INF) is INF, exp(NaN) is NaN; - * exp(-INF) is 0, and - * for finite argument, only exp(0)=1 is exact. - * - * Accuracy: - * according to an error analysis, the error is always less than - * 1 ulp (unit in the last place). - * - * Misc. info. - * For IEEE double - * if x > 7.09782712893383973096e+02 then exp(x) overflow - * if x < -7.45133219101941108420e+02 then exp(x) underflow - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. + * Copyright (c) 2018, Arm Limited. + * SPDX-License-Identifier: MIT */ +#include +#include #include "libm.h" +#include "exp_data.h" -static const double -halF[2] = {0.5,-0.5,}, -huge = 1.0e+300, -o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ -u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */ -ln2HI[2] = { 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ - -6.93147180369123816490e-01},/* 0xbfe62e42, 0xfee00000 */ -ln2LO[2] = { 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ - -1.90821492927058770002e-10},/* 0xbdea39ef, 0x35793c76 */ -invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ -P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ -P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ -P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ -P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ -P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ +#define N (1 << EXP_TABLE_BITS) +#define InvLn2N __exp_data.invln2N +#define NegLn2hiN __exp_data.negln2hiN +#define NegLn2loN __exp_data.negln2loN +#define Shift __exp_data.shift +#define T __exp_data.tab +#define C2 __exp_data.poly[5 - EXP_POLY_ORDER] +#define C3 __exp_data.poly[6 - EXP_POLY_ORDER] +#define C4 __exp_data.poly[7 - EXP_POLY_ORDER] +#define C5 __exp_data.poly[8 - EXP_POLY_ORDER] -static const volatile double -twom1000 = 9.33263618503218878990e-302; /* 2**-1000=0x01700000,0 */ +/* Handle cases that may overflow or underflow when computing the result that + is scale*(1+TMP) without intermediate rounding. The bit representation of + scale is in SBITS, however it has a computed exponent that may have + overflown into the sign bit so that needs to be adjusted before using it as + a double. (int32_t)KI is the k used in the argument reduction and exponent + adjustment of scale, positive k here means the result may overflow and + negative k means the result may underflow. */ +static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +{ + double_t scale, y; -double exp(double x) + if ((ki & 0x80000000) == 0) { + /* k > 0, the exponent of scale might have overflowed by <= 460. */ + sbits -= 1009ull << 52; + scale = asdouble(sbits); + y = 0x1p1009 * (scale + scale * tmp); + return eval_as_double(y); + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = asdouble(sbits); + y = scale + scale * tmp; + if (y < 1.0) { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t hi, lo; + lo = scale - y + scale * tmp; + hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = eval_as_double(hi + lo) - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) + y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); + } + y = 0x1p-1022 * y; + return eval_as_double(y); +} + +/* Top 12 bits of a double (sign and exponent bits). */ +static inline uint32_t top12(double x) { - double y,hi=0.0,lo=0.0,c,t,twopk; - int32_t k=0,xsb; - uint32_t hx; + return asuint64(x) >> 52; +} - GET_HIGH_WORD(hx, x); - xsb = (hx>>31)&1; /* sign bit of x */ - hx &= 0x7fffffff; /* high word of |x| */ +double exp(double x) +{ + uint32_t abstop; + uint64_t ki, idx, top, sbits; + double_t kd, z, r, r2, scale, tail, tmp; - /* filter out non-finite argument */ - if (hx >= 0x40862E42) { /* if |x| >= 709.78... */ - if (hx >= 0x7ff00000) { - uint32_t lx; - - GET_LOW_WORD(lx,x); - if (((hx&0xfffff)|lx) != 0) /* NaN */ - return x+x; - return xsb==0 ? x : 0.0; /* exp(+-inf)={inf,0} */ + abstop = top12(x) & 0x7ff; + if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { + if (abstop - top12(0x1p-54) >= 0x80000000) + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + return WANT_ROUNDING ? 1.0 + x : 1.0; + if (abstop >= top12(1024.0)) { + if (asuint64(x) == asuint64(-INFINITY)) + return 0.0; + if (abstop >= top12(INFINITY)) + return 1.0 + x; + if (asuint64(x) >> 63) + return __math_uflow(0); + else + return __math_oflow(0); } - if (x > o_threshold) - return huge*huge; /* overflow */ - if (x < u_threshold) - return twom1000*twom1000; /* underflow */ + /* Large x is special cased below. */ + abstop = 0; } - /* argument reduction */ - if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ - if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ - hi = x-ln2HI[xsb]; - lo = ln2LO[xsb]; - k = 1 - xsb - xsb; - } else { - k = (int)(invln2*x+halF[xsb]); - t = k; - hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ - lo = t*ln2LO[0]; - } - STRICT_ASSIGN(double, x, hi - lo); - } else if(hx < 0x3e300000) { /* |x| < 2**-28 */ - /* raise inexact */ - if (huge+x > 1.0) - return 1.0+x; - } else - k = 0; - - /* x is now in primary range */ - t = x*x; - if (k >= -1021) - INSERT_WORDS(twopk, 0x3ff00000+(k<<20), 0); - else - INSERT_WORDS(twopk, 0x3ff00000+((k+1000)<<20), 0); - c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); - if (k == 0) - return 1.0 - ((x*c)/(c-2.0) - x); - y = 1.0-((lo-(x*c)/(2.0-c))-hi); - if (k < -1021) - return y*twopk*twom1000; - if (k == 1024) - return y*2.0*0x1p1023; - return y*twopk; + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + z = InvLn2N * x; +#if TOINT_INTRINSICS + kd = roundtoint(z); + ki = converttoint(z); +#elif EXP_USE_TOINT_NARROW + /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ + kd = eval_as_double(z + Shift); + ki = asuint64(kd) >> 16; + kd = (double_t)(int32_t)ki; +#else + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + kd = eval_as_double(z + Shift); + ki = asuint64(kd); + kd -= Shift; +#endif + r = x + kd * NegLn2hiN + kd * NegLn2loN; + /* 2^(k/N) ~= scale * (1 + tail). */ + idx = 2 * (ki % N); + top = ki << (52 - EXP_TABLE_BITS); + tail = asdouble(T[idx]); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + sbits = T[idx + 1] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; + /* Without fma the worst case error is 0.25/N ulp larger. */ + /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ + tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); + if (predict_false(abstop == 0)) + return specialcase(tmp, sbits, ki); + scale = asdouble(sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return eval_as_double(scale + scale * tmp); }