From 5652d70054daf3c2c9b6d475fdf9d24a940e51aa Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 1 Jan 2013 22:20:45 +0100 Subject: [PATCH] math: bessel cleanup (jn.c and jnf.c) both jn and yn functions had integer overflow issues for large and small n to handle these issues nm1 (== |n|-1) is used instead of n and -n in the code and some loops are changed to make sure the iteration counter does not overflow (another solution could be to use larger integer type or even double but that has more size and runtime cost, on x87 loading int64_t or even uint32_t into an fpu register is more than two times slower than loading int32_t, and using double for n slows down iteration logic) yn(-1,0) now returns inf posix2008 specifies that on overflow and at +-0 all y0,y1,yn functions return -inf, this is not consistent with math when n<0 odd integer in yn (eg. when x->0, yn(-1,x)->inf, but historically yn(-1,0) seems to be special cased and returned -inf) some threshold values in jnf and ynf were fixed that seems to be incorrectly copy-pasted from the double version --- src/math/jn.c | 171 +++++++++++++++++++++++++------------------------ src/math/jnf.c | 154 +++++++++++++++++++++----------------------- 2 files changed, 161 insertions(+), 164 deletions(-) diff --git a/src/math/jn.c b/src/math/jn.c index d95af15d..4878a54f 100644 --- a/src/math/jn.c +++ b/src/math/jn.c @@ -20,7 +20,7 @@ * Note 2. About jn(n,x), yn(n,x) * For n=0, j0(x) is called, * for n=1, j1(x) is called, - * for nx, a continued fraction approximation to * j(n,x)/j(n-1,x) is evaluated and then backward @@ -32,7 +32,6 @@ * yn(n,x) is similar in all respects, except * that forward recursion is used for all * values of n>1. - * */ #include "libm.h" @@ -41,33 +40,39 @@ static const double invsqrtpi = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x504 double jn(int n, double x) { - int32_t i,hx,ix,lx,sgn; - double a, b, temp, di; - double z, w; + uint32_t ix, lx; + int nm1, i, sign; + double a, b, temp; + + EXTRACT_WORDS(ix, lx, x); + sign = ix>>31; + ix &= 0x7fffffff; + + if ((ix | (lx|-lx)>>31) > 0x7ff00000) /* nan */ + return x; /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) * Thus, J(-n,x) = J(n,-x) */ - EXTRACT_WORDS(hx, lx, x); - ix = 0x7fffffff & hx; - /* if J(n,NaN) is NaN */ - if ((ix|((uint32_t)(lx|-lx))>>31) > 0x7ff00000) - return x+x; + /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ + if (n == 0) + return j0(x); if (n < 0) { - n = -n; + nm1 = -(n+1); x = -x; - hx ^= 0x80000000; - } - if (n == 0) return j0(x); - if (n == 1) return j1(x); + sign ^= 1; + } else + nm1 = n-1; + if (nm1 == 0) + return j1(x); - sgn = (n&1)&(hx>>31); /* even n -- 0, odd n -- sign(x) */ + sign &= n; /* even n: 0, odd n: signbit(x) */ x = fabs(x); - if ((ix|lx) == 0 || ix >= 0x7ff00000) /* if x is 0 or inf */ + if ((ix|lx) == 0 || ix == 0x7ff00000) /* if x is 0 or inf */ b = 0.0; - else if ((double)n <= x) { + else if (nm1 < x) { /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ - if (ix >= 0x52D00000) { /* x > 2**302 */ + if (ix >= 0x52d00000) { /* x > 2**302 */ /* (x >> n**2) * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) @@ -81,19 +86,21 @@ double jn(int n, double x) * 2 -s+c -c-s * 3 s+c c-s */ - switch(n&3) { - case 0: temp = cos(x)+sin(x); break; - case 1: temp = -cos(x)+sin(x); break; - case 2: temp = -cos(x)-sin(x); break; - case 3: temp = cos(x)-sin(x); break; + switch(nm1&3) { + case 0: temp = -cos(x)+sin(x); break; + case 1: temp = -cos(x)-sin(x); break; + case 2: temp = cos(x)-sin(x); break; + default: + case 3: temp = cos(x)+sin(x); break; } b = invsqrtpi*temp/sqrt(x); } else { a = j0(x); b = j1(x); - for (i=1; i 33) /* underflow */ + if (nm1 > 32) /* underflow */ b = 0.0; else { temp = x*0.5; b = temp; - for (a=1.0,i=2; i<=n; i++) { + a = 1.0; + for (i=2; i<=nm1+1; i++) { a *= (double)i; /* a = n! */ b *= temp; /* b = (x/2)^n */ } @@ -143,13 +151,14 @@ double jn(int n, double x) * When Q(k) > 1e17 good for quadruple */ /* determine k */ - double t,v; - double q0,q1,h,tmp; - int32_t k,m; + double t,q0,q1,w,h,z,tmp,nf; + int k; - w = (n+n)/(double)x; h = 2.0/(double)x; - q0 = w; + nf = nm1 + 1.0; + w = 2*nf/x; + h = 2/x; z = w+h; + q0 = w; q1 = w*z - 1.0; k = 1; while (q1 < 1.0e9) { @@ -159,9 +168,8 @@ double jn(int n, double x) q0 = q1; q1 = tmp; } - m = n+n; - for (t=0.0, i = 2*(n+k); i>=m; i -= 2) - t = 1.0/(i/x-t); + for (t=0.0, i=k; i>=0; i--) + t = 1/(2*(i+nf)/x - t); a = t; b = 1.0; /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) @@ -172,26 +180,20 @@ double jn(int n, double x) * then recurrent value may overflow and the result is * likely underflow to zero */ - tmp = n; - v = 2.0/x; - tmp = tmp*log(fabs(v*tmp)); + tmp = nf*log(fabs(w)); if (tmp < 7.09782712893383973096e+02) { - for (i=n-1,di=(double)(i+i); i>0; i--) { + for (i=nm1; i>0; i--) { temp = b; - b *= di; - b = b/x - a; + b = b*(2.0*i)/x - a; a = temp; - di -= 2.0; } } else { - for (i=n-1,di=(double)(i+i); i>0; i--) { + for (i=nm1; i>0; i--) { temp = b; - b *= di; - b = b/x - a; + b = b*(2.0*i)/x - a; a = temp; - di -= 2.0; /* scale b to avoid spurious overflow */ - if (b > 1e100) { + if (b > 0x1p500) { a /= b; t /= b; b = 1.0; @@ -206,39 +208,40 @@ double jn(int n, double x) b = t*w/a; } } - if (sgn==1) return -b; - return b; + return sign ? -b : b; } - double yn(int n, double x) { - int32_t i,hx,ix,lx; - int32_t sign; + uint32_t ix, lx, ib; + int nm1, sign, i; double a, b, temp; - EXTRACT_WORDS(hx, lx, x); - ix = 0x7fffffff & hx; - /* if Y(n,NaN) is NaN */ - if ((ix|((uint32_t)(lx|-lx))>>31) > 0x7ff00000) - return x+x; - if ((ix|lx) == 0) - return -1.0/0.0; - if (hx < 0) - return 0.0/0.0; - sign = 1; - if (n < 0) { - n = -n; - sign = 1 - ((n&1)<<1); - } - if (n == 0) - return y0(x); - if (n == 1) - return sign*y1(x); + EXTRACT_WORDS(ix, lx, x); + sign = ix>>31; + ix &= 0x7fffffff; + + if ((ix | (lx|-lx)>>31) > 0x7ff00000) /* nan */ + return x; + if (sign && (ix|lx)!=0) /* x < 0 */ + return 0/0.0; if (ix == 0x7ff00000) return 0.0; - if (ix >= 0x52D00000) { /* x > 2**302 */ + + if (n == 0) + return y0(x); + if (n < 0) { + nm1 = -(n+1); + sign = n&1; + } else { + nm1 = n-1; + sign = 0; + } + if (nm1 == 0) + return sign ? -y1(x) : y1(x); + + if (ix >= 0x52d00000) { /* x > 2**302 */ /* (x >> n**2) * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) @@ -252,26 +255,26 @@ double yn(int n, double x) * 2 -s+c -c-s * 3 s+c c-s */ - switch(n&3) { - case 0: temp = sin(x)-cos(x); break; - case 1: temp = -sin(x)-cos(x); break; - case 2: temp = -sin(x)+cos(x); break; - case 3: temp = sin(x)+cos(x); break; + switch(nm1&3) { + case 0: temp = -sin(x)-cos(x); break; + case 1: temp = -sin(x)+cos(x); break; + case 2: temp = sin(x)+cos(x); break; + default: + case 3: temp = sin(x)-cos(x); break; } b = invsqrtpi*temp/sqrt(x); } else { - uint32_t high; a = y0(x); b = y1(x); /* quit if b is -inf */ - GET_HIGH_WORD(high, b); - for (i=1; i 0) return b; - return -b; + return sign ? -b : b; } diff --git a/src/math/jnf.c b/src/math/jnf.c index fd291103..f63c062f 100644 --- a/src/math/jnf.c +++ b/src/math/jnf.c @@ -18,55 +18,57 @@ float jnf(int n, float x) { - int32_t i,hx,ix, sgn; - float a, b, temp, di; - float z, w; + uint32_t ix; + int nm1, sign, i; + float a, b, temp; + + GET_FLOAT_WORD(ix, x); + sign = ix>>31; + ix &= 0x7fffffff; + if (ix > 0x7f800000) /* nan */ + return x; - /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) - * Thus, J(-n,x) = J(n,-x) - */ - GET_FLOAT_WORD(hx, x); - ix = 0x7fffffff & hx; - /* if J(n,NaN) is NaN */ - if (ix > 0x7f800000) - return x+x; + /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ + if (n == 0) + return j0f(x); if (n < 0) { - n = -n; + nm1 = -(n+1); x = -x; - hx ^= 0x80000000; - } - if (n == 0) return j0f(x); - if (n == 1) return j1f(x); + sign ^= 1; + } else + nm1 = n-1; + if (nm1 == 0) + return j1f(x); - sgn = (n&1)&(hx>>31); /* even n -- 0, odd n -- sign(x) */ + sign &= n; /* even n: 0, odd n: signbit(x) */ x = fabsf(x); - if (ix == 0 || ix >= 0x7f800000) /* if x is 0 or inf */ + if (ix == 0 || ix == 0x7f800000) /* if x is 0 or inf */ b = 0.0f; - else if((float)n <= x) { + else if (nm1 < x) { /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ a = j0f(x); b = j1f(x); - for (i=1; i 33) /* underflow */ - b = 0.0f; - else { - temp = 0.5f * x; - b = temp; - for (a=1.0f,i=2; i<=n; i++) { - a *= (float)i; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - } - b = b/a; + if (nm1 > 8) /* underflow */ + nm1 = 8; + temp = 0.5f * x; + b = temp; + a = 1.0f; + for (i=2; i<=nm1+1; i++) { + a *= (float)i; /* a = n! */ + b *= temp; /* b = (x/2)^n */ } + b = b/a; } else { /* use backward recurrence */ /* x x^2 x^2 @@ -97,26 +99,25 @@ float jnf(int n, float x) * When Q(k) > 1e17 good for quadruple */ /* determine k */ - float t,v; - float q0,q1,h,tmp; - int32_t k,m; + float t,q0,q1,w,h,z,tmp,nf; + int k; - w = (n+n)/x; - h = 2.0f/x; + nf = nm1+1.0f; + w = 2*nf/x; + h = 2/x; z = w+h; q0 = w; q1 = w*z - 1.0f; k = 1; - while (q1 < 1.0e9f) { + while (q1 < 1.0e4f) { k += 1; z += h; tmp = z*q1 - q0; q0 = q1; q1 = tmp; } - m = n+n; - for (t=0.0f, i = 2*(n+k); i>=m; i -= 2) - t = 1.0f/(i/x-t); + for (t=0.0f, i=k; i>=0; i--) + t = 1.0f/(2*(i+nf)/x-t); a = t; b = 1.0f; /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) @@ -127,26 +128,20 @@ float jnf(int n, float x) * then recurrent value may overflow and the result is * likely underflow to zero */ - tmp = n; - v = 2.0f/x; - tmp = tmp*logf(fabsf(v*tmp)); + tmp = nf*logf(fabsf(w)); if (tmp < 88.721679688f) { - for (i=n-1,di=(float)(i+i); i>0; i--) { + for (i=nm1; i>0; i--) { temp = b; - b *= di; - b = b/x - a; + b = 2.0f*i*b/x - a; a = temp; - di -= 2.0f; } } else { - for (i=n-1,di=(float)(i+i); i>0; i--){ + for (i=nm1; i>0; i--){ temp = b; - b *= di; - b = b/x - a; + b = 2.0f*i*b/x - a; a = temp; - di -= 2.0f; /* scale b to avoid spurious overflow */ - if (b > 1e10f) { + if (b > 0x1p60f) { a /= b; t /= b; b = 1.0f; @@ -161,48 +156,47 @@ float jnf(int n, float x) b = t*w/a; } } - if (sgn == 1) return -b; - return b; + return sign ? -b : b; } float ynf(int n, float x) { - int32_t i,hx,ix,ib; - int32_t sign; + uint32_t ix, ib; + int nm1, sign, i; float a, b, temp; - GET_FLOAT_WORD(hx, x); - ix = 0x7fffffff & hx; - /* if Y(n,NaN) is NaN */ - if (ix > 0x7f800000) - return x+x; - if (ix == 0) - return -1.0f/0.0f; - if (hx < 0) - return 0.0f/0.0f; - sign = 1; - if (n < 0) { - n = -n; - sign = 1 - ((n&1)<<1); - } - if (n == 0) - return y0f(x); - if (n == 1) - return sign*y1f(x); + GET_FLOAT_WORD(ix, x); + sign = ix>>31; + ix &= 0x7fffffff; + if (ix > 0x7f800000) /* nan */ + return x; + if (sign && ix != 0) /* x < 0 */ + return 0/0.0f; if (ix == 0x7f800000) return 0.0f; + if (n == 0) + return y0f(x); + if (n < 0) { + nm1 = -(n+1); + sign = n&1; + } else { + nm1 = n-1; + sign = 0; + } + if (nm1 == 0) + return sign ? -y1f(x) : y1f(x); + a = y0f(x); b = y1f(x); /* quit if b is -inf */ GET_FLOAT_WORD(ib,b); - for (i = 1; i < n && ib != 0xff800000; i++){ + for (i = 0; i < nm1 && ib != 0xff800000; ) { + i++; temp = b; - b = ((float)(i+i)/x)*b - a; + b = (2.0f*i/x)*b - a; GET_FLOAT_WORD(ib, b); a = temp; } - if (sign > 0) - return b; - return -b; + return sign ? -b : b; } -- 2.20.1