X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fmath%2Fi386%2Fexpl.s;h=b5124e8f1f5f4565de61d6221ff9652d9f60260b;hb=c5f4b2dfea320356f69445dc1adf8f73596a3c36;hp=3f2f707d02f849967d2e5b71d04294d5759109ee;hpb=a8f73bb1a685dd7d67669c6f6ceb255cfa967790;p=musl diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s index 3f2f707d..b5124e8f 100644 --- a/src/math/i386/expl.s +++ b/src/math/i386/expl.s @@ -1,41 +1,34 @@ # exp(x) = 2^hi + 2^hi (2^lo - 1) # where hi+lo = log2e*x with 128bit precision # exact log2e*x calculation depends on nearest rounding mode +# using the exact multiplication method of Dekker and Veltkamp .global expl .type expl,@function expl: fldt 4(%esp) - # special cases: 2*x is +-inf, nan or |x| < 0x1p-32 - # check (exponent|0x8000)+2 < 0xbfff+2-32 - movw 12(%esp), %ax - movw %ax, %dx - orw $0x8000, %dx - addw $2, %dx - cmpw $0xbfff-30, %dx - jnb 3f - cmpw $1, %dx - jbe 1f - # if |x|<0x1p-32 return 1+x + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 12(%esp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax fld1 - jmp 2f -1: testw %ax, %ax - jns 1f - # if 2*x == -inf,-nan return -0/x - fldz - fchs - fdivp + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) ret - # if 2*x == inf,nan return 2*x -1: fld %st(0) -2: faddp + # if |x|<0x1p-32 return 1+x +1: faddp ret - # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc # it will be wrong on non-nearest rounding mode -3: fldl2e -# subl $32, %esp +2: fldl2e subl $44, %esp # hi = log2e_hi*x # 2^hi = exp2l(hi) @@ -44,7 +37,8 @@ expl: fstpt (%esp) fstpt 16(%esp) fstpt 32(%esp) - call exp2l +.hidden __exp2l + call __exp2l # if 2^hi == inf return 2^hi fld %st(0) fstpt (%esp) @@ -54,7 +48,7 @@ expl: fldt 16(%esp) # fpu stack: 2^hi x hi # exact mult: x*log2e - fld %st(1) # x + fld %st(1) # c = 0x1p32+1 pushl $0x41f00000 pushl $0x00100000