X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fmath%2Fi386%2Fexp.s;h=abb9036957ad04bf9c4579ec5aa3251e69165e3f;hb=7acbbdfd5d6e264dcf8efa13188f0c8edabee047;hp=ca0de1d4f2ac5b7a5db23de24777fb75bd0c7002;hpb=97721a5508415a2f10eb068e022093811c9ff8be;p=musl diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index ca0de1d4..abb90369 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -2,7 +2,18 @@ .type expm1f,@function expm1f: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fld %st(0) + fmul %st(1) + fstps 4(%esp) +2: ret .global expm1l .type expm1l,@function @@ -14,10 +25,32 @@ expm1l: .type expm1,@function expm1: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fsts 4(%esp) +2: ret 1: fldl2e fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf fld1 - fld %st(1) + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) fabs fucom %st(1) fnstsw %ax @@ -50,12 +83,6 @@ expf: flds 4(%esp) jmp 2f -.global expl -.type expl,@function -expl: - fldt 4(%esp) - jmp 2f - .global exp .type exp,@function exp: @@ -68,52 +95,55 @@ exp: .type exp2,@function exp2: fldl 4(%esp) -1: pushl $0x467ff000 - flds (%esp) - xorl %eax,%eax - pushl $0x80000000 - push %eax +1: sub $12,%esp + fld %st(0) + fstpt (%esp) + mov 8(%esp),%ax + and $0x7fff,%ax + cmp $0x3fff+13,%ax + jb 4f # |x| < 8192 + cmp $0x3fff+15,%ax + jae 3f # |x| >= 32768 + fsts (%esp) + cmpl $0xc67ff800,(%esp) + jb 2f # x > -16382 + movl $0x5f000000,(%esp) + flds (%esp) # 0x1p63 fld %st(1) - fabs - fucom %st(1) + fsub %st(1) + faddp + fucomp %st(1) fnstsw - fstp %st(0) - fstp %st(0) sahf - ja 2f - fld %st(0) - fistpl 8(%esp) - fildl 8(%esp) - fxch %st(1) - fsub %st(1) - mov $0x3fff,%eax - add %eax,8(%esp) + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%esp) + flds (%esp) # 0x1p-149 + fdiv %st(1) + fstps (%esp) # raise underflow +2: fld1 + fld %st(1) + frndint + fxch %st(2) + fsub %st(2) # st(0)=x-rint(x), st(1)=1, st(2)=rint(x) f2xm1 - fld1 - faddp - fldt (%esp) - fmulp + faddp # 2^(x-rint(x)) +1: fscale fstp %st(1) add $12,%esp ret - -2: fld %st(0) +3: xor %eax,%eax +4: cmp $0x3fff-64,%ax + fld1 + jb 1b # |x| < 0x1p-64 fstpt (%esp) - mov 9(%esp),%ah - and $0x7f,%ah - cmp $0x7f,%ah - jne 1f - decb 9(%esp) - fstp %st(0) - fldt (%esp) -1: fld %st(0) - frndint - fxch %st(1) - fsub %st(1) + fistl 8(%esp) + fildl 8(%esp) + fsubrp %st(1) + addl $0x3fff,8(%esp) f2xm1 fld1 - faddp - fscale - fstp %st(1) + faddp # 2^(x-rint(x)) + fldt (%esp) # 2^rint(x) + fmulp add $12,%esp ret