X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fmath%2Fi386%2Fexp.s;h=abb9036957ad04bf9c4579ec5aa3251e69165e3f;hb=7acbbdfd5d6e264dcf8efa13188f0c8edabee047;hp=f4769d59c1ee499beacc8df5974ab45be60b9c8b;hpb=aa1b4dff45788f8205e32ea01da243fe09568aa8;p=musl diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index f4769d59..abb90369 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -1,3 +1,70 @@ +.global expm1f +.type expm1f,@function +expm1f: + flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fld %st(0) + fmul %st(1) + fstps 4(%esp) +2: ret + +.global expm1l +.type expm1l,@function +expm1l: + fldt 4(%esp) + jmp 1f + +.global expm1 +.type expm1,@function +expm1: + fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fsts 4(%esp) +2: ret +1: fldl2e + fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf + fld1 + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) + fabs + fucom %st(1) + fnstsw %ax + fstp %st(0) + fstp %st(0) + sahf + ja 1f + f2xm1 + ret +1: call 1f + fld1 + fsubrp + ret + .global exp2f .type exp2f,@function exp2f: @@ -16,12 +83,6 @@ expf: flds 4(%esp) jmp 2f -.global expl -.type expl,@function -expl: - fldt 4(%esp) - jmp 2f - .global exp .type exp,@function exp: @@ -34,22 +95,55 @@ exp: .type exp2,@function exp2: fldl 4(%esp) -1: fxam - fnstsw %ax +1: sub $12,%esp + fld %st(0) + fstpt (%esp) + mov 8(%esp),%ax + and $0x7fff,%ax + cmp $0x3fff+13,%ax + jb 4f # |x| < 8192 + cmp $0x3fff+15,%ax + jae 3f # |x| >= 32768 + fsts (%esp) + cmpl $0xc67ff800,(%esp) + jb 2f # x > -16382 + movl $0x5f000000,(%esp) + flds (%esp) # 0x1p63 + fld %st(1) + fsub %st(1) + faddp + fucomp %st(1) + fnstsw sahf - jnp 1f - jnc 1f - fstps 4(%esp) - mov $0xfe,%al - and %al,7(%esp) - flds 4(%esp) -1: fld %st(0) + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%esp) + flds (%esp) # 0x1p-149 + fdiv %st(1) + fstps (%esp) # raise underflow +2: fld1 + fld %st(1) frndint - fxch %st(1) - fsub %st(1) + fxch %st(2) + fsub %st(2) # st(0)=x-rint(x), st(1)=1, st(2)=rint(x) f2xm1 - fld1 - faddp - fscale + faddp # 2^(x-rint(x)) +1: fscale fstp %st(1) + add $12,%esp + ret +3: xor %eax,%eax +4: cmp $0x3fff-64,%ax + fld1 + jb 1b # |x| < 0x1p-64 + fstpt (%esp) + fistl 8(%esp) + fildl 8(%esp) + fsubrp %st(1) + addl $0x3fff,8(%esp) + f2xm1 + fld1 + faddp # 2^(x-rint(x)) + fldt (%esp) # 2^rint(x) + fmulp + add $12,%esp ret