math: fix i386/expl.s with more precise x*log2e

[musl] / src / math / i386 / exp.s
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s

index 76ab4d6..e3b42af 100644 (file)
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -50,12 +50,6 @@ expf:
         flds 4(%esp)
         jmp 2f
  
-.global expl
-.type expl,@function
-expl:
-       fldt 4(%esp)
-       jmp 2f
-
  .global exp
  .type exp,@function
  exp:
@@ -68,21 +62,19 @@ exp:
  .type exp2,@function
  exp2:
         fldl 4(%esp)
-1:     mov $0x47000000,%eax
-       push %eax
-       flds (%esp)
-       shl $7,%eax
-       push %eax
-       add %eax,%eax
+1:     pushl $0x467ff000
+       flds (%esp)       # 16380
+       xorl %eax,%eax
+       pushl $0x80000000
         push %eax
         fld %st(1)
         fabs
-       fucom %st(1)
+       fucomp %st(1)
         fnstsw
-       sahf
-       ja 2f
-       fstp %st(0)
         fstp %st(0)
+       sahf
+       ja 3f             # |x| > 16380
+       jp 2f             # x is nan (avoid invalid except in fistp)
         fld %st(0)
         fistpl 8(%esp)
         fildl 8(%esp)
@@ -92,29 +84,27 @@ exp2:
         add %eax,8(%esp)
         f2xm1
         fld1
-       faddp
-       fldt (%esp)
+       faddp             # 2^(x-rint(x))
+       fldt (%esp)       # 2^rint(x)
         fmulp
         fstp %st(1)
-       add $12,%esp
+2:     add $12,%esp
         ret
  
-2:     fstp %st(0)
-       fstp %st(0)
-       fsts 8(%esp)
-       mov 8(%esp),%eax
-       lea (%eax,%eax),%ecx
-       cmp $0xff000000,%ecx
-       ja 2f
-       fstp %st(0)
-       xor %ecx,%ecx
-       inc %ecx
-       add %eax,%eax
-       jc 1f
-       mov $0x7ffe,%ecx
-1:     mov %ecx,8(%esp)
-       fldt (%esp)
-       fld %st(0)
-       fmulp
-2:     add $12,%esp
+3:     fld %st(0)
+       fstpt (%esp)
+       fld1
+       mov 8(%esp),%ax
+       and $0x7fff,%ax
+       cmp $0x7fff,%ax
+       je 1f             # x = +-inf
+       fld %st(1)
+       frndint
+       fxch %st(2)
+       fsub %st(2)       # st(0)=x-rint(x), st(1)=1, st(2)=rint(x)
+       f2xm1
+       faddp             # 2^(x-rint(x))
+1:     fscale
+       fstp %st(1)
+       add $12,%esp
         ret