math: fix x86 asin, atan, exp, log1p to raise underflow

author Szabolcs Nagy <nsz@port70.net>

Thu, 15 Aug 2013 10:56:57 +0000 (10:56 +0000)

committer Szabolcs Nagy <nsz@port70.net>

Thu, 15 Aug 2013 10:56:57 +0000 (10:56 +0000)
author Szabolcs Nagy <nsz@port70.net>
Thu, 15 Aug 2013 10:56:57 +0000 (10:56 +0000)
committer Szabolcs Nagy <nsz@port70.net>
Thu, 15 Aug 2013 10:56:57 +0000 (10:56 +0000)
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s

index 932c754..a9f691b 100644 (file)
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -2,7 +2,18 @@
  .type asinf,@function
  asinf:
         flds 4(%esp)
-       jmp 1f
+       mov 4(%esp),%eax
+       add %eax,%eax
+       cmp $0x01000000,%eax
+       jae 1f
+               # subnormal x, return x with underflow
+       fnstsw %ax
+       and $16,%ax
+       jnz 2f
+       fld %st(0)
+       fmul %st(1)
+       fstps 4(%esp)
+2:     ret
  
  .global asinl
  .type asinl,@function
@@ -14,6 +25,16 @@ asinl:
  .type asin,@function
  asin:
         fldl 4(%esp)
+       mov 8(%esp),%eax
+       add %eax,%eax
+       cmp $0x00200000,%eax
+       jae 1f
+               # subnormal x, return x with underflow
+       fnstsw %ax
+       and $16,%ax
+       jnz 2f
+       fsts 4(%esp)
+2:     ret
  1:     fld %st(0)
         fld1
         fsub %st(0),%st(1)
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s

index 7e28b39..d73137b 100644 (file)
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -2,6 +2,16 @@
  .type atan,@function
  atan:
         fldl 4(%esp)
+       mov 8(%esp),%eax
+       add %eax,%eax
+       cmp $0x00200000,%eax
+       jb 1f
         fld1
         fpatan
         ret
+               # subnormal x, return x with underflow
+1:     fnstsw %ax
+       and $16,%ax
+       jnz 2f
+       fsts 4(%esp)
+2:     ret
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s

index 3cd4023..8caddef 100644 (file)
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -2,6 +2,18 @@
  .type atanf,@function
  atanf:
         flds 4(%esp)
+       mov 4(%esp),%eax
+       add %eax,%eax
+       cmp $0x01000000,%eax
+       jb 1f
         fld1
         fpatan
         ret
+               # subnormal x, return x with underflow
+1:     fnstsw %ax
+       and $16,%ax
+       jnz 2f
+       fld %st(0)
+       fmul %st(1)
+       fstps 4(%esp)
+2:     ret
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s

index e3b42af..e5f5458 100644 (file)
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,18 @@
  .type expm1f,@function
  expm1f:
         flds 4(%esp)
-       jmp 1f
+       mov 4(%esp),%eax
+       add %eax,%eax
+       cmp $0x01000000,%eax
+       jae 1f
+               # subnormal x, return x with underflow
+       fnstsw %ax
+       and $16,%ax
+       jnz 2f
+       fld %st(0)
+       fmul %st(1)
+       fstps 4(%esp)
+2:     ret
  
  .global expm1l
  .type expm1l,@function
@@ -14,10 +25,32 @@ expm1l:
  .type expm1,@function
  expm1:
         fldl 4(%esp)
+       mov 8(%esp),%eax
+       add %eax,%eax
+       cmp $0x00200000,%eax
+       jae 1f
+               # subnormal x, return x with underflow
+       fnstsw %ax
+       and $16,%ax
+       jnz 2f
+       fsts 4(%esp)
+2:     ret
  1:     fldl2e
         fmulp
+       mov $0xc2820000,%eax
+       push %eax
+       flds (%esp)
+       pop %eax
+       fucomp %st(1)
+       fnstsw %ax
+       sahf
         fld1
-       fld %st(1)
+       jb 1f
+               # x*log2e < -65, return -1 without underflow
+       fstp %st(1)
+       fchs
+       ret
+1:     fld %st(1)
         fabs
         fucom %st(1)
         fnstsw %ax
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s

index 9971e53..6b6929c 100644 (file)
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -7,9 +7,18 @@ log1p:
         fldl 4(%esp)
         cmp $0x3fd28f00,%eax
         ja 1f
+       cmp $0x00100000,%eax
+       jb 2f
         fyl2xp1
         ret
  1:     fld1
         faddp
         fyl2x
         ret
+               # subnormal x, return x with underflow
+2:     fnstsw %ax
+       and $16,%ax
+       jnz 1f
+       fsts 4(%esp)
+       fstp %st(1)
+1:     ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s

index 2680a8a..c0bcd30 100644 (file)
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -7,9 +7,19 @@ log1pf:
         flds 4(%esp)
         cmp $0x3e940000,%eax
         ja 1f
+       cmp $0x00800000,%eax
+       jb 2f
         fyl2xp1
         ret
  1:     fld1
         faddp
         fyl2x
         ret
+               # subnormal x, return x with underflow
+2:     fnstsw %ax
+       and $16,%ax
+       jnz 1f
+       fxch
+       fmul %st(1)
+       fstps 4(%esp)
+1:     ret
author	Szabolcs Nagy <nsz@port70.net>
	Thu, 15 Aug 2013 10:56:57 +0000 (10:56 +0000)
committer	Szabolcs Nagy <nsz@port70.net>
	Thu, 15 Aug 2013 10:56:57 +0000 (10:56 +0000)
src/math/i386/asin.s		patch \| blob \| history
src/math/i386/atan.s		patch \| blob \| history
src/math/i386/atanf.s		patch \| blob \| history
src/math/i386/exp.s		patch \| blob \| history
src/math/i386/log1p.s		patch \| blob \| history
src/math/i386/log1pf.s		patch \| blob \| history