math: fix exp2l asm on x86 (raise underflow correctly)

author Szabolcs Nagy <nsz@port70.net>

Thu, 5 Sep 2013 10:58:48 +0000 (10:58 +0000)

committer Szabolcs Nagy <nsz@port70.net>

Thu, 5 Sep 2013 11:30:09 +0000 (11:30 +0000)
author Szabolcs Nagy <nsz@port70.net>
Thu, 5 Sep 2013 10:58:48 +0000 (10:58 +0000)
committer Szabolcs Nagy <nsz@port70.net>
Thu, 5 Sep 2013 11:30:09 +0000 (11:30 +0000)
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s

index e5f5458..abb9036 100644 (file)
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -95,42 +95,32 @@ exp:
  .type exp2,@function
  exp2:
         fldl 4(%esp)
-1:     pushl $0x467ff000
-       flds (%esp)       # 16380
-       xorl %eax,%eax
-       pushl $0x80000000
-       push %eax
-       fld %st(1)
-       fabs
-       fucomp %st(1)
-       fnstsw
-       fstp %st(0)
-       sahf
-       ja 3f             # |x| > 16380
-       jp 2f             # x is nan (avoid invalid except in fistp)
+1:     sub $12,%esp
         fld %st(0)
-       fistpl 8(%esp)
-       fildl 8(%esp)
-       fxch %st(1)
-       fsub %st(1)
-       mov $0x3fff,%eax
-       add %eax,8(%esp)
-       f2xm1
-       fld1
-       faddp             # 2^(x-rint(x))
-       fldt (%esp)       # 2^rint(x)
-       fmulp
-       fstp %st(1)
-2:     add $12,%esp
-       ret
-
-3:     fld %st(0)
         fstpt (%esp)
-       fld1
         mov 8(%esp),%ax
         and $0x7fff,%ax
-       cmp $0x7fff,%ax
-       je 1f             # x = +-inf
+       cmp $0x3fff+13,%ax
+       jb 4f             # |x| < 8192
+       cmp $0x3fff+15,%ax
+       jae 3f            # |x| >= 32768
+       fsts (%esp)
+       cmpl $0xc67ff800,(%esp)
+       jb 2f             # x > -16382
+       movl $0x5f000000,(%esp)
+       flds (%esp)       # 0x1p63
+       fld %st(1)
+       fsub %st(1)
+       faddp
+       fucomp %st(1)
+       fnstsw
+       sahf
+       je 2f             # x - 0x1p63 + 0x1p63 == x
+       movl $1,(%esp)
+       flds (%esp)       # 0x1p-149
+       fdiv %st(1)
+       fstps (%esp)      # raise underflow
+2:     fld1
         fld %st(1)
         frndint
         fxch %st(2)
@@ -141,3 +131,19 @@ exp2:
         fstp %st(1)
         add $12,%esp
         ret
+3:     xor %eax,%eax
+4:     cmp $0x3fff-64,%ax
+       fld1
+       jb 1b             # |x| < 0x1p-64
+       fstpt (%esp)
+       fistl 8(%esp)
+       fildl 8(%esp)
+       fsubrp %st(1)
+       addl $0x3fff,8(%esp)
+       f2xm1
+       fld1
+       faddp             # 2^(x-rint(x))
+       fldt (%esp)       # 2^rint(x)
+       fmulp
+       add $12,%esp
+       ret
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s

index 1f8ed7b..e714588 100644 (file)
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@@ -26,44 +26,32 @@ expm1l:
  .type exp2l,@function
  exp2l:
         fldt 8(%rsp)
-1:     mov $0x467ff000,%eax
-       mov %eax,-16(%rsp)
-       mov $0x80000000,%eax
-       mov %eax,-20(%rsp)
-       xor %eax,%eax
-       mov %eax,-24(%rsp)
-       flds -16(%rsp)    # 16380
+1:     fld %st(0)
+       sub $16,%rsp
+       fstpt (%rsp)
+       mov 8(%rsp),%ax
+       and $0x7fff,%ax
+       cmp $0x3fff+13,%ax
+       jb 4f             # |x| < 8192
+       cmp $0x3fff+15,%ax
+       jae 3f            # |x| >= 32768
+       fsts (%rsp)
+       cmpl $0xc67ff800,(%rsp)
+       jb 2f             # x > -16382
+       movl $0x5f000000,(%rsp)
+       flds (%rsp)       # 0x1p63
         fld %st(1)
-       fabs
-       fucom %st(1)
+       fsub %st(1)
+       faddp
+       fucomp %st(1)
         fnstsw
-       fstp %st(0)
-       fstp %st(0)
         sahf
-       ja 3f             # |x| > 16380
-       jp 2f             # x is nan (avoid invalid except in fistp)
-       fld %st(0)
-       fistpl -16(%rsp)
-       fildl -16(%rsp)
-       fxch %st(1)
-       fsub %st(1)
-       mov $0x3fff,%eax
-       add %eax,-16(%rsp)
-       f2xm1
-       fld1
-       faddp             # 2^(x-rint(x))
-       fldt -24(%rsp)    # 2^rint(x)
-       fmulp
-2:     fstp %st(1)
-       ret
-
-3:     fld %st(0)
-       fstpt -24(%rsp)
-       fld1
-       mov -15(%rsp),%ax
-       and $0x7fff,%ax
-       cmp $0x7fff,%ax
-       je 1f             # x = +-inf
+       je 2f             # x - 0x1p63 + 0x1p63 == x
+       movl $1,(%rsp)
+       flds (%rsp)       # 0x1p-149
+       fdiv %st(1)
+       fstps (%rsp)      # raise underflow
+2:     fld1
         fld %st(1)
         frndint
         fxch %st(2)
@@ -72,4 +60,21 @@ exp2l:
         faddp             # 2^(x-rint(x))
  1:     fscale
         fstp %st(1)
+       add $16,%rsp
+       ret
+3:     xor %eax,%eax
+4:     cmp $0x3fff-64,%ax
+       fld1
+       jb 1b             # |x| < 0x1p-64
+       fstpt (%rsp)
+       fistl 8(%rsp)
+       fildl 8(%rsp)
+       fsubrp %st(1)
+       addl $0x3fff,8(%rsp)
+       f2xm1
+       fld1
+       faddp             # 2^(x-rint(x))
+       fldt (%rsp)       # 2^rint(x)
+       fmulp
+       add $16,%rsp
         ret
author	Szabolcs Nagy <nsz@port70.net>
	Thu, 5 Sep 2013 10:58:48 +0000 (10:58 +0000)
committer	Szabolcs Nagy <nsz@port70.net>
	Thu, 5 Sep 2013 11:30:09 +0000 (11:30 +0000)
src/math/i386/exp.s		patch | blob | history
src/math/x86_64/exp2l.s		patch | blob | history