X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fmath%2Fi386%2Fexp.s;h=abb9036957ad04bf9c4579ec5aa3251e69165e3f;hb=7acbbdfd5d6e264dcf8efa13188f0c8edabee047;hp=ca0de1d4f2ac5b7a5db23de24777fb75bd0c7002;hpb=97721a5508415a2f10eb068e022093811c9ff8be;p=musl

diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index ca0de1d4..abb90369 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,18 @@
 .type expm1f,@function
 expm1f:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
 
 .global expm1l
 .type expm1l,@function
@@ -14,10 +25,32 @@ expm1l:
 .type expm1,@function
 expm1:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fldl2e
 	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
 	fld1
-	fld %st(1)
+	jb 1f
+		# x*log2e < -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
 	fabs
 	fucom %st(1)
 	fnstsw %ax
@@ -50,12 +83,6 @@ expf:
 	flds 4(%esp)
 	jmp 2f
 
-.global expl
-.type expl,@function
-expl:
-	fldt 4(%esp)
-	jmp 2f
-
 .global exp
 .type exp,@function
 exp:
@@ -68,52 +95,55 @@ exp:
 .type exp2,@function
 exp2:
 	fldl 4(%esp)
-1:	pushl $0x467ff000
-	flds (%esp)
-	xorl %eax,%eax
-	pushl $0x80000000
-	push %eax
+1:	sub $12,%esp
+	fld %st(0)
+	fstpt (%esp)
+	mov 8(%esp),%ax
+	and $0x7fff,%ax
+	cmp $0x3fff+13,%ax
+	jb 4f             # |x| < 8192
+	cmp $0x3fff+15,%ax
+	jae 3f            # |x| >= 32768
+	fsts (%esp)
+	cmpl $0xc67ff800,(%esp)
+	jb 2f             # x > -16382
+	movl $0x5f000000,(%esp)
+	flds (%esp)       # 0x1p63
 	fld %st(1)
-	fabs
-	fucom %st(1)
+	fsub %st(1)
+	faddp
+	fucomp %st(1)
 	fnstsw
-	fstp %st(0)
-	fstp %st(0)
 	sahf
-	ja 2f
-	fld %st(0)
-	fistpl 8(%esp)
-	fildl 8(%esp)
-	fxch %st(1)
-	fsub %st(1)
-	mov $0x3fff,%eax
-	add %eax,8(%esp)
+	je 2f             # x - 0x1p63 + 0x1p63 == x
+	movl $1,(%esp)
+	flds (%esp)       # 0x1p-149
+	fdiv %st(1)
+	fstps (%esp)      # raise underflow
+2:	fld1
+	fld %st(1)
+	frndint
+	fxch %st(2)
+	fsub %st(2)       # st(0)=x-rint(x), st(1)=1, st(2)=rint(x)
 	f2xm1
-	fld1
-	faddp
-	fldt (%esp)
-	fmulp
+	faddp             # 2^(x-rint(x))
+1:	fscale
 	fstp %st(1)
 	add $12,%esp
 	ret
-
-2:	fld %st(0)
+3:	xor %eax,%eax
+4:	cmp $0x3fff-64,%ax
+	fld1
+	jb 1b             # |x| < 0x1p-64
 	fstpt (%esp)
-	mov 9(%esp),%ah
-	and $0x7f,%ah
-	cmp $0x7f,%ah
-	jne 1f
-	decb 9(%esp)
-	fstp %st(0)
-	fldt (%esp)
-1:	fld %st(0)
-	frndint
-	fxch %st(1)
-	fsub %st(1)
+	fistl 8(%esp)
+	fildl 8(%esp)
+	fsubrp %st(1)
+	addl $0x3fff,8(%esp)
 	f2xm1
 	fld1
-	faddp
-	fscale
-	fstp %st(1)
+	faddp             # 2^(x-rint(x))
+	fldt (%esp)       # 2^rint(x)
+	fmulp
 	add $12,%esp
 	ret