X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fmath%2Fi386%2Fexp.s;h=abb9036957ad04bf9c4579ec5aa3251e69165e3f;hb=7acbbdfd5d6e264dcf8efa13188f0c8edabee047;hp=f4769d59c1ee499beacc8df5974ab45be60b9c8b;hpb=aa1b4dff45788f8205e32ea01da243fe09568aa8;p=musl

diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index f4769d59..abb90369 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -1,3 +1,70 @@
+.global expm1f
+.type expm1f,@function
+expm1f:
+	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
+
+.global expm1l
+.type expm1l,@function
+expm1l:
+	fldt 4(%esp)
+	jmp 1f
+
+.global expm1
+.type expm1,@function
+expm1:
+	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
+1:	fldl2e
+	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
+	fld1
+	jb 1f
+		# x*log2e < -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
+	fabs
+	fucom %st(1)
+	fnstsw %ax
+	fstp %st(0)
+	fstp %st(0)
+	sahf
+	ja 1f
+	f2xm1
+	ret
+1:	call 1f
+	fld1
+	fsubrp
+	ret
+
 .global exp2f
 .type exp2f,@function
 exp2f:
@@ -16,12 +83,6 @@ expf:
 	flds 4(%esp)
 	jmp 2f
 
-.global expl
-.type expl,@function
-expl:
-	fldt 4(%esp)
-	jmp 2f
-
 .global exp
 .type exp,@function
 exp:
@@ -34,22 +95,55 @@ exp:
 .type exp2,@function
 exp2:
 	fldl 4(%esp)
-1:	fxam
-	fnstsw %ax
+1:	sub $12,%esp
+	fld %st(0)
+	fstpt (%esp)
+	mov 8(%esp),%ax
+	and $0x7fff,%ax
+	cmp $0x3fff+13,%ax
+	jb 4f             # |x| < 8192
+	cmp $0x3fff+15,%ax
+	jae 3f            # |x| >= 32768
+	fsts (%esp)
+	cmpl $0xc67ff800,(%esp)
+	jb 2f             # x > -16382
+	movl $0x5f000000,(%esp)
+	flds (%esp)       # 0x1p63
+	fld %st(1)
+	fsub %st(1)
+	faddp
+	fucomp %st(1)
+	fnstsw
 	sahf
-	jnp 1f
-	jnc 1f
-	fstps 4(%esp)
-	mov $0xfe,%al
-	and %al,7(%esp)
-	flds 4(%esp)
-1:	fld %st(0)
+	je 2f             # x - 0x1p63 + 0x1p63 == x
+	movl $1,(%esp)
+	flds (%esp)       # 0x1p-149
+	fdiv %st(1)
+	fstps (%esp)      # raise underflow
+2:	fld1
+	fld %st(1)
 	frndint
-	fxch %st(1)
-	fsub %st(1)
+	fxch %st(2)
+	fsub %st(2)       # st(0)=x-rint(x), st(1)=1, st(2)=rint(x)
 	f2xm1
-	fld1
-	faddp
-	fscale
+	faddp             # 2^(x-rint(x))
+1:	fscale
 	fstp %st(1)
+	add $12,%esp
+	ret
+3:	xor %eax,%eax
+4:	cmp $0x3fff-64,%ax
+	fld1
+	jb 1b             # |x| < 0x1p-64
+	fstpt (%esp)
+	fistl 8(%esp)
+	fildl 8(%esp)
+	fsubrp %st(1)
+	addl $0x3fff,8(%esp)
+	f2xm1
+	fld1
+	faddp             # 2^(x-rint(x))
+	fldt (%esp)       # 2^rint(x)
+	fmulp
+	add $12,%esp
 	ret