nsz Git - musl/blob - src/math/x86_64/expl.s

   1 # exp(x) = 2^hi + 2^hi (2^lo - 1)
   2 # where hi+lo = log2e*x with 128bit precision
   3 # exact log2e*x calculation depends on nearest rounding mode
   4 # using the exact multiplication method of Dekker and Veltkamp
   5
   6 .global expl
   7 .type expl,@function
   8 expl:
   9         fldt 8(%rsp)
  10
  11                 # special cases: 2*x is +-inf, nan or |x| < 0x1p-32
  12                 # check (exponent|0x8000)+2 < 0xbfff+2-32
  13         movw 16(%rsp), %ax
  14         movw %ax, %dx
  15         orw $0x8000, %dx
  16         addw $2, %dx
  17         cmpw $0xbfff-30, %dx
  18         jnb 3f
  19         cmpw $1, %dx
  20         jbe 1f
  21                 # if |x|<0x1p-32 return 1+x
  22         fld1
  23         jmp 2f
  24 1:      testw %ax, %ax
  25         jns 1f
  26                 # if 2*x == -inf,-nan return -0/x
  27         fldz
  28         fchs
  29         fdivp
  30         ret
  31                 # if 2*x == inf,nan return 2*x
  32 1:      fld %st(0)
  33 2:      faddp
  34         ret
  35
  36                 # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
  37                 # it will be wrong on non-nearest rounding mode
  38 3:      fldl2e
  39         subq $48, %rsp
  40                 # hi = log2e_hi*x
  41                 # 2^hi = exp2l(hi)
  42         fmul %st(1),%st
  43         fld %st(0)
  44         fstpt (%rsp)
  45         fstpt 16(%rsp)
  46         fstpt 32(%rsp)
  47         call exp2l
  48                 # if 2^hi == inf return 2^hi
  49         fld %st(0)
  50         fstpt (%rsp)
  51         cmpw $0x7fff, 8(%rsp)
  52         je 1f
  53         fldt 32(%rsp)
  54         fldt 16(%rsp)
  55                 # fpu stack: 2^hi x hi
  56                 # exact mult: x*log2e
  57         fld %st(1)
  58                 # c = 0x1p32+1
  59         movq $0x41f0000000100000,%rax
  60         pushq %rax
  61         fldl (%rsp)
  62                 # xh = x - c*x + c*x
  63                 # xl = x - xh
  64         fmulp
  65         fld %st(2)
  66         fsub %st(1), %st
  67         faddp
  68         fld %st(2)
  69         fsub %st(1), %st
  70                 # yh = log2e_hi - c*log2e_hi + c*log2e_hi
  71         movq $0x3ff7154765200000,%rax
  72         pushq %rax
  73         fldl (%rsp)
  74                 # fpu stack: 2^hi x hi xh xl yh
  75                 # lo = hi - xh*yh + xl*yh
  76         fld %st(2)
  77         fmul %st(1), %st
  78         fsubp %st, %st(4)
  79         fmul %st(1), %st
  80         faddp %st, %st(3)
  81                 # yl = log2e_hi - yh
  82         movq $0x3de705fc2f000000,%rax
  83         pushq %rax
  84         fldl (%rsp)
  85                 # fpu stack: 2^hi x lo xh xl yl
  86                 # lo += xh*yl + xl*yl
  87         fmul %st, %st(2)
  88         fmulp %st, %st(1)
  89         fxch %st(2)
  90         faddp
  91         faddp
  92                 # log2e_lo
  93         movq $0xbfbe,%rax
  94         pushq %rax
  95         movq $0x82f0025f2dc582ee,%rax
  96         pushq %rax
  97         fldt (%rsp)
  98         addq $40,%rsp
  99                 # fpu stack: 2^hi x lo log2e_lo
 100                 # lo += log2e_lo*x
 101                 # return 2^hi + 2^hi (2^lo - 1)
 102         fmulp %st, %st(2)
 103         faddp
 104         f2xm1
 105         fmul %st(1), %st
 106         faddp
 107 1:      addq $48, %rsp
 108         ret