# nsz Git - musl/blob - src/math/i386/expl.s

   # long double expl(long double x)        (i386, x87, GNU as AT&T syntax)
   #
   # exp(x) = 2^hi + 2^hi (2^lo - 1)
   # where hi+lo = log2e*x with 128bit precision
   # exact log2e*x calculation depends on nearest rounding mode
   # using the exact multiplication method of Dekker and Veltkamp
   #
   # In:    x as a 10-byte extended value at 4(%esp)
   #        (its sign/exponent word is therefore at 12(%esp))
   # Out:   result in %st(0)
   # Uses:  %eax, %edx, flags, the x87 register stack; calls exp2l
   # Frame: the main path reserves 44 bytes (44 + the 4-byte return address
   #        is a multiple of 16):
   #           0(%esp)  argument for exp2l, later a spilled copy of 2^hi
   #          16(%esp)  hi
   #          32(%esp)  x
   #
   # "fpu stack:" comments list the x87 stack bottom-to-top, i.e. the
   # rightmost name is %st(0).

.global expl
.type expl,@function
expl:
        fldt 4(%esp)                    # fpu stack: x

                # special cases: 2*x is +-inf, nan or |x| < 0x1p-32
                # check (exponent|0x8000)+2 < 0xbfff+2-32
                # the 16-bit add wraps to 0 or 1 exactly when the exponent
                # field is 0x7ffe or 0x7fff, which the second compare catches
        movw 12(%esp), %ax              # ax = sign and exponent of x
        movw %ax, %dx
        orw $0x8000, %dx                # ignore the sign in the range test
        addw $2, %dx
        cmpw $0xbfff-30, %dx
        jnb .Lexpl_main                 # 0x1p-32 <= |x| and 2*x is finite
        cmpw $1, %dx
        jbe .Lexpl_infnan               # exponent field is 0x7ffe or 0x7fff
                # if |x|<0x1p-32 return 1+x
        fld1
        jmp .Lexpl_add

.Lexpl_infnan:
        testw %ax, %ax
        jns .Lexpl_double
                # if 2*x == -inf,-nan return -0/x
        fldz
        fchs
        fdivp
        ret
                # if 2*x == inf,nan return 2*x
.Lexpl_double:
        fld %st(0)
.Lexpl_add:
        faddp
        ret

                # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
                # it will be wrong on non-nearest rounding mode
.Lexpl_main:
        fldl2e                          # fpu stack: x log2e_hi
        subl $44, %esp
                # hi = log2e_hi*x
                # 2^hi = exp2l(hi)
        fmul %st(1),%st                 # fpu stack: x hi
        fld %st(0)                      # fpu stack: x hi hi
        fstpt (%esp)                    # argument for exp2l
        fstpt 16(%esp)                  # save hi
        fstpt 32(%esp)                  # save x; the x87 stack is now empty
        call exp2l                      # fpu stack: 2^hi
                # if 2^hi == inf return 2^hi
        fld %st(0)
        fstpt (%esp)                    # spill a copy of 2^hi for integer tests
        cmpw $0x7fff, 8(%esp)
        je .Lexpl_done
                # if 2^hi == 0 return 2^hi
                # (an all-zero significand means the value is zero)
                # without this, a very large negative finite x makes c*x
                # below overflow to -inf, xh becomes inf + -inf = nan and
                # the nan propagates into the result instead of 0
        movl (%esp), %eax
        orl 4(%esp), %eax
        jz .Lexpl_done
        fldt 32(%esp)
        fldt 16(%esp)
                # fpu stack: 2^hi x hi
                # exact mult: x*log2e
        fld %st(1)
                # c = 0x1p32+1
        pushl $0x41f00000
        pushl $0x00100000
        fldl (%esp)
                # xh = x - c*x + c*x
                # xl = x - xh
        fmulp                           # fpu stack: 2^hi x hi c*x
        fld %st(2)
        fsub %st(1), %st                # st(0) = x - c*x
        faddp                           # fpu stack: 2^hi x hi xh
        fld %st(2)
        fsub %st(1), %st                # st(0) = x - xh = xl
                # yh = log2e_hi - c*log2e_hi + c*log2e_hi
        pushl $0x3ff71547
        pushl $0x65200000
        fldl (%esp)
                # fpu stack: 2^hi x hi xh xl yh
                # lo = xh*yh - hi + xl*yh
                # (GNU as AT&T syntax reverses the operands of
                #  fsubp %st,%st(i): it stores %st - %st(i) into %st(i))
        fld %st(2)
        fmul %st(1), %st                # st(0) = xh*yh
        fsubp %st, %st(4)               # hi slot = xh*yh - hi
        fmul %st(1), %st                # st(0) = yh*xl
        faddp %st, %st(3)               # lo slot += xl*yh
                # yl = log2e_hi - yh
        pushl $0x3de705fc
        pushl $0x2f000000
        fldl (%esp)
                # fpu stack: 2^hi x lo xh xl yl
                # lo += xh*yl + xl*yl
        fmul %st, %st(2)                # xh slot = xh*yl
        fmulp %st, %st(1)               # fpu stack: 2^hi x lo xh*yl xl*yl
        fxch %st(2)                     # fpu stack: 2^hi x xl*yl xh*yl lo
        faddp
        faddp                           # fpu stack: 2^hi x lo
                # log2e_lo
        pushl $0xbfbe
        pushl $0x82f0025f
        pushl $0x2dc582ee
        fldt (%esp)
        addl $36,%esp                   # drop the 9 dwords pushed for constants
                # fpu stack: 2^hi x lo log2e_lo
                # lo += log2e_lo*x
                # return 2^hi + 2^hi (2^lo - 1)
        fmulp %st, %st(2)               # fpu stack: 2^hi log2e_lo*x lo
        faddp                           # fpu stack: 2^hi lo
        f2xm1                           # st(0) = 2^lo - 1
        fmul %st(1), %st                # st(0) = 2^hi * (2^lo - 1)
        faddp                           # fpu stack: result
.Lexpl_done:
        addl $44, %esp
        ret
 107         ret