assembly optimizations for fmod/remainder functions
[musl] / src / math / lgammaf_r.c
1 /* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */
2 /*
3  * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
4  */
5 /*
6  * ====================================================
7  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
8  *
9  * Developed at SunPro, a Sun Microsystems, Inc. business.
10  * Permission to use, copy, modify, and distribute this
11  * software is freely granted, provided that this notice
12  * is preserved.
13  * ====================================================
14  */
15
16 #include "libm.h"
17
/*
 * Coefficient tables for lgammaf.  Every literal is followed by the bit
 * pattern of the float it is expected to round to.
 */

/* General-purpose constants. */
static const float
zero  =  0.0000000000e+00,
half  =  5.0000000000e-01, /* 0x3f000000 */
one   =  1.0000000000e+00, /* 0x3f800000 */
two23 =  8.3886080000e+06, /* 0x4b000000 */
pi    =  3.1415927410e+00; /* 0x40490fdb */

/* Polynomial in y used below 2.0 when y = 1-x or y = 2-x. */
static const float
a0  =  7.7215664089e-02, /* 0x3d9e233f */
a1  =  3.2246702909e-01, /* 0x3ea51a66 */
a2  =  6.7352302372e-02, /* 0x3d89f001 */
a3  =  2.0580807701e-02, /* 0x3ca89915 */
a4  =  7.3855509982e-03, /* 0x3bf2027e */
a5  =  2.8905137442e-03, /* 0x3b3d6ec6 */
a6  =  1.1927076848e-03, /* 0x3a9c54a1 */
a7  =  5.1006977446e-04, /* 0x3a05b634 */
a8  =  2.2086278477e-04, /* 0x39679767 */
a9  =  1.0801156895e-04, /* 0x38e28445 */
a10 =  2.5214456400e-05, /* 0x37d383a2 */
a11 =  4.4864096708e-05; /* 0x383c2c75 */

/*
 * Expansion used below 2.0 when y = x-tc (or y = x-(tc-1)):
 * tc is the offset, tf the leading term, tt = -(tail of tf).
 */
static const float
tc  =  1.4616321325e+00, /* 0x3fbb16c3 */
tf  = -1.2148628384e-01, /* 0xbdf8cdcd */
tt  =  6.6971006518e-09, /* 0x31e61c52 */
t0  =  4.8383611441e-01, /* 0x3ef7b95e */
t1  = -1.4758771658e-01, /* 0xbe17213c */
t2  =  6.4624942839e-02, /* 0x3d845a15 */
t3  = -3.2788541168e-02, /* 0xbd064d47 */
t4  =  1.7970675603e-02, /* 0x3c93373d */
t5  = -1.0314224288e-02, /* 0xbc28fcfe */
t6  =  6.1005386524e-03, /* 0x3bc7e707 */
t7  = -3.6845202558e-03, /* 0xbb7177fe */
t8  =  2.2596477065e-03, /* 0x3b141699 */
t9  = -1.4034647029e-03, /* 0xbab7f476 */
t10 =  8.8108185446e-04, /* 0x3a66f867 */
t11 = -5.3859531181e-04, /* 0xba0d3085 */
t12 =  3.1563205994e-04, /* 0x39a57b6b */
t13 = -3.1275415677e-04, /* 0xb9a3f927 */
t14 =  3.3552918467e-04; /* 0x39afe9f7 */

/* Rational u(y)/v(y) used below 2.0 when y = x-1 or y = x. */
static const float
u0  = -7.7215664089e-02, /* 0xbd9e233f */
u1  =  6.3282704353e-01, /* 0x3f2200f4 */
u2  =  1.4549225569e+00, /* 0x3fba3ae7 */
u3  =  9.7771751881e-01, /* 0x3f7a4bb2 */
u4  =  2.2896373272e-01, /* 0x3e6a7578 */
u5  =  1.3381091878e-02, /* 0x3c5b3c5e */
v1  =  2.4559779167e+00, /* 0x401d2ebe */
v2  =  2.1284897327e+00, /* 0x4008392d */
v3  =  7.6928514242e-01, /* 0x3f44efdf */
v4  =  1.0422264785e-01, /* 0x3dd572af */
v5  =  3.2170924824e-03; /* 0x3b52d5db */

/* Rational s(y)/r(y) used on [2,8) with y = x - (int)x. */
static const float
s0  = -7.7215664089e-02, /* 0xbd9e233f */
s1  =  2.1498242021e-01, /* 0x3e5c245a */
s2  =  3.2577878237e-01, /* 0x3ea6cc7a */
s3  =  1.4635047317e-01, /* 0x3e15dce6 */
s4  =  2.6642270386e-02, /* 0x3cda40e4 */
s5  =  1.8402845599e-03, /* 0x3af135b4 */
s6  =  3.1947532989e-05, /* 0x3805ff67 */
r1  =  1.3920053244e+00, /* 0x3fb22d3b */
r2  =  7.2193557024e-01, /* 0x3f38d0c5 */
r3  =  1.7193385959e-01, /* 0x3e300f6e */
r4  =  1.8645919859e-02, /* 0x3c98bf54 */
r5  =  7.7794247773e-04, /* 0x3a4beed6 */
r6  =  7.3266842264e-06; /* 0x36f5d7bd */

/* Series in 1/x used on [8, 2**58). */
static const float
w0  =  4.1893854737e-01, /* 0x3ed67f1d */
w1  =  8.3333335817e-02, /* 0x3daaaaab */
w2  = -2.7777778450e-03, /* 0xbb360b61 */
w3  =  7.9365057172e-04, /* 0x3a500cfd */
w4  = -5.9518753551e-04, /* 0xba1c065c */
w5  =  8.3633989561e-04, /* 0x3a5b3dd2 */
w6  = -1.6309292987e-03; /* 0xbad5c4e8 */
87
88 static float sin_pif(float x)
89 {
90         float y,z;
91         int n,ix;
92
93         GET_FLOAT_WORD(ix, x);
94         ix &= 0x7fffffff;
95
96         if(ix < 0x3e800000)
97                 return __sindf(pi*x);
98
99         y = -x;  /* negative x is assumed */
100
101         /*
102          * argument reduction, make sure inexact flag not raised if input
103          * is an integer
104          */
105         z = floorf(y);
106         if (z != y) {   /* inexact anyway */
107                 y *= 0.5f;
108                 y  = 2.0f*(y - floorf(y));   /* y = |x| mod 2.0 */
109                 n  = (int)(y*4.0f);
110         } else {
111                 if (ix >= 0x4b800000) {
112                         y = zero;  /* y must be even */
113                         n = 0;
114                 } else {
115                         if (ix < 0x4b000000)
116                                 z = y + two23;  /* exact */
117                         GET_FLOAT_WORD(n, z);
118                         n &= 1;
119                         y = n;
120                         n <<= 2;
121                 }
122         }
123         switch (n) {
124         case 0:  y =  __sindf(pi*y); break;
125         case 1:
126         case 2:  y =  __cosdf(pi*(0.5f - y)); break;
127         case 3:
128         case 4:  y =  __sindf(pi*(one - y)); break;
129         case 5:
130         case 6:  y = -__cosdf(pi*(y - 1.5f)); break;
131         default: y =  __sindf(pi*(y - 2.0f)); break;
132         }
133         return -y;
134 }
135
136
137 float __lgammaf_r(float x, int *signgamp)
138 {
139         float t,y,z,nadj,p,p1,p2,p3,q,r,w;
140         int32_t hx;
141         int i,ix;
142
143         GET_FLOAT_WORD(hx, x);
144
145         /* purge off +-inf, NaN, +-0, tiny and negative arguments */
146         *signgamp = 1;
147         ix = hx & 0x7fffffff;
148         if (ix >= 0x7f800000)
149                 return x*x;
150         if (ix == 0)
151                 return one/zero;
152         if (ix < 0x35000000) {  /* |x| < 2**-21, return -log(|x|) */
153                 if (hx < 0) {
154                         *signgamp = -1;
155                         return -logf(-x);
156                 }
157                 return -logf(x);
158         }
159         if (hx < 0) {
160                 if (ix >= 0x4b000000)  /* |x| >= 2**23, must be -integer */
161                         return one/zero;
162                 t = sin_pif(x);
163                 if (t == zero) /* -integer */
164                         return one/zero;
165                 nadj = logf(pi/fabsf(t*x));
166                 if (t < zero)
167                         *signgamp = -1;
168                 x = -x;
169         }
170
171         /* purge off 1 and 2 */
172         if (ix == 0x3f800000 || ix == 0x40000000)
173                 r = 0;
174         /* for x < 2.0 */
175         else if (ix < 0x40000000) {
176                 if (ix <= 0x3f666666) {  /* lgamma(x) = lgamma(x+1)-log(x) */
177                         r = -logf(x);
178                         if (ix >= 0x3f3b4a20) {
179                                 y = one - x;
180                                 i = 0;
181                         } else if (ix >= 0x3e6d3308) {
182                                 y = x - (tc-one);
183                                 i = 1;
184                         } else {
185                                 y = x;
186                                 i = 2;
187                         }
188                 } else {
189                         r = zero;
190                         if (ix >= 0x3fdda618) {  /* [1.7316,2] */
191                                 y = 2.0f - x;
192                                 i = 0;
193                         } else if (ix >= 0x3F9da620) {  /* [1.23,1.73] */
194                                 y = x - tc;
195                                 i = 1;
196                         } else {
197                                 y = x - one;
198                                 i = 2;
199                         }
200                 }
201                 switch(i) {
202                 case 0:
203                         z = y*y;
204                         p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
205                         p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
206                         p = y*p1+p2;
207                         r += p - 0.5f*y;
208                         break;
209                 case 1:
210                         z = y*y;
211                         w = z*y;
212                         p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12)));    /* parallel comp */
213                         p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
214                         p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
215                         p = z*p1-(tt-w*(p2+y*p3));
216                         r += (tf + p);
217                         break;
218                 case 2:
219                         p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
220                         p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
221                         r += -0.5f*y + p1/p2;
222                 }
223         } else if (ix < 0x41000000) {  /* x < 8.0 */
224                 i = (int)x;
225                 y = x - (float)i;
226                 p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
227                 q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
228                 r = half*y+p/q;
229                 z = one;    /* lgamma(1+s) = log(s) + lgamma(s) */
230                 switch (i) {
231                 case 7: z *= y + 6.0f;  /* FALLTHRU */
232                 case 6: z *= y + 5.0f;  /* FALLTHRU */
233                 case 5: z *= y + 4.0f;  /* FALLTHRU */
234                 case 4: z *= y + 3.0f;  /* FALLTHRU */
235                 case 3: z *= y + 2.0f;  /* FALLTHRU */
236                         r += logf(z);
237                         break;
238                 }
239         } else if (ix < 0x5c800000) {  /* 8.0 <= x < 2**58 */
240                 t = logf(x);
241                 z = one/x;
242                 y = z*z;
243                 w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
244                 r = (x-half)*(t-one)+w;
245         } else                         /* 2**58 <= x <= inf */
246                 r =  x*(logf(x)-one);
247         if (hx < 0)
248                 r = nadj - r;
249         return r;
250 }
251
252 weak_alias(__lgammaf_r, lgammaf_r);