6 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
7 long double sqrtl(long double x)
11 #elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
12 #include "sqrt_data.h"
14 #define FENV_SUPPORT 1
21 /* top: 16 bit sign+exponent, x: significand. */
22 static inline long double mkldbl(uint64_t top, u128 x)
25 #if LDBL_MANT_DIG == 113
28 u.i2.hi &= 0x0000ffffffffffff;
30 #elif LDBL_MANT_DIG == 64
33 /* force the top bit on non-zero (and non-subnormal) results. */
35 u.i.m |= 0x8000000000000000;
40 /* return: top 16 bit is sign+exp and following bits are the significand. */
41 static inline u128 asu128(long double x)
43 union ldshape u = {.f=x};
45 #if LDBL_MANT_DIG == 113
48 #elif LDBL_MANT_DIG == 64
50 /* ignore the top bit: pseudo numbers are not handled. */
52 r.hi &= 0x0000ffffffffffff;
53 r.hi |= (uint64_t)u.i.se << 48;
/* returns a*b*2^-32 - e, with error 0 <= e < 1. */
static inline uint32_t mul32(uint32_t a, uint32_t b)
{
	return (uint64_t)a*b >> 32;
}
/* returns a*b*2^-64 - e, with error 0 <= e < 3.
   (each >>32 drops a fraction < 1 and the alo*blo term,
   whose contribution is < 1, is dropped entirely.) */
static inline uint64_t mul64(uint64_t a, uint64_t b)
{
	uint64_t ahi = a>>32;
	uint64_t alo = a&0xffffffff;
	uint64_t bhi = b>>32;
	uint64_t blo = b&0xffffffff;
	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
}
74 static inline u128 add64(u128 a, uint64_t b)
84 static inline u128 add128(u128 a, u128 b)
94 static inline u128 sub64(u128 a, uint64_t b)
104 static inline u128 sub128(u128 a, u128 b)
114 /* a<<n, 0 <= n <= 127 */
115 static inline u128 lsh(u128 a, int n)
123 a.hi = (a.hi<<n) | (a.lo>>(64-n));
129 /* a>>n, 0 <= n <= 127 */
130 static inline u128 rsh(u128 a, int n)
138 a.lo = (a.lo>>n) | (a.hi<<(64-n));
144 /* returns a*b exactly. */
145 static inline u128 mul64_128(uint64_t a, uint64_t b)
148 uint64_t ahi = a>>32;
149 uint64_t alo = a&0xffffffff;
150 uint64_t bhi = b>>32;
151 uint64_t blo = b&0xffffffff;
152 uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
153 uint64_t lo2 = (alo*blo)&0xffffffff;
154 r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
155 r.lo = (lo1<<32) + lo2;
159 /* returns a*b*2^-128 - e, with error 0 <= e < 7. */
160 static inline u128 mul128(u128 a, u128 b)
162 u128 hi = mul64_128(a.hi, b.hi);
163 uint64_t m1 = mul64(a.hi, b.lo);
164 uint64_t m2 = mul64(a.lo, b.hi);
165 return add64(add64(hi, m1), m2);
168 /* returns a*b % 2^128. */
169 static inline u128 mul128_tail(u128 a, u128 b)
171 u128 lo = mul64_128(a.lo, b.lo);
172 lo.hi += a.hi*b.lo + a.lo*b.hi;
177 /* see sqrt.c for detailed comments. */
179 long double sqrtl(long double x)
186 if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
187 /* x < 0x1p-16382 or inf or nan. */
188 if (2*ix.hi == 0 && ix.lo == 0)
190 if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
193 return __math_invalidl(x);
194 /* x is subnormal, normalize it. */
195 ix = asu128(x * 0x1p112);
200 /* x = 4^e m; with int e and m in [1, 4) */
203 ml.hi |= 0x8000000000000000;
204 if (even) ml = rsh(ml, 1);
205 top = (top + 0x3fff) >> 1;
208 static const uint64_t three = 0xc0000000;
209 uint64_t r, s, d, u, i;
210 i = (ix.hi >> 42) % 128;
211 r = (uint32_t)__rsqrt_tab[i] << 16;
212 /* |r sqrt(m) - 1| < 0x1p-8 */
213 s = mul32(ml.hi>>32, r);
216 r = mul32(u, r) << 1;
217 /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
222 r = mul64(u, r) << 1;
223 /* |r sqrt(m) - 1| < 0x1.a5p-31 */
224 s = mul64(u, s) << 1;
227 r = mul64(u, r) << 1;
228 /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
230 static const u128 threel = {.hi=three<<32, .lo=0};
236 ul = sub128(threel, dl);
237 sl = mul128(ul, sl); /* repr: 3.125 */
238 /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
239 sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
240 /* s < sqrt(m) < s + 1 ULP + tiny */
244 d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
246 d2 = add128(add64(sl, 1), d1);
247 sl = add64(sl, d1.hi >> 63);
250 /* handle rounding modes and inexact exception. */
251 top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
252 top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
253 y += mkldbl(top, (u128){0});
258 #error unsupported long double format