Diffstat (limited to 'src/sqrt.c')
-rw-r--r-- | src/sqrt.c | 559
1 file changed, 517 insertions, 42 deletions
diff --git a/src/sqrt.c b/src/sqrt.c
index af993fa64..332720e3f 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -20,14 +20,115 @@
 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
 
+/*
+*** Note ***
+
+  The code of mpfr_sqrt1 and/or the functions it calls depends on
+  implementation-defined features of the C standard: see the lines with
+  a cast to mp_limb_signed_t associated with a right shift '>>'.
+  Such features are known to behave as the code below expects with GCC,
+  according to the GCC manual (excerpt from the GCC 4.9.3 manual):
+
+   4 C Implementation-defined behavior
+   ***********************************
+
+   4.5 Integers
+   ============
+
+   * 'The result of, or the signal raised by, converting an integer to
+     a signed integer type when the value cannot be represented in an
+     object of that type (C90 6.2.1.2, C99 and C11 6.3.1.3).'
+
+     For conversion to a type of width N, the value is reduced modulo
+     2^N to be within range of the type; no signal is raised.
+
+   * 'The results of some bitwise operations on signed integers (C90
+     6.3, C99 and C11 6.5).'
+
+     Bitwise operators act on the representation of the value including
+     both the sign and value bits, where the sign bit is considered
+     immediately above the highest-value value bit.  Signed '>>' acts
+     on negative numbers by sign extension.
+
+  It is not known whether this works with other compilers, thus this code
+  is currently enabled only when __GNUC__ is defined (which includes
+  compilers that declare compatibility with GCC).  A configure test might
+  be an alternative solution (but without any guarantee, since the result
+  might also depend on the context).
+
+  Warning! The right shift of a negative value corresponds to an integer
+  division by a power of two, with rounding toward negative infinity.
+
+  TODO: Complete the comments wherever a right shift of a negative value
+  may be involved, so that the rounding toward negative infinity appears
+  in the proof.  There has already been at least one error in the proof
+  of a bound!
+*/
+
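[Editor's note: a minimal illustration of the GCC semantics relied upon in
the note above, assuming a GCC-compatible compiler.  The cast-then-shift
idiom used throughout this patch divides by a power of two with rounding
toward negative infinity, whereas C integer division truncates toward zero.]

    #include <stdio.h>

    int main (void)
    {
      long t = -5;
      /* arithmetic right shift: t >> 1 = floor(-5/2) = -3,
         while C division truncates toward zero: t / 2 = -2 */
      printf ("%ld %ld\n", t >> 1, t / 2);  /* prints: -3 -2 */
      return 0;
    }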
 #define MPFR_NEED_LONGLONG_H
 #include "mpfr-impl.h"
 
+#if !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64)
+
+/* The tables T1[] and T2[] below were generated with the following Sage
+   code, using T1,T2 = bipartite(4,4,4,12,16).  Note: we would get a
+   slightly smaller error with an approximation of the form
+   T1[a,b] * (1 + T2[a,b]), but this would make the code more complex,
+   and the approximation would not always fit on p2 bits (assuming
+   p2 >= p1).
+
+# bi-partite method, using a table T1 of p1 bits, and a table T2 of p2 bits
+# approximate 1/sqrt(a:b:c) with T1[a,b] + T2[a,c]
+def bipartite(pa,pb,pc,p1,p2):
+    T1 = dict()
+    T2 = dict()
+    maxerr = maxnum = 0
+    for a in range(2^(pa-2),2^pa):
+        for b in range(2^pb):
+            A1 = (a*2^pb+b)/2^(pa+pb)
+            A2 = (a*2^pb+b+1)/2^(pa+pb)
+            X = (1/sqrt(A1) + 1/sqrt(A2))/2
+            X = round(X*2^p1)/2^p1
+            T1[a,b] = X*2^p1
+            maxnum = max(maxnum, abs(T1[a,b]))
+            # print "a=", a, "b=", b, "T1=", T1[a,b]
+            maxerr = max(maxerr, n(abs(1/sqrt(A1) - X)))
+            maxerr = max(maxerr, n(abs(1/sqrt(A2) - X)))
+    print "maxerr1 =", maxerr, log(maxerr)/log(2.0), "maxnum=", maxnum
+    maxerr = maxnum = 0
+    for a in range(2^(pa-2),2^pa):
+        for c in range(2^pc):
+            Xmin = infinity
+            Xmax = -infinity
+            for b in range(2^pb):
+                A = (a*2^(pb+pc)+b*2^pc+c)/2^(pa+pb+pc)
+                X = 1/sqrt(A) - T1[a,b]/2^p1
+                X = round(X*2^p2)/2^p2
+                Xmin = min (Xmin, X)
+                Xmax = max (Xmax, X)
+                A = (a*2^(pb+pc)+b*2^pc+c+1)/2^(pa+pb+pc)
+                X = 1/sqrt(A) - T1[a,b]/2^p1
+                X = round(X*2^p2)/2^p2
+                Xmin = min (Xmin, X)
+                Xmax = max (Xmax, X)
+            T2[a,c] = round((Xmin + Xmax)/2*2^p2)
+            maxnum = max(maxnum, abs(T2[a,c]))
+            # print "a=", a, "c=", c, "T2=", T2[a,c]
+            for b in range(2^pb):
+                A = (a*2^(pb+pc)+b*2^pc+c)/2^(pa+pb+pc)
+                X = 1/sqrt(A)
+                maxerr = max(maxerr, n(abs(X - (T1[a,b]/2^p1 + T2[a,c]/2^p2))))
+                A = (a*2^(pb+pc)+b*2^pc+c+1)/2^(pa+pb+pc)
+                X = 1/sqrt(A)
+                maxerr = max(maxerr, n(abs(X - (T1[a,b]/2^p1 + T2[a,c]/2^p2))))
+    print "maxerr2 =", maxerr, log(maxerr)/log(2.0), "maxnum=", maxnum
+    return [T1[a,b] for a in range(2^(pa-2),2^pa) for b in range(2^pb)], \
+           [T2[a,c] for a in range(2^(pa-2),2^pa) for c in range(2^pc)]
+*/
+
+static const unsigned short T1[] = {
+  8160, 8098, 8037, 7977, 7919, 7861, 7805, 7751, 7697, 7644, 7593, 7542,
+  7493, 7445, 7397, 7350, 7304, 7260, 7215, 7172, 7129, 7088, 7047, 7006,
+  6966, 6927, 6889, 6851, 6814, 6778, 6742, 6706, 6671, 6637, 6603, 6570,
+  6537, 6505, 6473, 6442, 6411, 6381, 6351, 6321, 6292, 6263, 6235, 6206,
+  6179, 6152, 6125, 6098, 6072, 6046, 6020, 5995, 5970, 5946, 5921, 5897,
+  5874, 5850, 5827, 5804, 5781, 5759, 5737, 5715, 5693, 5672, 5651, 5630,
+  5609, 5589, 5569, 5549, 5529, 5509, 5490, 5471, 5452, 5433, 5415, 5396,
+  5378, 5360, 5342, 5324, 5307, 5290, 5273, 5256, 5239, 5222, 5206, 5189,
+  5173, 5157, 5141, 5125, 5110, 5094, 5079, 5064, 5049, 5034, 5019, 5004,
+  4990, 4975, 4961, 4947, 4933, 4919, 4905, 4892, 4878, 4865, 4851, 4838,
+  4825, 4812, 4799, 4786, 4773, 4761, 4748, 4736, 4724, 4711, 4699, 4687,
+  4675, 4663, 4652, 4640, 4628, 4617, 4605, 4594, 4583, 4572, 4561, 4550,
+  4539, 4528, 4517, 4506, 4496, 4485, 4475, 4464, 4454, 4444, 4434, 4423,
+  4413, 4403, 4394, 4384, 4374, 4364, 4355, 4345, 4335, 4326, 4317, 4307,
+  4298, 4289, 4280, 4271, 4262, 4253, 4244, 4235, 4226, 4217, 4208, 4200,
+  4191, 4183, 4174, 4166, 4157, 4149, 4141, 4132, 4124, 4116, 4108, 4100};
+
+static const short T2[] = {
+  420, 364, 308, 252, 196, 140, 84, 28, -31, -87, -141, -194, -248, -302, -356, -410,
+  307, 267, 226, 185, 145, 104, 63, 21, -24, -65, -105, -145, -185, -224, -263, -303,
+  237, 205, 174, 142, 111, 79, 48, 16, -16, -45, -75, -105, -136, -167, -198, -229,
+  187, 161, 136, 110, 85, 61, 36, 12, -15, -41, -67, -92, -117, -142, -167, -192,
+  159, 138, 117, 96, 76, 54, 33, 12, -10, -30, -51, -71, -92, -113, -134, -154,
+  130, 112, 95, 77, 60, 43, 26, 9, -10, -28, -46, -64, -81, -98, -116, -133,
+  115, 100, 85, 70, 54, 39, 23, 8, -7, -22, -37, -52, -66, -82, -96, -111,
+  99, 86, 73, 60, 46, 32, 19, 6, -8, -21, -34, -47, -60, -73, -86, -100,
+  86, 75, 63, 52, 40, 28, 17, 5, -7, -19, -31, -43, -54, -66, -78, -90,
+  77, 66, 56, 46, 35, 25, 16, 6, -5, -15, -26, -36, -47, -57, -67, -77,
+  70, 60, 51, 42, 33, 23, 14, 5, -5, -14, -24, -33, -43, -52, -62, -72,
+  65, 57, 49, 40, 31, 23, 14, 6, -3, -12, -20, -28, -37, -45, -54, -62};
+
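[Editor's note: a worked check of the bipartite lookup, done by hand against
the tables above.  For a0 = 2^63 the index fields are a = 8, b = 0, c = 0,
so T1[(8-4)*16+0] = 5781 and T2[(8-4)*16+0] = 159.  The program below is the
editor's own illustration, not part of the patch.]

    #include <math.h>
    #include <stdio.h>

    int main (void)
    {
      /* initial approximation for a0 = 2^63, i.e. a:b:c = 2048 */
      unsigned long x0 = (5781UL << 4) + 159;  /* = 92655 */
      double approx = (double) x0 / 65536.0;   /* x0/2^16 */
      double exact = 64.0 / sqrt (2048.0);     /* 2^6/sqrt(a:b:c) */
      /* prints an error of about 4.0e-4, below the stated bound
         2^(-9.46) ~= 1.4e-3 */
      printf ("%.6f %.6f %.2g\n", approx, exact, fabs (approx - exact));
      return 0;
    }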
-#if GMP_NUMB_BITS == 64
 
+/* return x0 and write rp[0] such that a0 = x0^2 + rp[0],
+   with x0^2 <= a0 < (x0+1)^2 */
 static mp_limb_t
 mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
 {
@@ -36,21 +137,41 @@ mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
   mp_limb_t c = (a0 >> (GMP_NUMB_BITS - 12)) & 0xf;
   mp_limb_t x0, a1, t, y, x2;
 
-  x0 = (T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
-  /* now x0 is a 16-bit approximation, with maximal error 2^(-9.46) */
-
+  x0 = ((mp_limb_t) T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
+  /* now x0/2^16 is a (1+16)-bit approximation of 2^6/sqrt(a*2^8+b*2^4+c),
+     thus of 2^(GMP_NUMB_BITS/2)/sqrt(a0), with maximal error 2^(-9.46) */
+
+#if GMP_NUMB_BITS == 32
+  x0 -= 89; /* x0 -= 93 ensures that x0/2^16 <= 2^16/sqrt(a0) (proof by
+               exhaustive search), which ensures that t = a0 - y^2 >= 0
+               below, and thus that the truncated Karp-Markstein trick
+               gives x0 <= sqrt(a0) at the end.  However (still by
+               exhaustive search) x0 -= 89 is enough to guarantee
+               x0^2 <= a0 at the end, and at most one correction is
+               needed.  With x0 -= 89 the probability of a correction is
+               0.097802, with x0 -= 93 it is 0.106486. */
+  a1 = a0 >> (GMP_NUMB_BITS - 16); /* a1 has 16 bits */
+  y = (a1 * (x0 >> 1)) >> 15; /* y is near 2^32/x0, with 16 bits, and
+                                 should be an approximation of sqrt(a0) */
+  /* |a0 - y^2| <= 13697110 < 2^24 (by exhaustive search) */
+  /* a0 >= y^2 (proof by exhaustive search) */
+  t = (a0 - y * y) >> 8;
+  /* t/2^24 approximates a0/2^32 - (y/2^16)^2, with |t| < 2^16 */
+  /* x0*t/2^41 approximates (x0/2^16)/2 * (a0/2^32 - (y/2^16)^2) */
+  /* x0 * t < 2^32 (proof by exhaustive search) */
+  x0 = y + ((x0 * t) >> 25);
+#else /* GMP_NUMB_BITS = 64 */
   a1 = a0 >> (GMP_NUMB_BITS - 32); /* a1 has 32 bits, thus a1*x0^2 has
                                       64 bits */
-  t = (mp_limb_signed_t) (a1 * (x0 * x0)) >> 32;
-  /* t has 32 bits now */
-  x0 = (x0 << 15) - ((mp_limb_signed_t) (x0 * t) >> (17+1));
+  /* a1*x0^2 might exceed 2^64, but we are only interested in
+     a1*x0^2 - 2^64, which is small */
+  t = (mp_limb_signed_t) (a1 * (x0 * x0)) >> 9;
+  /* |t| < 2^46 (proof by exhaustive search on all possible values of a1,
+     since x0 depends on a1 only) */
+  x0 = (x0 << 16) - ((mp_limb_signed_t) (x0 * t) >> (39+1)) - 1;
 
-  /* now x0 is a 31-bit approximation (32 bits, 1 <= x0/2^31 <= 2),
-     with maximal error 2^(-19.19).  More precisely by exhaustive search
-     on all 32-bit values of a1:
-     -1.665416e-06 <= x0/2^31 - 1/sqrt(a1/2^32) <= 6.959122e-10,
-     with the left worst case for a1=1326448622, x0=3864240766,
-     and the right worst case for a1=1081715548, x0=4279108124. */
+  /* now x0 is a (1+32)-bit approximation such that (by exhaustive search
+     on all 32-bit values of a1):
+     -1.67e-06 <= x0/2^32 - 2^16/sqrt(a1) <= 0 */
 
   /* we now use Karp-Markstein's trick to get a 32-bit approximation
      of the square root of a0:
@@ -58,16 +179,23 @@ mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
      t = a - y^2 [target accuracy, high 19 bits are zero]
      y = y + x0/2 * t [target accuracy] */
 
-  /* a1*x0^2 is near 2^94, thus a1*x0 is near 2^94/x0 */
-  y = (a1 * x0) >> 31; /* y is near 2^63/x0, with 32 bits */
-  t = (mp_limb_signed_t) (a0 - y * y) >> 14;
-  /* t should have at most 64-14-19=31 significant bits */
-  x0 = y + ((mp_limb_signed_t) (x0 * t) >> (49+1));
+  /* a1*x0^2 is near 2^96, thus a1*x0 is near 2^96/x0, thus close to
+     2^48*sqrt(a1), thus close to 2^32*sqrt(a0) */
+  y = (a1 * x0) >> 32; /* y is near sqrt(a0), with 32 bits */
+  /* now a0 >= y^2 */
+  t = (a0 - y * y) >> 13;
+  /* t < 2^31 (by exhaustive search on all possible values of a1, with
+     a0 = 2^32*a1 + (2^32-1)) */
+  /* since x0 < 2^33 and t < 2^31, x0*t does not overflow */
+  x0 = y + ((x0 * t) >> (64-13+1));
+#endif
 
-  /* x0 is now a 32-bit approximation of sqrt(a0) */
+  /* x0 is now a (GMP_NUMB_BITS/2)-bit approximation of sqrt(a0),
+     with x0 <= sqrt(a0) */
   x2 = x0 * x0;
-  while (x2 + 2*x0 < a0)
+  if (x2 + 2*x0 < a0) /* x0 is too small: the probability of a correction
+                         is 0.097802 for GMP_NUMB_BITS=32, and 0.000017
+                         for GMP_NUMB_BITS=64 */
     {
       x2 += 2*x0 + 1;
       x0++;
@@ -77,10 +205,11 @@
 
   return x0;
 }
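[Editor's note: a sketch of how the contract stated above can be exercised;
the variable names are hypothetical, and the function is the one defined in
this patch.]

    mp_limb_t r, s;
    s = mpn_sqrtrem1 (&r, a0);      /* requires a0 >= 2^(GMP_NUMB_BITS-2) */
    MPFR_ASSERTD (s * s + r == a0); /* a0 = s^2 + r; no overflow, since
                                       s^2 <= a0 and r <= 2*s */
    MPFR_ASSERTD (r <= 2 * s);      /* equivalent to a0 < (s+1)^2 */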
-/* Return a (1+40)-bit approximation x0 of 2^72/sqrt(a0).
-   Assume a0 >= 2^(GMP_NUMB_BITS-2), thus x0 has 41 bits.
-   Assume GMP_NUMB_BITS=64.
-*/
+/* For GMP_NUMB_BITS=32: return a (1+20)-bit approximation x0 of
+   2^36/sqrt(a0).
+   For GMP_NUMB_BITS=64: return a (1+40)-bit approximation x0 of
+   2^72/sqrt(a0).
+   Assume a0 >= 2^(GMP_NUMB_BITS-2), and GMP_NUMB_BITS = 32 or 64.
+   Must ensure: x0 <= 2^36/sqrt(a0) for GMP_NUMB_BITS=32,
+                x0 <= 2^72/sqrt(a0) for GMP_NUMB_BITS=64. */
 static mp_limb_t
 mpn_rsqrtrem1 (mp_limb_t a0)
 {
@@ -89,12 +218,31 @@ mpn_rsqrtrem1 (mp_limb_t a0)
   mp_limb_t c = (a0 >> (GMP_NUMB_BITS - 12)) & 0xf;
   mp_limb_t x0, a1, t;
 
-  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
+  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64);
 
-  x0 = (T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
+  x0 = ((mp_limb_t) T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
   /* now x0 is a 16-bit approximation, with maximal error 2^(-9.46):
      -2^(-9.46) <= x0/2^16 - 1/sqrt(a/2^4) <= 2^(-9.46) */
 
+#if GMP_NUMB_BITS == 32
+  x0 >>= 1; /* reduce the approximation to 1+15 bits */
+  /* In principle, a0>>10 can have up to 22 bits, and (x0^2)>>12 can have
+     up to 20 bits, thus the product can have up to 42 bits; but since
+     a0*x0^2 is very near 2^62, (a0>>10)*((x0^2)>>12) is very near 2^40,
+     and when we reduce it mod 2^32 and interpret the result as a signed
+     number in [-2^31, 2^31-1], we get the correct remainder
+     (a0>>10)*((x0^2)>>12) - 2^40 */
+  t = (mp_limb_signed_t) ((a0 >> 10) * ((x0 * x0) >> 12)) >> 8;
+  /* |t| < 6742843 <= 2^23 - 256 (by exhaustive search) */
+  t = (mp_limb_signed_t) t >> 8; /* now |t| < 2^15, thus |x0*t| < 2^31 */
+  x0 = (x0 << 5) - ((mp_limb_signed_t) (x0 * t) >> 20);
+
+  /* by exhaustive search on all possible values of a0, we get:
+     -1.61 <= x0 - 2^36/sqrt(a0) <= 3.11, thus
+     -2^(-19.3) < -1.54e-6 <= x0/2^20 - 2^16/sqrt(a0) <= 2.97e-6 < 2^(-18.3)
+  */
+
+  return x0 - 4; /* ensures x0 <= 2^36/sqrt(a0) */
+#else /* GMP_NUMB_BITS = 64 */
   a1 = a0 >> (GMP_NUMB_BITS - 32); /* a1 has 32 bits, thus a1*x0^2 has
                                       64 bits */
   t = (mp_limb_signed_t) (-a1 * (x0 * x0)) >> 32;
@@ -112,13 +260,21 @@
 
   /* now x0 is a 1+40-bit approximation, more precisely we have
      (experimentally):
-     -3.157551e-12 <= x0/2^40 - 2^32/sqrt(a0) <= 3.834848e-12
+     -2^(-38.2) < -3.16e-12 <= x0/2^40 - 2^32/sqrt(a0) <= 3.84e-12 < 2^(-37.9)
   */
-
-  return x0;
+  return x0 - 5; /* ensures x0 <= 2^72/sqrt(a0) */
+#endif
 }
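[Editor's note: the refinement above is a fixed-point Newton step for the
reciprocal square root.  Below is the editor's floating-point sketch of the
same iteration, x' = x + (x/2)*(1 - a*x^2), with hypothetical values; each
step roughly doubles the number of correct bits.]

    #include <math.h>
    #include <stdio.h>

    int main (void)
    {
      double a = 0.30, x = 1.8;  /* x roughly approximates 1/sqrt(a) */
      x = x + 0.5 * x * (1.0 - a * x * x);  /* one Newton step */
      /* prints 1.825200 vs 1.825742: the error dropped from about
         2.6e-2 to about 5.4e-4 */
      printf ("%.6f vs %.6f\n", x, 1.0 / sqrt (a));
      return 0;
    }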
 
-/* Comments below are for GMP_NUMB_BITS=64 for simplicity, but the code is
-   valid
+/* Given as input np[0] and np[1], with B/4 <= np[1] (where
+   B = 2^GMP_NUMB_BITS), mpn_sqrtrem2 returns a value x, 0 <= x <= 1,
+   and stores values s in sp[0] and r in rp[0] such that:
+
+     n := np[1]*B + np[0] = s^2 + x*B + r,  with n < (s+1)^2,
+
+   or equivalently x*B + r <= 2*s.
+
+   This comment is for GMP_NUMB_BITS=64 for simplicity, but the code is
+   valid
    for any even value of GMP_NUMB_BITS.  The algorithm used is the
    following, and uses Karp-Markstein's trick:
    - start from x, a 33-bit approximation of 2^64/sqrt(n1), with
      x <= 2^64/sqrt(n1)
@@ -126,6 +282,7 @@
    - t = n1 - y^2
    - u = (x * t) >> 33
    - y = (y << 32) + u
+   Proof:
    * we know that Newton's iteration for the reciprocal square root,
@@ -159,7 +316,32 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
   mp_limb_t x, y, t, high, low;
 
   x = mpn_rsqrtrem1 (np[1]);
-  /* x is an approximation of 2^72/sqrt(n1), x has 41 bits */
+  /* we must have x^2*n1 <= 2^72  for GMP_NUMB_BITS=32,
+                         <= 2^144 for GMP_NUMB_BITS=64 */
+
+#if GMP_NUMB_BITS == 32
+  MPFR_ASSERTD ((double) x * (double) x * (double) np[1]
+                < 4722366482869645213696.0);
+  /* x is an approximation of 2^36/sqrt(n1), x has 1+20 bits */
+
+  /* We know x/2^20 <= 2^16/sqrt(n1) + 2^(-18),
+     thus n1*x/2^36 <= sqrt(n1) + 2^(-18)*n1/2^16 <= sqrt(n1) + 2^(-2). */
+
+  /* compute y = floor(np[1]*x/2^36), cutting the upper 23 bits of n1
+     into two parts of 12 and 11 bits, which can be multiplied by x
+     without overflow (warning: if we took 12 bits for low, low * x
+     might overflow) */
+  high = np[1] >> 20;          /* upper 12 bits of n1 */
+  MPFR_ASSERTD((double) high * (double) x < 4294967296.0);
+  low = (np[1] >> 9) & 0x7ff;  /* next 11 bits */
+  MPFR_ASSERTD((double) low * (double) x < 4294967296.0);
+  y = high * x + ((low * x) >> 11); /* y approximates n1*x/2^20 */
+  y = (y - 0x4000) >> 16; /* the constant 0x4000 = 2^(36-2-20) takes into
+                             account the 2^(-2) error above, to ensure
+                             y <= sqrt(n1) */
+#else /* GMP_NUMB_BITS = 64 */
+  MPFR_ASSERTD ((double) x * (double) x * (double) np[1]
+                < 2.2300745198530623e43);
+  /* x is an approximation of 2^72/sqrt(n1), x has 1+40 bits */
 
   /* We know x/2^40 <= 2^32/sqrt(n1) + 3.9e-12 <= 2^32/sqrt(n1) + 2^(-37),
      thus n1*x/2^72 <= sqrt(n1) + 2^(-37)*n1/2^32 <= sqrt(n1) + 2^(-5). */
@@ -172,16 +354,26 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
   low = (np[1] >> 17) & 0x7fffff; /* next 23 bits */
   MPFR_ASSERTD((double) low * (double) x < 18446744073709551616.0);
   y = high * x + ((low * x) >> 23); /* y approximates n1*x/2^40 */
-  /* y-1 takes into account the 2^(-31) error term, to ensure y <= sqrt(n1)
-     after the right shift below */
   y = (y - 0x8000000) >> 32; /* the constant 0x8000000 = 2^(72-5-40) takes
                                 into account the 2^(-5) error above, to
                                 ensure y <= sqrt(n1) */
+#endif
 
   /* y is an approximation of sqrt(n1), with y <= sqrt(n1) */
 
   t = np[1] - y * y;
   MPFR_ASSERTD((mp_limb_signed_t) t >= 0);
 
+#if GMP_NUMB_BITS == 32
+  /* we now compute t = (x * t) >> (20 + 1), but x * t might have more
+     than 32 bits, thus we cut t into two parts of 12 and 11 bits */
+  high = t >> 11;
+  low = t & 0x7ff;
+  MPFR_ASSERTD((double) high * (double) x < 4294967296.0);
+  MPFR_ASSERTD((double) low * (double) x < 4294967296.0);
+  t = high * x + ((low * x) >> 11); /* approximates t*x/2^11 */
+
+  y = (y << (GMP_NUMB_BITS / 2)) + (t >> 10);
+#else
   /* we now compute t = (x * t) >> (40 + 1), but x * t might have more
     than 64 bits, thus we cut t into two parts of 24 and 23 bits */
   high = t >> 23;
@@ -191,8 +383,11 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
   t = high * x + ((low * x) >> 23); /* approximates t*x/2^23 */
 
   y = (y << (GMP_NUMB_BITS / 2)) + (t >> 18);
-  if (y < (1UL << 63))
-    y = 1UL << 63; /* the correction code below assumes y >= 2^63 */
+#endif
+
+  /* the correction code below assumes y >= 2^(GMP_NUMB_BITS - 1) */
+  if (y < (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)))
+    y = MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1);
 
   umul_ppmm (x, t, y, y);
   MPFR_ASSERTD(x < np[1] || (x == np[1] && t <= np[0])); /* y should not
                                                             be too large */
@@ -206,8 +401,11 @@
       x -= 1 + (t < (2 * y + 1));
       t -= 2 * y + 1;
       y ++;
-      /* maximal number of loops observed is 3, for n1=4651405965214438987,
-         n0=18443926066120424952, average is 0.576 */
+      /* GMP_NUMB_BITS=32: the average number of loops observed is 0.982,
+         the max is 3 (for n1=1277869843, n0=3530774227);
+         GMP_NUMB_BITS=64: the average number of loops observed is 0.593,
+         the max is 3 (for n1=4651405965214438987,
+         n0=18443926066120424952). */
     }
 
   sp[0] = y;
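[Editor's note: the splitting trick above relies on the identity
floor(t*x/2^k) = high*x + floor(low*x/2^k) when t = high*2^k + low, which
holds because high*x*2^k is a multiple of 2^k.  Below is the editor's
standalone check of the 64-bit variant (k = 23), with hypothetical values
sized like those in the code (t below 2^33, x below 2^41); the wide product
uses unsigned __int128, available under the __GNUC__ requirement stated at
the top of the file.]

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
      uint64_t t = 0x1b2c3d4e5ULL;    /* 33 bits */
      uint64_t x = 0x123456789abULL;  /* 41 bits */
      uint64_t high = t >> 23, low = t & 0x7fffff;
      uint64_t split = high * x + ((low * x) >> 23); /* no 64-bit overflow */
      uint64_t exact = (uint64_t) (((unsigned __int128) t * x) >> 23);
      assert (split == exact);
      printf ("%llu\n", (unsigned long long) split);
      return 0;
    }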
@@ -225,12 +423,13 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
   mpfr_limb_ptr rp = MPFR_MANT(r);
 
   u0 = MPFR_MANT(u)[0];
-  if (exp_u & 1)
+  if (((unsigned int) exp_u & 1) != 0)
     {
       u0 >>= 1;
       exp_u ++;
     }
-  exp_r = exp_u >> 1;
+  MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
+  exp_r = exp_u / 2;
 
   if (p < GMP_NUMB_BITS / 2)
     r0 = mpn_sqrtrem1 (&sb, u0) << (GMP_NUMB_BITS / 2);
@@ -309,7 +508,275 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
       MPFR_RET(1);
     }
 }
-#endif
+
+#if GMP_NUMB_BITS == 64
+/* For GMP_NUMB_BITS=64: return a (1+80)-bit approximation
+   x = xp[1]*B + xp[0] of 2^144/sqrt(ap[1]*B + ap[0]).
+   Assume ap[1] >= B/4, thus sqrt(ap[1]*B+ap[0]) >= B/2, thus x <= 2^81. */
+static void
+mpn_rsqrtrem2 (mpfr_limb_ptr xp, mpfr_limb_srcptr ap)
+{
+  mp_limb_t t1, t0, u1, u0, r0, r1, r2;
+
+  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
+
+  xp[1] = mpn_rsqrtrem1 (ap[1]);
+
+  /* now we should compute x + (x/2) * (1 - a*x^2), where the upper ~40
+     bits of a*x^2 should cancel with 1, thus we only need the following
+     40 bits */
+
+  /* xp[1] has 1+40 bits, with xp[1] <= 2^72/sqrt(ap[1]) */
+  umul_ppmm (t1, t0, xp[1], xp[1]);
+
+  /* now t1 has at most 18 bits, its low 16 bits being the fractional
+     part */
+
+  u1 = ap[1] >> 48;
+  u0 = (ap[1] << 16) | (ap[0] >> 48);
+
+  /* u1 has the 16 most significant bits of a, and u0 the next 64 bits */
+
+  /* we want  t1*u1 << 48  +  (t1*u0+t0*u1) >> 16  +  t0*u0 >> 80
+              [32 bits]       [64 bits]               [48 bits]
+     but since we know the upper ~40 bits cancel with 1, we can ignore
+     t1*u1. */
+  umul_ppmm (r2, r1, t0, u0);
+  r0 = t1 * u0; /* we ignore the upper 16 bits of the product */
+  r1 = t0 * u1; /* we ignore the upper 16 bits of the product */
+  r0 = r0 + r1 + r2;
+
+  /* the upper ~8 bits of r0 should cancel with 1, we are interested in
+     the next 40 bits */
+
+  umul_ppmm (t1, t0, xp[1], -r0);
+
+  /* we should now add t1 >> 33 to xp[1] */
+  xp[1] += t1 >> 33;
+  xp[0] = t1 << 31;
+  /* shift by 24 bits to the right, since xp[1] has 24 leading zeros,
+     and we now expect 48 */
+  xp[0] = (xp[1] << 40) | (xp[0] >> 24);
+  xp[1] = xp[1] >> 24;
+}
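[Editor's note: umul_ppmm, from GMP's longlong.h, stores the high and low
limbs of a full 64x64 -> 128-bit product.  A portable sketch of its effect,
assuming unsigned __int128 is available (consistent with the __GNUC__
requirement stated at the top of the file); the name below is the editor's
own.]

    #include <stdint.h>

    static inline void
    umul_ppmm_sketch (uint64_t *h, uint64_t *l, uint64_t u, uint64_t v)
    {
      unsigned __int128 p = (unsigned __int128) u * v;
      *h = (uint64_t) (p >> 64);  /* upper limb of the product */
      *l = (uint64_t) p;          /* lower limb of the product */
    }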
+
+/* Given as input ap[0-3], with B/4 <= ap[3] (where B = 2^GMP_NUMB_BITS),
+   mpn_sqrtrem4 returns a value x, 0 <= x <= 1, and stores values s in
+   sp[0-1] and r in rp[0-1] such that:
+
+     n := ap[3]*B^3 + ap[2]*B^2 + ap[1]*B + ap[0] = s^2 + x*B^2 + r,
+     with n < (s+1)^2,
+
+   or equivalently x*B^2 + r <= 2*s.
+
+   This code currently assumes GMP_NUMB_BITS = 64. */
+static mp_limb_t
+mpn_sqrtrem4 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr ap)
+{
+  mp_limb_t x[2], t1, t0, r2, r1, h, l, u2, u1, b[4];
+
+  MPFR_STAT_STATIC_ASSERT(GMP_NUMB_BITS == 64);
+
+  mpn_rsqrtrem2 (x, ap + 2);
+
+  /* x[1]*B+x[0] is an 80-bit approximation of 2^144/sqrt(ap[3]*B+ap[2]),
+     and should be smaller */
+
+  /* first compute y0 = a*x with at least 80 bits of precision */
+
+  t1 = ap[3] >> 48;
+  t0 = (ap[3] << 16) | (ap[2] >> 48);
+
+  /* now t1:t0 is a (16+64)-bit approximation of a, and
+     (x1*B+x0) * (t1*B+t0) = (x1*t1)*B^2 + (x1*t0+x0*t1)*B + x0*t0 */
+  r2 = x[1] * t1; /* r2 has 32 bits */
+  umul_ppmm (h, r1, x[1], t0);
+  r2 += h;
+  umul_ppmm (h, l, x[0], t1);
+  r1 += l;
+  r2 += h + (r1 < l);
+  umul_ppmm (h, l, x[0], t0);
+  r1 += h;
+  r2 += (r1 < h);
+
+  /* r2 has 32 bits, r1 has 64 bits, thus we have 96 bits in total; we
+     put 64 bits in r2 and 16 bits in r1 */
+  r2 = (r2 << 32) | (r1 >> 32);
+  r1 = (r1 << 32) >> 48;
+
+  /* we consider y0 = r2*2^16 + r1, which has 80 bits, and should be
+     smaller than 2^16*sqrt(ap[3]*B+ap[2]) */
+
+  /* Now compute y0 + (x/2)*(a - y0^2), which should give ~160 correct
+     bits.  Since the ~80 most significant bits of a - y0^2 cancel, only
+     ~48 bits remain. */
+
+  /* now r2:r1 represents y0, with r2 of 64 bits and r1 of 16 bits,
+     and we compute y0^2, whose upper ~80 bits should cancel with a:
+     y0^2 = r2^2*2^32 + 2*r2*r1*2^16 + r1^2. */
+  t1 = r2 * r2; /* we can simply ignore the upper 64 bits of r2^2 */
+  umul_ppmm (h, l, r2, r1);
+  t0 = l << 49; /* takes into account the factor 2 in 2*r2*r1 */
+  u1 = (r1 * r1) << 32; /* temporary variable */
+  t0 += u1;
+  t1 += ((h << 49) | (l >> 15)) + (t0 < u1); /* takes into account the
+                                                factor 2 */
+
+  /* now t1:t0 >> 32 equals y0^2 mod 2^96; since y0^2 has 160 bits, we
+     should shift t1:t0 by 64 bits to the right */
+  t0 = ap[2] - t1 - (t0 != 0); /* we round downwards to get a lower
+                                  approximation of sqrt(a) at the end */
+
+  /* now t0 equals ap[3]*B+ap[2] - ceil(y0^2/2^32) */
+
+  umul_ppmm (u2, u1, x[1], t0);
+  umul_ppmm (h, l, x[0], t0);
+  u1 += h;
+  u2 += (u1 < h);
+
+  /* divide by 2 to take into account the factor 1/2 in (x/2)*(a - y0^2) */
+  u1 = (u2 << 63) | (u1 >> 1);
+  u2 = u2 >> 1;
+
+  /* u2:u1 approximates (x/2)*(ap[3]*B+ap[2] - y0^2/2^32) / 2^64,
+     and should be smaller */
+
+  r1 <<= 48; /* put back the most significant bits of r1 in place */
+
+  /* add u2:u1 >> 16 to y0 */
+  sp[0] = r1 + ((u2 << 48) | (u1 >> 16));
+  sp[1] = r2 + (u2 >> 16) + (sp[0] < r1);
+
+  mpn_mul_n (b, sp, sp, 2);
+  b[2] = ap[2] - b[2] - mpn_sub_n (rp, ap, b, 2);
+
+  /* invariant: the remainder {ap, 4} - {sp, 2}^2 is b[2]*B^2 + {rp, 2} */
+
+  t0 = mpn_lshift (b, sp, 2, 1);
+
+  /* Warning: the initial {sp, 2} might be < 2^127, thus t0 might be 0. */
+
+  /* invariant: 2*{sp,2} = t0*B + {b, 2} */
+
+  /* While the remainder is greater than 2*s, we should subtract 2*s+1
+     from the remainder, and add 1 to the square root.  This loop seems
+     to be executed at most twice. */
+  while (b[2] > t0 || (b[2] == t0 &&
+                       (rp[1] > b[1] || (rp[1] == b[1] && rp[0] > b[0]))))
+    {
+      /* subtract 2*s from b[2]*B^2 + {rp, 2} */
+      b[2] -= t0 + mpn_sub_n (rp, rp, b, 2);
+      /* subtract 1 from b[2]*B^2 + {rp, 2}: b[2] -= mpn_sub_1 (rp, rp, 2, 1) */
+      if (rp[0]-- == 0)
+        b[2] -= (rp[1]-- == 0);
+      /* add 1 to s */
+      mpn_add_1 (sp, sp, 2, 1);
+      /* add 2 to t0*B + {b, 2}: t0 += mpn_add_1 (b, b, 2, 2) */
+      b[0] += 2;
+      if (b[0] < 2)
+        t0 += (b[1]++ == 0);
+    }
+
+  return b[2];
+}
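[Editor's note: a toy single-word version of the final correction step
above, sketched by the editor.  As in mpn_sqrtrem4, the candidate root s is
assumed to be a slight underestimate of the integer square root; each pass
absorbs 2s+1 = (s+1)^2 - s^2 from the remainder.]

    #include <assert.h>
    #include <stdint.h>

    static uint32_t
    correct_isqrt (uint32_t n, uint32_t s)
    {
      /* precondition (hypothetical caller): s <= isqrt(n), s close to it */
      while (n - s * s > 2 * s)  /* remainder > 2s means (s+1)^2 <= n */
        s++;
      assert (s * s <= n && n - s * s <= 2 * s);  /* s = floor(sqrt(n)) */
      return s;
    }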
+
+/* Special code for GMP_NUMB_BITS < prec(r) < 2*GMP_NUMB_BITS,
+   and GMP_NUMB_BITS < prec(u) <= 2*GMP_NUMB_BITS.
+   This code should work for any value of GMP_NUMB_BITS, but since
+   mpn_sqrtrem4 currently assumes GMP_NUMB_BITS=64, it only works for
+   GMP_NUMB_BITS=64. */
+static int
+mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
+{
+  mpfr_prec_t p = MPFR_GET_PREC(r);
+  mpfr_limb_ptr up = MPFR_MANT(u), rp = MPFR_MANT(r);
+  mp_limb_t np[4], tp[2], rb, sb, mask;
+  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = 2 * GMP_NUMB_BITS - p;
+
+  if (((unsigned int) exp_u & 1) != 0)
+    {
+      np[3] = up[1] >> 1;
+      np[2] = (up[1] << (GMP_NUMB_BITS - 1)) | (up[0] >> 1);
+      exp_u ++;
+    }
+  else
+    {
+      np[3] = up[1];
+      np[2] = up[0];
+    }
+  MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
+  exp_r = exp_u / 2;
+
+  np[1] = np[0] = 0;
+  sb = mpn_sqrtrem4 (rp, tp, np);
+  sb |= tp[0] | tp[1];
+  rb = rp[0] & (MPFR_LIMB_ONE << (sh - 1));
+  mask = MPFR_LIMB_MASK(sh);
+  sb |= (rp[0] & mask) ^ rb;
+  rp[0] = rp[0] & ~mask;
+
+  /* rounding */
+  if (exp_r > __gmpfr_emax)
+    return mpfr_overflow (r, rnd_mode, 1);
+
+  /* See the comments in mpfr_divsp1. */
+  if (exp_r < __gmpfr_emin)
+    {
+      if (rnd_mode == MPFR_RNDN)
+        {
+          if ((exp_r == __gmpfr_emin - 1) && (rp[1] == ~MPFR_LIMB_ZERO &&
+                                              rp[0] == ~mask) && rb)
+            goto rounding; /* no underflow */
+          if (exp_r < __gmpfr_emin - 1 || (rp[1] == MPFR_LIMB_HIGHBIT &&
+                                           rp[0] == MPFR_LIMB_ZERO && sb == 0))
+            rnd_mode = MPFR_RNDZ;
+        }
+      else if (!MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
+        {
+          if ((exp_r == __gmpfr_emin - 1) && (rp[1] == ~MPFR_LIMB_ZERO &&
+                                              rp[0] == ~mask) && (rb | sb))
+            goto rounding; /* no underflow */
+        }
+      return mpfr_underflow (r, rnd_mode, 1);
+    }
+
+ rounding:
+  MPFR_EXP (r) = exp_r;
+  if (rb == 0 && sb == 0)
+    {
+      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
+      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
+      return 0; /* same as MPFR_RET(0), but faster */
+    }
+  else if (rnd_mode == MPFR_RNDN)
+    {
+      if (rb == 0 || (rb && sb == 0 &&
+                      (rp[0] & (MPFR_LIMB_ONE << sh)) == 0))
+        goto truncate;
+      else
+        goto add_one_ulp;
+    }
+  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
+    {
+    truncate:
+      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
+      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
+      MPFR_RET(-1);
+    }
+  else /* round away from zero */
+    {
+    add_one_ulp:
+      rp[0] += MPFR_LIMB_ONE << sh;
+      rp[1] += rp[0] == 0;
+      if (rp[1] == 0)
+        {
+          rp[1] = MPFR_LIMB_HIGHBIT;
+          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
+            return mpfr_overflow (r, rnd_mode, 1);
+          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
+          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
+          MPFR_SET_EXP (r, exp_r + 1);
+        }
+      MPFR_RET(1);
+    }
+}
+#endif /* GMP_NUMB_BITS == 64 */
+
+#endif /* !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64) */
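[Editor's note: an illustration of the rb/sb convention used in the rounding
code above: rb is the first bit below the target precision and sb is the OR
of all lower bits; for MPFR_RNDN the value is rounded up iff
rb && (sb || lsb), i.e. round to nearest, ties to even.  Toy example by the
editor, an 8-bit value rounded to 4 bits:]

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
      uint8_t v = 0xB8;             /* 1011 1000 in binary */
      uint8_t rb = (v >> 3) & 1;    /* round bit: 1 */
      uint8_t sb = (v & 0x7) != 0;  /* sticky bit: 0, so this is a tie */
      uint8_t lsb = (v >> 4) & 1;   /* last kept bit: 1 (odd) */
      uint8_t res = (v >> 4) + (rb && (sb || lsb));
      printf ("0x%x\n", res);       /* prints 0xc: rounded up to even */
      return 0;
    }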
 
 int
 mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
@@ -372,10 +839,18 @@ mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
     }
   MPFR_SET_POS(r);
 
-#if GMP_NUMB_BITS == 64
+  /* See the note at the beginning of this file about __GNUC__. */
+#if !defined(MPFR_GENERIC_ABI) && defined(__GNUC__) && \
+    (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64)
   if (MPFR_GET_PREC (r) < GMP_NUMB_BITS && MPFR_GET_PREC (u) < GMP_NUMB_BITS)
     return mpfr_sqrt1 (r, u, rnd_mode);
-#endif
+#endif
+
+#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64
+  if (GMP_NUMB_BITS < MPFR_GET_PREC (r) && MPFR_GET_PREC (r) < 2*GMP_NUMB_BITS
+      && MPFR_LIMB_SIZE(u) == 2)
+    return mpfr_sqrt2 (r, u, rnd_mode);
+#endif
 
   MPFR_TMP_MARK (marker);
   MPFR_UNSIGNED_MINUS_MODULO (sh, MPFR_GET_PREC (r));
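[Editor's note: a usage sketch of the public entry point exercised by this
patch, using only documented MPFR API calls.  With a 53-bit precision and a
64-bit limb, this call takes the mpfr_sqrt1 fast path added above (under a
GCC-compatible compiler).]

    #include <stdio.h>
    #include <mpfr.h>

    int main (void)
    {
      mpfr_t r, u;
      mpfr_init2 (r, 53);
      mpfr_init2 (u, 53);
      mpfr_set_ui (u, 2, MPFR_RNDN);
      mpfr_sqrt (r, u, MPFR_RNDN);  /* r = sqrt(2), correctly rounded */
      mpfr_printf ("%.16Rf\n", r);  /* prints 1.4142135623730951 */
      mpfr_clears (r, u, (mpfr_ptr) 0);
      return 0;
    }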