Diffstat (limited to 'src/sqrt.c')
-rw-r--r-- src/sqrt.c | 559
1 files changed, 517 insertions, 42 deletions
diff --git a/src/sqrt.c b/src/sqrt.c
index af993fa64..332720e3f 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -20,14 +20,115 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+/*
+*** Note ***
+
+ The code of mpfr_sqrt1 and/or functions it calls depends on
+ implementation-defined features of the C standard. See lines with a
+ cast to mp_limb_signed_t, associated with a shift to the right '>>'.
+ Such features are known to behave as the code below expects with GCC,
+ according to the GCC manual (excerpt from the GCC 4.9.3 manual):
+
+ 4 C Implementation-defined behavior
+ ***********************************
+
+ 4.5 Integers
+ ============
+
+ * 'The result of, or the signal raised by, converting an integer to
+ a signed integer type when the value cannot be represented in an
+ object of that type (C90 6.2.1.2, C99 and C11 6.3.1.3).'
+
+ For conversion to a type of width N, the value is reduced modulo
+ 2^N to be within range of the type; no signal is raised.
+
+ * 'The results of some bitwise operations on signed integers (C90
+ 6.3, C99 and C11 6.5).'
+
+ Bitwise operators act on the representation of the value including
+ both the sign and value bits, where the sign bit is considered
+ immediately above the highest-value value bit. Signed '>>' acts
+ on negative numbers by sign extension.
+
+ It is not known whether it works with other compilers. Thus this code
+ is currently enabled only when __GNUC__ is defined (which includes
+ compilers that declare a compatibility with GCC). A configure test
+ might be an alternative solution (but without any guarantee, in case
+ the result may also depend on the context).
+
+ Warning! The right shift of a negative value corresponds to an integer
+ division by a power of two, with rounding toward negative infinity.
+
+ TODO: Complete the comments wherever a right shift of a negative value
+ may be involved, so that the rounding toward negative infinity appears
+ in the proof. There has been at least one error in the proof of a bound!
+*/
+
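As a minimal standalone illustration of the implementation-defined behavior the note above relies on (assuming a GCC-compatible compiler and a 64-bit limb; the type names below are stand-ins, not MPFR's), the following shows that a signed right shift of a negative value sign-extends, and therefore behaves like a division by a power of two rounded toward negative infinity:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t limb;          /* stand-in for mp_limb_t */
    typedef int64_t  limb_signed;   /* stand-in for mp_limb_signed_t */

    int main (void)
    {
      limb a = (limb) -13;              /* wraps modulo 2^64 */
      limb_signed s = (limb_signed) a;  /* implementation-defined: gives -13 with GCC */

      /* Signed '>>' sign-extends, so s >> 2 is floor(-13/4) = -4, whereas the
         division operator truncates toward zero and gives -3. */
      printf ("%lld %lld\n", (long long) (s >> 2), (long long) (s / 4));
      return 0;
    }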
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
+#if !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64)
+
+/* The tables T1[] and T2[] below were generated using the Sage code below,
+ with T1,T2 = bipartite(4,4,4,12,16). Note: we would get a slightly smaller
 error using an approximation of the form T1[a,b] * (1 + T2[a,c]), but this
+ would make the code more complex, and the approximation would not always fit
+ on p2 bits (assuming p2 >= p1).
+# bi-partite method, using a table T1 of p1 bits, and T2 of p2 bits
+# approximate sqrt(a:b:c) with T1[a,b] + T2[a,c]
+def bipartite(pa,pb,pc,p1,p2):
+ T1 = dict()
+ T2 = dict()
+ maxerr = maxnum = 0
+ for a in range(2^(pa-2),2^pa):
+ for b in range(2^pb):
+ A1 = (a*2^pb+b)/2^(pa+pb)
+ A2 = (a*2^pb+b+1)/2^(pa+pb)
+ X = (1/sqrt(A1) + 1/sqrt(A2))/2
+ X = round(X*2^p1)/2^p1
+ T1[a,b] = X*2^p1
+ maxnum = max(maxnum, abs(T1[a,b]))
+ # print "a=", a, "b=", b, "T1=", x
+ maxerr = max(maxerr, n(abs(1/sqrt(A1) - X)))
+ maxerr = max(maxerr, n(abs(1/sqrt(A2) - X)))
+ print "maxerr1 =", maxerr, log(maxerr)/log(2.0), "maxnum=", maxnum
+ maxerr = maxnum = 0
+ for a in range(2^(pa-2),2^pa):
+ for c in range(2^pc):
+ Xmin = infinity
+ Xmax = -infinity
+ for b in range(2^pb):
+ A = (a*2^(pb+pc)+b*2^pc+c)/2^(pa+pb+pc)
+ X = 1/sqrt(A) - T1[a,b]/2^p1
+ X = round(X*2^p2)/2^p2
+ Xmin = min (Xmin, X)
+ Xmax = max (Xmax, X)
+ A = (a*2^(pb+pc)+b*2^pc+c+1)/2^(pa+pb+pc)
+ X = 1/sqrt(A) - T1[a,b]/2^p1
+ X = round(X*2^p2)/2^p2
+ Xmin = min (Xmin, X)
+ Xmax = max (Xmax, X)
+ T2[a,c] = round((Xmin + Xmax)/2*2^p2)
+ maxnum = max(maxnum, abs(T2[a,c]))
+ # print "a=", a, "c=", c, "T2=", T2[a,c]
+ for b in range(2^pb):
+ A = (a*2^(pb+pc)+b*2^pc+c)/2^(pa+pb+pc)
+ X = 1/sqrt(A)
+ maxerr = max(maxerr, n(abs(X - (T1[a,b]/2^p1 + T2[a,c]/2^p2))))
+ A = (a*2^(pb+pc)+b*2^pc+c+1)/2^(pa+pb+pc)
+ X = 1/sqrt(A)
+ maxerr = max(maxerr, n(abs(X - (T1[a,b]/2^p1 + T2[a,c]/2^p2))))
+ print "maxerr2 =", maxerr, log(maxerr)/log(2.0), "maxnum=", maxnum
+ return [T1[a,b] for a in range(2^(pa-2),2^pa) for b in range(2^pb)], \
+ [T2[a,c] for a in range(2^(pa-2),2^pa) for c in range(2^pc)]
+*/
+
static const unsigned short T1[] = {8160, 8098, 8037, 7977, 7919, 7861, 7805, 7751, 7697, 7644, 7593, 7542, 7493, 7445, 7397, 7350, 7304, 7260, 7215, 7172, 7129, 7088, 7047, 7006, 6966, 6927, 6889, 6851, 6814, 6778, 6742, 6706, 6671, 6637, 6603, 6570, 6537, 6505, 6473, 6442, 6411, 6381, 6351, 6321, 6292, 6263, 6235, 6206, 6179, 6152, 6125, 6098, 6072, 6046, 6020, 5995, 5970, 5946, 5921, 5897, 5874, 5850, 5827, 5804, 5781, 5759, 5737, 5715, 5693, 5672, 5651, 5630, 5609, 5589, 5569, 5549, 5529, 5509, 5490, 5471, 5452, 5433, 5415, 5396, 5378, 5360, 5342, 5324, 5307, 5290, 5273, 5256, 5239, 5222, 5206, 5189, 5173, 5157, 5141, 5125, 5110, 5094, 5079, 5064, 5049, 5034, 5019, 5004, 4990, 4975, 4961, 4947, 4933, 4919, 4905, 4892, 4878, 4865, 4851, 4838, 4825, 4812, 4799, 4786, 4773, 4761, 4748, 4736, 4724, 4711, 4699, 4687, 4675, 4663, 4652, 4640, 4628, 4617, 4605, 4594, 4583, 4572, 4561, 4550, 4539, 4528, 4517, 4506, 4496, 4485, 4475, 4464, 4454, 4444, 4434, 4423, 4413, 4403, 4394, 4384, 4374, 4364, 4355, 4345, 4335, 4326, 4317, 4307, 4298, 4289, 4280, 4271, 4262, 4253, 4244, 4235, 4226, 4217, 4208, 4200, 4191, 4183, 4174, 4166, 4157, 4149, 4141, 4132, 4124, 4116, 4108, 4100};
static const short T2[] = {420, 364, 308, 252, 196, 140, 84, 28, -31, -87, -141, -194, -248, -302, -356, -410, 307, 267, 226, 185, 145, 104, 63, 21, -24, -65, -105, -145, -185, -224, -263, -303, 237, 205, 174, 142, 111, 79, 48, 16, -16, -45, -75, -105, -136, -167, -198, -229, 187, 161, 136, 110, 85, 61, 36, 12, -15, -41, -67, -92, -117, -142, -167, -192, 159, 138, 117, 96, 76, 54, 33, 12, -10, -30, -51, -71, -92, -113, -134, -154, 130, 112, 95, 77, 60, 43, 26, 9, -10, -28, -46, -64, -81, -98, -116, -133, 115, 100, 85, 70, 54, 39, 23, 8, -7, -22, -37, -52, -66, -82, -96, -111, 99, 86, 73, 60, 46, 32, 19, 6, -8, -21, -34, -47, -60, -73, -86, -100, 86, 75, 63, 52, 40, 28, 17, 5, -7, -19, -31, -43, -54, -66, -78, -90, 77, 66, 56, 46, 35, 25, 16, 6, -5, -15, -26, -36, -47, -57, -67, -77, 70, 60, 51, 42, 33, 23, 14, 5, -5, -14, -24, -33, -43, -52, -62, -72, 65, 57, 49, 40, 31, 23, 14, 6, -3, -12, -20, -28, -37, -45, -54, -62};
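The tables can be spot-checked against the bound quoted below (2^(-9.46)). The following sketch assumes it is compiled in a translation unit containing the T1[] and T2[] definitions above, and uses double arithmetic only as a rough check of the scaling x0/2^16 ~ 2^32/sqrt(a0) for GMP_NUMB_BITS = 64:

    #include <math.h>
    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      double maxerr = 0.0;
      for (uint64_t a = 4; a < 16; a++)
        for (uint64_t b = 0; b < 16; b++)
          for (uint64_t c = 0; c < 16; c++)
            {
              /* smallest 64-bit a0 whose top 12 bits are a:b:c */
              uint64_t a0 = ((a << 8) | (b << 4) | c) << 52;
              double x0 = (double) ((uint64_t) T1[(a - 4) * 16 + b] << 4)
                          + (double) T2[(a - 4) * 16 + c];
              double target = ldexp (1.0, 32) / sqrt ((double) a0);
              double err = fabs (x0 / 65536.0 - target);
              if (err > maxerr)
                maxerr = err;
            }
      printf ("max error at interval endpoints ~ 2^%.2f\n", log2 (maxerr));
      return 0;
    }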
-#if GMP_NUMB_BITS == 64
+/* return x0 and write rp[0] such that a0 = x0^2 + rp[0]
+ with x0^2 <= a0 < (x0+1)^2 */
static mp_limb_t
mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
{
@@ -36,21 +137,41 @@ mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
mp_limb_t c = (a0 >> (GMP_NUMB_BITS - 12)) & 0xf;
mp_limb_t x0, a1, t, y, x2;
- x0 = (T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
- /* now x0 is a 16-bit approximation, with maximal error 2^(-9.46) */
-
+ x0 = ((mp_limb_t) T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
+ /* now x0/2^16 is a (1+16)-bit approximation of 2^6/sqrt(a*2^8+b*2^4+c),
+ thus of 2^(GMP_NUMB_BITS/2)/sqrt(a0), with maximal error 2^(-9.46) */
+
+#if GMP_NUMB_BITS == 32
+ x0 -= 89; /* x0 -= 93 ensures that x0/2^16 <= 2^16/sqrt(a0) (proof by
+ exhaustive search), which ensures that t = a0-y^2 >= 0 below, and
+ thus that the truncated Karp-Markstein trick gives x0 <= sqrt(a0)
+ at the end. However (still by exhaustive search) x0 -= 89 is
+ enough to guarantee x0^2 <= a0 at the end, and at most one
+ correction is needed. With x0 -= 89 the probability of correction
+ is 0.097802, with x0 -= 93 it is 0.106486. */
+ a1 = a0 >> (GMP_NUMB_BITS - 16); /* a1 has 16 bits */
+ y = (a1 * (x0 >> 1)) >> 15; /* y is near 2^32/x0, with 16 bits, and should be
+ an approximation of sqrt(a0) */
+ /* |a0 - y^2| <= 13697110 < 2^24 (by exhaustive search) */
+ /* a0 >= y^2 (proof by exhaustive search) */
+ t = (a0 - y * y) >> 8;
+ /* t/2^24 approximates a0/2^32 - (y/2^16)^2, with |t| < 2^16 */
+ /* x0*t/2^41 approximates (x0/2^16)/2*(a0/2^32 - (y/2^16)^2) */
+ /* x0 * t < 2^32 (proof by exhaustive search) */
+ x0 = y + ((x0 * t) >> 25);
+#else /* GMP_NUMB_BITS = 64 */
a1 = a0 >> (GMP_NUMB_BITS - 32);
/* a1 has 32 bits, thus a1*x0^2 has 64 bits */
- t = (mp_limb_signed_t) (a1 * (x0 * x0)) >> 32;
- /* t has 32 bits now */
- x0 = (x0 << 15) - ((mp_limb_signed_t) (x0 * t) >> (17+1));
+ /* a1*x0^2 might exceed 2^64, but we are only interested in
+ a1*x0^2 - 2^64, which is small */
+ t = (mp_limb_signed_t) (a1 * (x0 * x0)) >> 9;
+ /* |t| < 2^46 (proof by exhaustive search on all possible values of a1,
+ since x0 depends on a1 only) */
+ x0 = (x0 << 16) - ((mp_limb_signed_t) (x0 * t) >> (39+1)) - 1;
- /* now x0 is a 31-bit approximation (32 bits, 1 <= x0/2^31 <= 2),
- with maximal error 2^(-19.19). More precisely by exhaustive search
- on all 32-bit values of a1:
- -1.665416e-06 <= x0/2^31 - 1/sqrt(a1/2^32) <= 6.959122e-10,
- with the left worst case for a1=1326448622, x0=3864240766,
- and the right worst case for a1=1081715548, x0=4279108124. */
+ /* now x0 is a (1+32)-bit approximation such that (by exhaustive search on all
+ 32-bit values of a1):
+ -1.67e-06 <= x0/2^32 - 2^16/sqrt(a1) <= 0 */
/* we now use Karp-Markstein's trick to get a 32-bit approximation of the
square root of a0:
@@ -58,16 +179,23 @@ mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
t = a - y^2 [target accuracy, high 19 bits are zero]
y = y + x0/2 * t [target accuracy] */
- /* a1*x0^2 is near 2^94, thus a1*x0 is near 2^94/x0 */
- y = (a1 * x0) >> 31; /* y is near 2^63/x0, with 32 bits */
- t = (mp_limb_signed_t) (a0 - y * y) >> 14;
- /* t should have at most 64-14-19=31 significant bits */
- x0 = y + ((mp_limb_signed_t) (x0 * t) >> (49+1));
+ /* a1*x0^2 is near 2^96, thus a1*x0 is near 2^96/x0, thus near
+ 2^48*sqrt(a1), thus near 2^32*sqrt(a0) */
+ y = (a1 * x0) >> 32; /* y is near sqrt(a0), with 32 bits */
+ /* now a0 >= y^2 */
+ t = (a0 - y * y) >> 13;
+ /* t < 2^31 (by exhaustive search on all possible values of a1, with
+ a0 = 2^32*a1+(2^32-1)) */
+ /* since x0 < 2^33 and t < 2^31, x0*t does not overflow */
+ x0 = y + ((x0 * t) >> (64-13+1));
+#endif
- /* x0 is now a 32-bit approximation of sqrt(a0) */
+ /* x0 is now a (GMP_NUMB_BITS/2)-bit approximation of sqrt(a0),
+ with x0 <= sqrt(a0) */
x2 = x0 * x0;
- while (x2 + 2*x0 < a0)
+ if (x2 + 2*x0 < a0) /* x0 is too small: probability of correction is 0.097802
+ for GMP_NUMB_BITS=32, 0.000017 for GMP_NUMB_BITS=64 */
{
x2 += 2*x0 + 1;
x0++;
@@ -77,10 +205,11 @@ mpn_sqrtrem1 (mpfr_limb_ptr rp, mp_limb_t a0)
return x0;
}
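The Karp-Markstein step described in the comments above (start from x ~ 1/sqrt(a), set y = a*x ~ sqrt(a), then correct y by x/2 times the residual a - y^2) can be sketched in double precision; this only illustrates the idea, not the fixed-point scaling used in mpn_sqrtrem1:

    #include <math.h>
    #include <stdio.h>

    /* One Karp-Markstein refinement: given x ~ 1/sqrt(a), return an improved
       approximation of sqrt(a), without any division. */
    static double km_sqrt_step (double a, double x)
    {
      double y = a * x;       /* y ~ sqrt(a), about as accurate as x */
      double t = a - y * y;   /* residual: its leading bits cancel */
      return y + 0.5 * x * t; /* error is roughly squared */
    }

    int main (void)
    {
      double a = 2.0;
      double x = 0.7;  /* crude approximation of 1/sqrt(2) ~ 0.7071 */
      /* prints ~1.414 next to sqrt(2) = 1.41421356... */
      printf ("%.17g %.17g\n", km_sqrt_step (a, x), sqrt (a));
      return 0;
    }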
-/* Return a (1+40)-bit approximation x0 of 2^72/sqrt(a0).
- Assume a0 >= 2^(GMP_NUMB_BITS-2), thus x0 has 41 bits.
- Assume GMP_NUMB_BITS=64.
-*/
+/* For GMP_NUMB_BITS=32: return a (1+20)-bit approximation x0 of 2^36/sqrt(a0).
+ For GMP_NUMB_BITS=64: return a (1+40)-bit approximation x0 of 2^72/sqrt(a0).
+ Assume a0 >= 2^(GMP_NUMB_BITS-2), and GMP_NUMB_BITS = 32 or 64.
+ Must ensure: x0 <= 2^36/sqrt(a0) for GMP_NUMB_BITS=32,
+ x0 <= 2^72/sqrt(a0) for GMP_NUMB_BITS=64. */
static mp_limb_t
mpn_rsqrtrem1 (mp_limb_t a0)
{
@@ -89,12 +218,31 @@ mpn_rsqrtrem1 (mp_limb_t a0)
mp_limb_t c = (a0 >> (GMP_NUMB_BITS - 12)) & 0xf;
mp_limb_t x0, a1, t;
- MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
+ MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64);
- x0 = (T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
+ x0 = ((mp_limb_t) T1[(a-4)*16+b] << 4) + T2[(a-4)*16+c];
/* now x0 is a 16-bit approximation, with maximal error 2^(-9.46):
-2^(-9.46) <= x0/2^16 - 1/sqrt(a/2^4) <= 2^(-9.46) */
+#if GMP_NUMB_BITS == 32
+ x0 >>= 1; /* reduce approximation to 1+15 bits */
+ /* In principle, a0>>10 can have up to 22 bits, and (x0^2)>>12 can have up to
+ 20 bits, thus the product can have up to 42 bits, but since a0*x0^2 is very
+ near 2^62, thus (a0>>10)*(x0^2)>>12 is very near 2^40, and when we reduce
+ it mod 2^32 and interpret as a signed number in [-2^31, 2^31-1], we get
+ the correct remainder (a0>>10)*(x0^2)>>12 - 2^40 */
+ t = (mp_limb_signed_t) ((a0 >> 10) * ((x0 * x0) >> 12)) >> 8;
+ /* |t| < 6742843 <= 2^23 - 256 (by exhaustive search) */
+ t = (mp_limb_signed_t) t >> 8; /* now |t| < 2^15, thus |x0*t| < 2^31 */
+ x0 = (x0 << 5) - ((mp_limb_signed_t) (x0 * t) >> 20);
+
+ /* by exhaustive search on all possible values of a0, we get:
+ -1.61 <= x0 - 2^36/sqrt(a0) <= 3.11 thus
+ -2^(-19.3) < -1.54e-6 <= x0/2^20 - 2^16/sqrt(a0) <= 2.97e-6 < 2^(-18.3)
+ */
+
+ return x0 - 4; /* ensures x0 <= 2^36/sqrt(a0) */
+#else /* GMP_NUMB_BITS = 64 */
a1 = a0 >> (GMP_NUMB_BITS - 32);
/* a1 has 32 bits, thus a1*x0^2 has 64 bits */
t = (mp_limb_signed_t) (-a1 * (x0 * x0)) >> 32;
@@ -112,13 +260,21 @@ mpn_rsqrtrem1 (mp_limb_t a0)
/* now x0 is a 1+40-bit approximation,
more precisely we have (experimentally):
- -3.157551e-12 <= x0/2^40 - 2^32/sqrt(a0) <= 3.834848e-12
+ -2^(-38.2) < -3.16e-12 <= x0/2^40 - 2^32/sqrt(a0) <= 3.84e-12 < 2^(-37.9)
*/
-
- return x0;
+ return x0 - 5; /* ensures x0 <= 2^72/sqrt(a0) */
+#endif
}
-/* Comments below are for GMP_NUMB_BITS=64 for simplicity, but the code is valid
+/* Given as input np[0] and np[1], with B/4 <= np[1] (where B = 2^GMP_NUMB_BITS),
+ mpn_sqrtrem2 returns a value x, 0 <= x <= 1, and stores values s in sp[0] and
+ r in rp[0] such that:
+
+ n := np[1]*B + np[0] = s^2 + x*B + r, with n < (s+1)^2
+
+ or equivalently x*B + r <= 2*s.
+
+ This comment is for GMP_NUMB_BITS=64 for simplicity, but the code is valid
for any even value of GMP_NUMB_BITS.
The algorithm used is the following, and uses Karp-Markstein's trick:
- start from x, a 33-bit approximation of 2^64/sqrt(n1), with x <= 2^64/sqrt(n1)
@@ -126,6 +282,7 @@ mpn_rsqrtrem1 (mp_limb_t a0)
- t = n1 - y^2
- u = (x * t) >> 33
- y = (y << 32) + u
+
Proof:
* we know that Newton's iteration for the reciprocal square root,
@@ -159,7 +316,32 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
mp_limb_t x, y, t, high, low;
x = mpn_rsqrtrem1 (np[1]);
- /* x is an approximation of 2^72/sqrt(n1), x has 41 bits */
+ /* we must have x^2*n1 <= 2^72 for GMP_NUMB_BITS=32
+ <= 2^144 for GMP_NUMB_BITS=64 */
+
+#if GMP_NUMB_BITS == 32
+ MPFR_ASSERTD ((double) x * (double) x * (double) np[1]
+ < 4722366482869645213696.0);
+ /* x is an approximation of 2^36/sqrt(n1), x has 1+20 bits */
+
+ /* We know x/2^20 <= 2^16/sqrt(n1) + 2^(-18)
+ thus n1*x/2^36 <= sqrt(n1) + 2^(-18)*n1/2^16 <= sqrt(n1) + 2^(-2). */
+
+ /* compute y = floor(np[1]*x/2^36), cutting the upper 24 bits of n1 into two
+ parts of 12 and 11 bits, which can be multiplied by x without overflow
+ (warning: if we took 12 bits for low, low * x might overflow) */
+ high = np[1] >> 20; /* upper 12 bits from n1 */
+ MPFR_ASSERTD((double) high * (double) x < 4294967296.0);
+ low = (np[1] >> 9) & 0x7ff; /* next 11 bits */
+ MPFR_ASSERTD((double) low * (double) x < 4294967296.0);
+ y = high * x + ((low * x) >> 11); /* y approximates n1*x/2^20 */
+ y = (y - 0x4000) >> 16; /* the constant 0x4000 = 2^(36-2-20) takes
+ into account the 2^(-2) error above, to ensure
+ y <= sqrt(n1) */
+#else /* GMP_NUMB_BITS = 64 */
+ MPFR_ASSERTD ((double) x * (double) x * (double) np[1]
+ < 2.2300745198530623e43);
+ /* x is an approximation of 2^72/sqrt(n1), x has 1+40 bits */
/* We know x/2^40 <= 2^32/sqrt(n1) + 3.9e-12 <= 2^32/sqrt(n1) + 2^(-37)
thus n1*x/2^72 <= sqrt(n1) + 2^(-37)*n1/2^32 <= sqrt(n1) + 2^(-5). */
@@ -172,16 +354,26 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
low = (np[1] >> 17) & 0x7fffff; /* next 23 bits */
MPFR_ASSERTD((double) low * (double) x < 18446744073709551616.0);
y = high * x + ((low * x) >> 23); /* y approximates n1*x/2^40 */
- /* y-1 takes into account the 2^(-31) error term, to ensure y <= sqrt(n1)
- after the right shift below */
y = (y - 0x8000000) >> 32; /* the constant 0x8000000 = 2^(72-5-40) takes
into account the 2^(-5) error above, to ensure
y <= sqrt(n1) */
+#endif
/* y is an approximation of sqrt(n1), with y <= sqrt(n1) */
t = np[1] - y * y;
MPFR_ASSERTD((mp_limb_signed_t) t >= 0);
+#if GMP_NUMB_BITS == 32
+ /* we now compute t = (x * t) >> (20 + 1), but x * t might have
+ more than 32 bits thus we cut t in two parts of 12 and 11 bits */
+ high = t >> 11;
+ low = t & 0x7ff;
+ MPFR_ASSERTD((double) high * (double) x < 4294967296.0);
+ MPFR_ASSERTD((double) low * (double) x < 4294967296.0);
+ t = high * x + ((low * x) >> 11); /* approximates t*x/2^11 */
+
+ y = (y << (GMP_NUMB_BITS / 2)) + (t >> 10);
+#else
/* we now compute t = (x * t) >> (40 + 1), but x * t might have
more than 64 bits thus we cut t in two parts of 24 and 23 bits */
high = t >> 23;
@@ -191,8 +383,11 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
t = high * x + ((low * x) >> 23); /* approximates t*x/2^23 */
y = (y << (GMP_NUMB_BITS / 2)) + (t >> 18);
- if (y < (1UL << 63))
- y = 1UL << 63; /* the correction code below assumes y >= 2^63 */
+#endif
+
+ /* the correction code below assumes y >= 2^(GMP_NUMB_BITS - 1) */
+ if (y < (MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)))
+ y = MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1);
umul_ppmm (x, t, y, y);
MPFR_ASSERTD(x < np[1] || (x == np[1] && t <= np[0])); /* y should not be too large */
@@ -206,8 +401,11 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
x -= 1 + (t < (2 * y + 1));
t -= 2 * y + 1;
y ++;
- /* maximal number of loops observed is 3, for n1=4651405965214438987,
- n0=18443926066120424952, average is 0.576 */
+ /* GMP_NUMB_BITS=32: average number of loops observed is 0.982,
+ max is 3 (for n1=1277869843, n0=3530774227);
+ GMP_NUMB_BITS=64: average number of loops observed is 0.593,
+ max is 3 (for n1=4651405965214438987, n0=18443926066120424952).
+ */
}
sp[0] = y;
@@ -225,12 +423,13 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
mpfr_limb_ptr rp = MPFR_MANT(r);
u0 = MPFR_MANT(u)[0];
- if (exp_u & 1)
+ if (((unsigned int) exp_u & 1) != 0)
{
u0 >>= 1;
exp_u ++;
}
- exp_r = exp_u >> 1;
+ MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
+ exp_r = exp_u / 2;
if (p < GMP_NUMB_BITS / 2)
r0 = mpn_sqrtrem1 (&sb, u0) << (GMP_NUMB_BITS / 2);
@@ -309,7 +508,275 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
MPFR_RET(1);
}
}
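The exponent handling at the top of mpfr_sqrt1 follows from sqrt(m * 2^e) = sqrt(m/2) * 2^((e+1)/2) when e is odd: halving the mantissa makes the exponent even (and keeps the operand above B/4, as mpn_sqrtrem1 requires), after which the result exponent is simply e/2. A sketch of the same identity with doubles (illustration only):

    #include <stdio.h>
    #include <math.h>

    int main (void)
    {
      double m = 0.75;  /* plays the role of the normalized mantissa in [1/2, 1) */
      int    e = 5;     /* odd exponent, so u = m * 2^e = 24 */

      if (e & 1)
        {
          m *= 0.5;     /* corresponds to u0 >>= 1; now m is in [1/4, 1/2) */
          e += 1;       /* exp_u ++ */
        }
      /* sqrt(m * 2^e) = sqrt(m) * 2^(e/2); both prints give sqrt(24) */
      printf ("%.17g %.17g\n", sqrt (m) * ldexp (1.0, e / 2), sqrt (24.0));
      return 0;
    }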
-#endif
+
+#if GMP_NUMB_BITS == 64
+/* For GMP_NUMB_BITS=64: return a (1+80)-bit approximation x = xp[1]*B+xp[0]
+ of 2^144/sqrt(ap[1]*B+ap[0]).
+ Assume ap[1] >= B/4, thus sqrt(ap[1]*B+ap[0]) >= B/2, thus x <= 2^81. */
+static void
+mpn_rsqrtrem2 (mpfr_limb_ptr xp, mpfr_limb_srcptr ap)
+{
+ mp_limb_t t1, t0, u1, u0, r0, r1, r2;
+
+ MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
+
+ xp[1] = mpn_rsqrtrem1 (ap[1]);
+
+ /* now we should compute x + (x/2) * (1 - a*x^2), where the upper ~40 bits of
+ a*x^2 should cancel with 1, thus we only need the following 40 bits */
+
+ /* xp[1] has 1+40 bits, with xp[1] <= 2^72/sqrt(ap[1]) */
+ umul_ppmm (t1, t0, xp[1], xp[1]);
+
+ /* now t1 has at most 18 bits, its least 16 bits being the fractional part */
+
+ u1 = ap[1] >> 48;
+ u0 = (ap[1] << 16) | (ap[0] >> 48);
+
+ /* u1 has the 16 most significant bits of a, and u0 the next 64 bits */
+
+ /* we want t1*u1 << 48 + (t1*u0+t0*u1) >> 16 + t0*u0 >> 80
+ [32 bits] [64 bits] [48 bits]
+ but since we know the upper ~40 bits cancel with 1, we can ignore t1*u1. */
+ umul_ppmm (r2, r1, t0, u0);
+ r0 = t1 * u0; /* we ignore the upper 16 bits of the product */
+ r1 = t0 * u1; /* we ignore the upper 16 bits of the product */
+ r0 = r0 + r1 + r2;
+
+ /* the upper ~8 bits of r0 should cancel with 1, we are interested in the next
+ 40 bits */
+
+ umul_ppmm (t1, t0, xp[1], -r0);
+
+ /* we should now add t1 >> 33 to xp[1] */
+ xp[1] += t1 >> 33;
+ xp[0] = t1 << 31;
+ /* shift by 24 bits to the right, since xp[1] has 24 leading zeros,
+ and we now expect 48 */
+ xp[0] = (xp[1] << 40) | (xp[0] >> 24);
+ xp[1] = xp[1] >> 24;
+}
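The step computed above is one Newton iteration for the reciprocal square root, x + (x/2)*(1 - a*x^2), carried out in fixed point so that the cancelling high bits of a*x^2 never need to be formed. A double-precision sketch of the same iteration (illustration only):

    #include <math.h>
    #include <stdio.h>

    /* x_{k+1} = x_k + (x_k/2) * (1 - a * x_k^2), converging to 1/sqrt(a) */
    static double rsqrt_step (double a, double x)
    {
      double e = 1.0 - a * x * x;  /* small: the leading bits cancel */
      return x + 0.5 * x * e;
    }

    int main (void)
    {
      double a = 3.0, x = 0.58;  /* 1/sqrt(3) ~ 0.57735 */
      printf ("%.17g %.17g\n", rsqrt_step (a, x), 1.0 / sqrt (a));
      return 0;
    }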
+
+/* Given as input ap[0-3], with B/4 <= ap[3] (where B = 2^GMP_NUMB_BITS),
+ mpn_sqrtrem4 returns a value x, 0 <= x <= 1, and stores values s in sp[0-1] and
+ r in rp[0-1] such that:
+
+ n := ap[3]*B^3 + ap[2]*B^2 + ap[1]*B + ap[0] = s^2 + x*B + r, with n < (s+1)^2
+
+ or equivalently x*B + r <= 2*s.
+
+ This code currently assumes GMP_NUMB_BITS = 64. */
+static mp_limb_t
+mpn_sqrtrem4 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr ap)
+{
+ mp_limb_t x[2], t1, t0, r2, r1, h, l, u2, u1, b[4];
+
+ MPFR_STAT_STATIC_ASSERT(GMP_NUMB_BITS == 64);
+
+ mpn_rsqrtrem2 (x, ap + 2);
+
+ /* x[1]*B+x[0] is an 80-bit approximation of 2^144/sqrt(ap[3]*B+ap[2]),
+ and should not exceed it */
+
+ /* first compute y0 = a*x with at least 80 bits of precision */
+
+ t1 = ap[3] >> 48;
+ t0 = (ap[3] << 16) | (ap[2] >> 48);
+
+ /* now t1:t0 is a (16+64)-bit approximation of a,
+ (x1*B+x0) * (t1*B+t0) = (x1*t1)*B^2 + (x1*t0+x0*t1)*B + x0*t0 */
+ r2 = x[1] * t1; /* r2 has 32 bits */
+ umul_ppmm (h, r1, x[1], t0);
+ r2 += h;
+ umul_ppmm (h, l, x[0], t1);
+ r1 += l;
+ r2 += h + (r1 < l);
+ umul_ppmm (h, l, x[0], t0);
+ r1 += h;
+ r2 += (r1 < h);
+
+ /* r2 has 32 bits, r1 has 64 bits, thus we have 96 bits in total, we put 64
+ bits in r2 and 16 bits in r1 */
+ r2 = (r2 << 32) | (r1 >> 32);
+ r1 = (r1 << 32) >> 48;
+
+ /* we consider y0 = r2*2^16 + r1, which has 80 bits, and should be smaller than
+ 2^16*sqrt(ap[3]*B+ap[2]) */
+
+ /* Now compute y0 + (x/2)*(a - y0^2), which should give ~160 correct bits.
+ Since a - y0^2 has its ~80 most significant bits that cancel, it remains
+ only ~48 bits. */
+
+ /* now r2:r1 represents y0, with r2 of 64 bits and r1 of 16 bits,
+ and we compute y0^2, whose upper ~80 bits should cancel with a:
+ y0^2 = r2^2*2^32 + 2*r2*r1*2^16 + r1^2. */
+ t1 = r2 * r2; /* we can simply ignore the upper 64 bits of r2^2 */
+ umul_ppmm (h, l, r2, r1);
+ t0 = l << 49; /* takes into account the factor 2 in 2*r2*r1 */
+ u1 = (r1 * r1) << 32; /* temporary variable */
+ t0 += u1;
+ t1 += ((h << 49) | (l >> 15)) + (t0 < u1); /* takes into account the factor 2 */
+
+ /* now t1:t0 >> 32 equals y0^2 mod 2^96; since y0^2 has 160 bits, we should shift
+ t1:t0 by 64 bits to the right */
+ t0 = ap[2] - t1 - (t0 != 0); /* we round downwards to get a lower approximation
+ of sqrt(a) at the end */
+
+ /* now t0 equals ap[3]*B+ap[2] - ceil(y0^2/2^32) */
+
+ umul_ppmm (u2, u1, x[1], t0);
+ umul_ppmm (h, l, x[0], t0);
+ u1 += h;
+ u2 += (u1 < h);
+
+ /* divide by 2 to take into account the factor 1/2 in (x/2)*(a - y0^2) */
+ u1 = (u2 << 63) | (u1 >> 1);
+ u2 = u2 >> 1;
+
+ /* u2:u1 approximates (x/2)*(ap[3]*B+ap[2] - y0^2/2^32) / 2^64,
+ and should not exceed it */
+
+ r1 <<= 48; /* put back the most significant bits of r1 in place */
+
+ /* add u2:u1 >> 16 to y0 */
+ sp[0] = r1 + ((u2 << 48) | (u1 >> 16));
+ sp[1] = r2 + (u2 >> 16) + (sp[0] < r1);
+
+ mpn_mul_n (b, sp, sp, 2);
+ b[2] = ap[2] - b[2] - mpn_sub_n (rp, ap, b, 2);
+
+ /* invariant: the remainder {ap, 4} - {sp, 2}^2 is b[2]*B^2 + {rp, 2} */
+
+ t0 = mpn_lshift (b, sp, 2, 1);
+
+ /* Warning: the initial {sp, 2} might be < 2^127, thus t0 might be 0. */
+
+ /* invariant: 2*{sp,2} = t0*B + {b, 2} */
+
+ /* While the remainder is greater than 2*s we should subtract 2*s+1 from the
+ remainder, and add 1 to the square root. This loop seems to be executed
+ at most twice. */
+ while (b[2] > t0 || (b[2] == t0 &&
+ (rp[1] > b[1] || (rp[1] == b[1] && rp[0] > b[0]))))
+ {
+ /* subtract 2*s from b[2]*B^2 + {rp, 2} */
+ b[2] -= t0 + mpn_sub_n (rp, rp, b, 2);
+ /* subtract 1 from b[2]*B^2 + {rp, 2}: b[2] -= mpn_sub_1 (rp, rp, 2, 1) */
+ if (rp[0]-- == 0)
+ b[2] -= (rp[1]-- == 0);
+ /* add 1 to s */
+ mpn_add_1 (sp, sp, 2, 1);
+ /* add 2 to t0*B + {b, 2}: t0 += mpn_add_1 (b, b, 2, 2) */
+ b[0] += 2;
+ if (b[0] < 2)
+ t0 += (++b[1] == 0);
+ }
+
+ return b[2];
+}
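The contract stated above for mpn_sqrtrem2 and mpn_sqrtrem4 (n = s^2 + x*B + r with x*B + r <= 2*s, i.e. s = floor(sqrt(n))) can be spot-checked for the two-limb case on compilers that provide unsigned __int128 (an assumption; this harness is not MPFR code):

    #include <stdio.h>
    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* rem stands for x*B + r in the comments above */
    static int contract_holds (u128 n, uint64_t s, u128 rem)
    {
      u128 s2 = (u128) s * s;
      return s2 <= n && n - s2 == rem && rem <= 2 * (u128) s;
    }

    int main (void)
    {
      /* example with a known square root: n = (2^64-1)^2 + 5, s = 2^64-1, rem = 5 */
      uint64_t s = UINT64_MAX;
      u128 n = (u128) s * s + 5;
      printf ("%d\n", contract_holds (n, s, n - (u128) s * s));  /* prints 1 */
      return 0;
    }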
+
+/* Special code for GMP_NUMB_BITS < prec(r) < 2*GMP_NUMB_BITS,
+ and GMP_NUMB_BITS < prec(u) <= 2*GMP_NUMB_BITS.
+ This code should work for any value of GMP_NUMB_BITS, but since mpn_sqrtrem4
+ currently assumes GMP_NUMB_BITS=64, it only works for GMP_NUMB_BITS=64. */
+static int
+mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
+{
+ mpfr_prec_t p = MPFR_GET_PREC(r);
+ mpfr_limb_ptr up = MPFR_MANT(u), rp = MPFR_MANT(r);
+ mp_limb_t np[4], tp[2], rb, sb, mask;
+ mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = 2 * GMP_NUMB_BITS - p;
+
+ if (((unsigned int) exp_u & 1) != 0)
+ {
+ np[3] = up[1] >> 1;
+ np[2] = (up[1] << (GMP_NUMB_BITS-1)) | (up[0] >> 1);
+ exp_u ++;
+ }
+ else
+ {
+ np[3] = up[1];
+ np[2] = up[0];
+ }
+ MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
+ exp_r = exp_u / 2;
+
+ np[1] = np[0] = 0;
+ sb = mpn_sqrtrem4 (rp, tp, np);
+ sb |= tp[0] | tp[1];
+ rb = rp[0] & (MPFR_LIMB_ONE << (sh - 1));
+ mask = MPFR_LIMB_MASK(sh);
+ sb |= (rp[0] & mask) ^ rb;
+ rp[0] = rp[0] & ~mask;
+
+ /* rounding */
+ if (exp_r > __gmpfr_emax)
+ return mpfr_overflow (r, rnd_mode, 1);
+
+ /* See comments in mpfr_divsp1 */
+ if (exp_r < __gmpfr_emin)
+ {
+ if (rnd_mode == MPFR_RNDN)
+ {
+ if ((exp_r == __gmpfr_emin - 1) && (rp[1] == ~MPFR_LIMB_ZERO &&
+ rp[0] == ~mask) && rb)
+ goto rounding; /* no underflow */
+ if (exp_r < __gmpfr_emin - 1 || (rp[1] == MPFR_LIMB_HIGHBIT &&
+ rp[0] == MPFR_LIMB_ZERO && sb == 0))
+ rnd_mode = MPFR_RNDZ;
+ }
+ else if (!MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
+ {
+ if ((exp_r == __gmpfr_emin - 1) && (rp[1] == ~MPFR_LIMB_ZERO &&
+ rp[0] == ~mask) && (rb | sb))
+ goto rounding; /* no underflow */
+ }
+ return mpfr_underflow (r, rnd_mode, 1);
+ }
+
+ rounding:
+ MPFR_EXP (r) = exp_r;
+ if (rb == 0 && sb == 0)
+ {
+ MPFR_ASSERTD(exp_r >= __gmpfr_emin);
+ MPFR_ASSERTD(exp_r <= __gmpfr_emax);
+ return 0; /* same as MPFR_RET(0), but faster */
+ }
+ else if (rnd_mode == MPFR_RNDN)
+ {
+ if (rb == 0 || (rb && sb == 0 &&
+ (rp[0] & (MPFR_LIMB_ONE << sh)) == 0))
+ goto truncate;
+ else
+ goto add_one_ulp;
+ }
+ else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
+ {
+ truncate:
+ MPFR_ASSERTD(exp_r >= __gmpfr_emin);
+ MPFR_ASSERTD(exp_r <= __gmpfr_emax);
+ MPFR_RET(-1);
+ }
+ else /* round away from zero */
+ {
+ add_one_ulp:
+ rp[0] += MPFR_LIMB_ONE << sh;
+ rp[1] += rp[0] == 0;
+ if (rp[1] == 0)
+ {
+ rp[1] = MPFR_LIMB_HIGHBIT;
+ if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
+ return mpfr_overflow (r, rnd_mode, 1);
+ MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
+ MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
+ MPFR_SET_EXP (r, exp_r + 1);
+ }
+ MPFR_RET(1);
+ }
+}
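The rb/sb logic above can be restated independently of the limb layout: rb is the first dropped bit, sb is the OR of all remaining dropped bits, and round-to-nearest-even adds one ulp exactly when rb is set and (sb is set or the last kept bit is odd). A minimal sketch for a single 64-bit significand (names are illustrative, not MPFR's API; the carry out of the top bit, which the code above handles by bumping the exponent, is ignored here):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t round_nearest_even (uint64_t x, int keep)
    {
      int sh = 64 - keep;                                  /* number of dropped bits */
      uint64_t rb = (x >> (sh - 1)) & 1;                   /* round bit */
      uint64_t sb = x & ((UINT64_C(1) << (sh - 1)) - 1);   /* sticky bits */
      uint64_t trunc = x & ~((UINT64_C(1) << sh) - 1);
      if (rb && (sb || ((trunc >> sh) & 1)))               /* tie goes to even */
        trunc += UINT64_C(1) << sh;
      return trunc;
    }

    int main (void)
    {
      /* 0x58 kept to 60 bits: rb=1, sb=0, last kept bit odd, so round up to 0x60 */
      printf ("0x%llx\n", (unsigned long long) round_nearest_even (0x58, 60));
      return 0;
    }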
+#endif /* GMP_NUMB_BITS == 64 */
+
+#endif /* !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64) */
int
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
@@ -372,10 +839,18 @@ mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
}
MPFR_SET_POS(r);
-#if GMP_NUMB_BITS == 64
+ /* See the note at the beginning of this file about __GNUC__. */
+#if !defined(MPFR_GENERIC_ABI) && defined(__GNUC__) && \
+ (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64)
if (MPFR_GET_PREC (r) < GMP_NUMB_BITS && MPFR_GET_PREC (u) < GMP_NUMB_BITS)
return mpfr_sqrt1 (r, u, rnd_mode);
-#endif
+#endif
+
+#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64
+ if (GMP_NUMB_BITS < MPFR_GET_PREC (r) && MPFR_GET_PREC (r) < 2*GMP_NUMB_BITS
+ && MPFR_LIMB_SIZE(u) == 2)
+ return mpfr_sqrt2 (r, u, rnd_mode);
+#endif
MPFR_TMP_MARK (marker);
MPFR_UNSIGNED_MINUS_MODULO (sh, MPFR_GET_PREC (r));