diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2008-01-04 10:35:04 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2008-01-04 10:35:04 +0000 |
commit | 5561775a3e2c63adb7153f0ca73b2396c76b3d10 (patch) | |
tree | 87ff2deba65d29a759a01cb585d2c585b485f301 /rec_sqrt.c | |
parent | 8397845b9b2c7ca46e8f99c2d9db3dfbc8a50542 (diff) | |
download | mpfr-5561775a3e2c63adb7153f0ca73b2396c76b3d10.tar.gz |
the function mpfr_mpn_rec_sqrt() provides a faithful approximation of the
inverse square root. Some improvements can still be made, but the interface
should not change, thus we can start writing the mpfr_rec_sqrt function that
calls it.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@5166 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'rec_sqrt.c')
-rwxr-xr-x | rec_sqrt.c | 481 |
1 files changed, 481 insertions, 0 deletions
diff --git a/rec_sqrt.c b/rec_sqrt.c new file mode 100755 index 000000000..0c26d8d09 --- /dev/null +++ b/rec_sqrt.c @@ -0,0 +1,481 @@ +/* mpfr_rec_sqrt -- inverse square root + +Copyright 2008 Free Software Foundation, Inc. +Contributed by the Arenaire and Cacao projects, INRIA. + +This file is part of the MPFR Library. + +The MPFR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The MPFR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the MPFR Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, +MA 02110-1301, USA. */ + +#include <stdio.h> +#include <stdlib.h> + +#define MPFR_NEED_LONGLONG_H /* for umul_ppmm */ +#include "mpfr-impl.h" + +/* number of guard bits in the one-word case */ +#define GUARD 1 + +#define ONE ((mp_limb_t) 1) +#define LIMB_SIZE(x) ((((x)-1)>>MPFR_LOG2_BITS_PER_MP_LIMB) + 1) + +#define MPFR_COM_N(x,y,n) \ + { \ + mp_size_t i; \ + for (i = 0; i < n; i++) \ + *((x)+i) = ~*((y)+i); \ + } + +/* check that X is at distance at most 1 ulp of A^{-1/2}, + where X has p bits (msb aligned). +*/ +static void +check (mp_srcptr x, mp_srcptr a, mp_prec_t p) +{ + mp_size_t n = 1 + (p - 1) / GMP_NUMB_BITS; + mp_prec_t l = n * GMP_NUMB_BITS - p; /* low ignored bits */ + mp_ptr y, t, u; + + /* we should have X - 2^p < A^{-1/2} < X + 2^p */ + + y = (mp_ptr) malloc (n * sizeof (mp_limb_t)); + t = (mp_ptr) malloc (2 * n * sizeof (mp_limb_t)); + u = (mp_ptr) malloc ((3 * n + 1) * sizeof (mp_limb_t)); + + /* check the low bits of x are zero */ + if (l) + { + if (x[0] & ((ONE << l) - ONE)) + { + fprintf (stderr, "Error, low bits from x are not zero: %lu\n", + x[0] & ((ONE << l) - ONE)); + exit (1); + } + } + + MPN_COPY (y, x, n); + if (mpn_sub_1 (y, y, n, ONE << l)) + { + fprintf (stderr, "Error in check, borrow in X - ulp\n"); + exit (1); + } + mpn_mul_n (t, y, y, n); + mpn_mul (u, t, 2 * n, a, n + 1); + if (u[3 * n] != 0) + { + fprintf (stderr, "(X-ulp)^2*A >= B^(3n)\n"); + exit (1); + } + + MPN_COPY (y, x, n); + /* if there is a carry in x + ulp(x), then x + ulp(x) = 1, + and surely A^{-1/2} < x + ulp(x) */ + if (mpn_add_1 (y, y, n, ONE << l) == 0) + { + mpn_mul_n (t, y, y, n); + mpn_mul (u, t, 2 * n, a, n + 1); + if (u[3 * n] == 0) + { + fprintf (stderr, "(X+ulp)^2*A < B^(3n)\n"); + exit (1); + } + } + + free (u); + free (t); + free (y); +} + +/* Put in X a p-bit approximation of 1/sqrt(A), + where X = {x, n}/B^n, A = {a, n+1}/B^n, and n = ceil(p/GMP_NUMB_BITS), + where B = 2^GMP_NUMB_BITS. + Note: x and a are left-aligned. + The error in the approximate result with respect to the true value + 1/sqrt(A) is bounded by 1 ulp. + + If p is not a multiple of GMP_NUMB_BITS, the extra low bits of the input + A should be 0. Similarly, the extra low bits of the output X are set to 0. + + Assumptions: + (1) A should be normalized, i.e., 1 <= A[n] < 4, thus 1/2 <= X < 1. + (2) p >= 11 + (3) {a, n+1} and {x, n} should not overlap + (4) GMP_NUMB_BITS >= 12 and is even + + Reference: Modern Computer Algebra, Richard Brent and Paul Zimmermann, + http://www.loria.fr/~zimmerma/mca/pub226.html + {a, n+1} and {x, n} should not overlap. +*/ +void +mpfr_mpn_rec_sqrt (mp_ptr x, mp_srcptr a, mp_prec_t p) +{ + /* the following T1 and T2 are bipartite tables giving initial + approximation for the inverse square root, with 13-bit input split in + 5+4+4, and 11-bit output. More precisely, if 2048 <= i < 8192, + with i = a*2^8 + b*2^4 + c, we use for approximation of + 2048/sqrt(i/2048) the value x = T1[16*(a-8)+b] + T2[16*(a-8)+c]. + The largest error is obtained for i = 3347, where x = 1601, + and 2048/sqrt(i/2048) = 1602.0168227... + */ + static mp_limb_t T1[384] = { +2040, 2032, 2024, 2016, 2009, 2001, 1994, 1986, 1979, 1972, /* 10 */ +1965, 1958, 1951, 1944, 1937, 1930, 1924, 1917, 1911, 1904, /* 20 */ +1898, 1891, 1885, 1879, 1873, 1867, 1861, 1855, 1849, 1843, /* 30 */ +1837, 1831, 1826, 1820, 1814, 1809, 1803, 1798, 1792, 1787, /* 40 */ +1782, 1777, 1771, 1766, 1761, 1756, 1751, 1746, 1741, 1736, /* 50 */ +1731, 1727, 1722, 1717, 1712, 1708, 1703, 1698, 1694, 1689, /* 60 */ +1685, 1680, 1676, 1672, 1667, 1663, 1659, 1655, 1650, 1646, /* 70 */ +1642, 1638, 1634, 1630, 1626, 1622, 1618, 1614, 1610, 1606, /* 80 */ +1602, 1598, 1595, 1591, 1587, 1583, 1580, 1576, 1572, 1569, /* 90 */ +1565, 1562, 1558, 1555, 1551, 1548, 1544, 1541, 1537, 1534, /* 100 */ +1531, 1527, 1524, 1521, 1517, 1514, 1511, 1508, 1505, 1501, /* 110 */ +1498, 1495, 1492, 1489, 1486, 1483, 1480, 1477, 1474, 1471, /* 120 */ +1468, 1465, 1462, 1459, 1456, 1453, 1450, 1448, 1445, 1442, /* 130 */ +1439, 1436, 1434, 1431, 1428, 1426, 1423, 1420, 1418, 1415, /* 140 */ +1412, 1410, 1407, 1404, 1402, 1399, 1397, 1394, 1392, 1389, /* 150 */ +1387, 1384, 1382, 1379, 1377, 1374, 1372, 1370, 1367, 1365, /* 160 */ +1362, 1360, 1358, 1355, 1353, 1351, 1349, 1346, 1344, 1342, /* 170 */ +1339, 1337, 1335, 1333, 1331, 1328, 1326, 1324, 1322, 1320, /* 180 */ +1318, 1315, 1313, 1311, 1309, 1307, 1305, 1303, 1301, 1299, /* 190 */ +1297, 1295, 1293, 1291, 1289, 1287, 1285, 1283, 1281, 1279, /* 200 */ +1277, 1275, 1273, 1271, 1269, 1267, 1265, 1264, 1262, 1260, /* 210 */ +1258, 1256, 1254, 1252, 1251, 1249, 1247, 1245, 1243, 1242, /* 220 */ +1240, 1238, 1236, 1234, 1233, 1231, 1229, 1228, 1226, 1224, /* 230 */ +1222, 1221, 1219, 1217, 1216, 1214, 1212, 1211, 1209, 1207, /* 240 */ +1206, 1204, 1202, 1201, 1199, 1198, 1196, 1194, 1193, 1191, /* 250 */ +1190, 1188, 1187, 1185, 1183, 1182, 1180, 1179, 1177, 1176, /* 260 */ +1174, 1173, 1171, 1170, 1168, 1167, 1165, 1164, 1162, 1161, /* 270 */ +1159, 1158, 1157, 1155, 1154, 1152, 1151, 1149, 1148, 1147, /* 280 */ +1145, 1144, 1142, 1141, 1140, 1138, 1137, 1136, 1134, 1133, /* 290 */ +1131, 1130, 1129, 1127, 1126, 1125, 1123, 1122, 1121, 1119, /* 300 */ +1118, 1117, 1116, 1114, 1113, 1112, 1110, 1109, 1108, 1107, /* 310 */ +1105, 1104, 1103, 1102, 1100, 1099, 1098, 1097, 1095, 1094, /* 320 */ +1093, 1092, 1091, 1089, 1088, 1087, 1086, 1085, 1083, 1082, /* 330 */ +1081, 1080, 1079, 1077, 1076, 1075, 1074, 1073, 1072, 1071, /* 340 */ +1069, 1068, 1067, 1066, 1065, 1064, 1063, 1062, 1060, 1059, /* 350 */ +1058, 1057, 1056, 1055, 1054, 1053, 1052, 1051, 1049, 1048, /* 360 */ +1047, 1046, 1045, 1044, 1043, 1042, 1041, 1040, 1039, 1038, /* 370 */ +1037, 1036, 1035, 1034, 1033, 1032, 1031, 1030, 1029, 1028, /* 380 */ +1027, 1026, 1025, 1024}; + static mp_limb_t T2[384] = { + 8, 7, 7, 6, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 7, 6, 6, 5, /* 20 */ + 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 6, 6, 5, 5, 5, 4, 4, 4, /* 40 */ + 3, 3, 3, 2, 2, 2, 1, 1, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 2, 2, /* 60 */ + 2, 1, 1, 1, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, /* 80 */ + 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 4, 4, 3, 3, /* 100 */ + 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 2, /* 120 */ + 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, /* 140 */ + 1, 1, 1, 1, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, /* 160 */ + 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 3, 3, 2, 2, /* 180 */ + 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, /* 200 */ + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, /* 220 */ + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 240 */ + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, /* 260 */ + 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, /* 280 */ + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, /* 300 */ + 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 320 */ + 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, /* 340 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, /* 360 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 380 */ + 0, 0, 0, 0}; + mp_size_t n = LIMB_SIZE(p); + + /* A should be normalized */ + MPFR_ASSERTN((1 <= a[n]) && (a[n] < 4)); + /* we should have enough bits in one limb */ + MPFR_ASSERTN(GMP_NUMB_BITS >= 11); + /* {a, n+1} and {x, n} should not overlap */ + MPFR_ASSERTN((a + n + 1 <= x) || (x + n <= a)); + MPFR_ASSERTN(p >= 11); + + if (p == 11) + { + unsigned long i, ab, ac; + mp_limb_t t; + + /* take the 13 (or 12) most significant bits of A */ + i = (a[1] << 11) | (a[0] >> (GMP_NUMB_BITS - 11)); + switch (i) + { + case 2048: + t = 2047; /* otherwise we get t=2048, which gives X=1 */ + break; + case 2387: /* table value is 1896, error <= 1.0061 ulp */ + t = 1897; + break; + case 3347: /* table value is 1601, error <= 1.0169 ulp */ + t = 1602; + break; + default: + ab = i >> 4; + ac = (ab & 0x3F0) | (i & 0x0F); + t = T1[ab - 0x80] + T2[ac - 0x80]; + } + x[0] = t << (GMP_NUMB_BITS - p); + } + else /* p >= 12 */ + { + mp_prec_t h = (p < 18) ? 11 : (p >> 1) + 2; /* max(11, ceil((p+3)/2)) */ + mp_ptr r, s, t, u; + mp_size_t xn = LIMB_SIZE(h); + mp_size_t rn = LIMB_SIZE(2 * h); + /* the remaining value of t has at most h+3 bits, but since they are + in 2h bits aligned left, the number of limbs is the following */ + mp_size_t th = (h - 3) >> MPFR_LOG2_BITS_PER_MP_LIMB; + mp_size_t ln = n - xn; + mp_size_t sn, tn; + mp_size_t un = xn + rn - th; + int neg; + mp_limb_t cy, cu; + mp_prec_t pl = n * GMP_NUMB_BITS - p; /* low bits from x */ + MPFR_TMP_DECL(marker); + + mpfr_mpn_rec_sqrt (x + ln, a + ln, h); + /* the most h significant bits of X are set, X has ceil(h/GMP_NUMB_BITS) + limbs, the low (-h) % GMP_NUMB_BITS bits are zero */ + // gmp_printf ("xh=%Nd\n", x+ln, xn); + + MPFR_TMP_MARK (marker); + /* first step: square X in r, result is exact */ + u = r = (mp_ptr) MPFR_TMP_ALLOC (un * sizeof (mp_limb_t)); + if (2 * h <= GMP_NUMB_BITS) /* xn=rn=1 */ + { + mp_limb_t xx = x[ln] >> (GMP_NUMB_BITS >> 1); + r ++; + r[0] = xx * xx; + } + else if (xn == 1) /* xn=1, rn=2 */ + umul_ppmm(r[1], r[0], x[ln], x[ln]); + else + { + mpn_mul_n (r, x + ln, x + ln, xn); + if (rn < 2 * xn) + r ++; + } + /* now the 2h most significant bits of {r, rn} contains X^2, r has rn + limbs, and the low (-2h) % GMP_NUMB_BITS bits are zero */ + // gmp_printf ("r=%Nd\n", r, rn); + + /* Second step: s <- A * (r^2), and truncate the low p bits, + i.e., at weight 2^{-2h} (s is aligned to the low significant bits) + */ + sn = n + rn; + s = (mp_ptr) MPFR_TMP_ALLOC ((sn + 1) * sizeof (mp_limb_t)); + if (2 * h + p + 2 <= GMP_NUMB_BITS) + { + /* we should have n=rn=1, thus sn=2 */ + mp_limb_t ah = (a[1] << p) | (a[0] >> (GMP_NUMB_BITS - p)); + s[1] = (r[0] >> (GMP_NUMB_BITS - 2 * h)) * ah; + /* s[1] should have at most 2h+p+2 bits */ + s[2] = s[1] >> (2 * h + p); + s[1] = s[1] << (GMP_NUMB_BITS - (2 * h + p)); + } + else if (rn == 1) /* rn=1 implies n=1, since rn*GMP_NUMB_BITS >= 2h, + and 2h >= p+3 */ + { + /* necessarily p <= GMP_NUMB_BITS-3: we can ignore the two low + bits from A */ + umul_ppmm (s[1], s[0], r[0], a[1] << (GMP_NUMB_BITS - 2) + | a[0] >> 2); + s[2] = mpn_lshift (s, s, 2, 2); + } + else + { + /* we have p <= n * GMP_NUMB_BITS + 2h <= rn * GMP_NUMB_BITS with p+3 <= 2h <= p+4 + thus n <= rn <= n + 1 */ + MPFR_ASSERTN(rn <= n + 1); + mpn_mul (s, a, n + 1, r, rn); + /* s should be near B^sn, thus either s[sn] is 1 and s[sn-1] is + near 0, or t[sn]=0 and s[sn-1] is near 111...111. + We ignore the bits of s after the first 2h ones. + */ + } + // gmp_printf ("s=%Nd\n", s, n+1+rn); + /* We ignore the bits of s after the first 2h ones. */ + t = s + n; /* pointer to low limb of t */ + tn = rn; + /* t has in theory (n+rn) * GMP_NUMB_BITS - floor(p / GMP_NUMB_BITS) bits + but the upper h-3 bits of 1-t should be zero */ + + /* compute t <- 1 - t, which is B^tn - {t, tn+1} */ + neg = t[tn] != 0; + if (neg == 0) /* Ax^2 < 1 */ + { + MPFR_COM_N (t, t, tn); + mpn_add_1 (t, t, tn, 1); /* no carry here */ + } + /* otherwise we do nothing: we already have t-1 in {t, tn} */ + + tn -= th; /* we know at least th = floor((h-3)/GMP_NUMB_LIMBS) of the + high limbs of {t, tn} are zero */ + + /* tn = rn - th, where rn * GMP_NUMB_BITS >= 2*h and + th * GMP_NUMB_BITS <= h-3, thus tn > 0 */ + MPFR_ASSERTN(tn > 0); + + /* Since |x-a^{-1/2}| <= 1.5*2^{-h}, we have + x = a^{-1/2} + e with |e| <= 1.5*2^{-h}, thus + x^2 = 1/a + f with |f| <= 3*2^{-h}*a^{-1/2}+2.25*2^{-2h} + ax^2 = 1 + g with |g| <= 3*2^{-h}*a^{1/2} + 2.25*2^{-2h}*a + <= 6*2^{-h} + 9*2^{-2h} since a < 4. + Since we truncated s at 2^{-2h}, we have: + |s - 1| <= 6*2^{-h} + 10*2^{-2h} < 2^{3-h} since h >= 3. + Thus t should have at most h+3 bits, instead of 2h in theory. + */ + + /* u <- x * t + {t, tn} contains at least h+3 bits, and {x, xn} contains h bits, + thus tn >= xn */ + MPFR_ASSERTN(tn >= xn); + if (tn == 1) /* necessarily xn=1 */ + umul_ppmm (u[1], u[0], t[0], x[ln]); + else + mpn_mul (u, t, tn, x + ln, xn); + + /* we have already discarded th high limbs of t, thus we only have to + consider the upper n - th limbs of u */ + sn = n - th; /* sn cannot be zero, since for n=1 we have th=0, and + for n>=2 we have p <= n*GMP_NUMB_BITS, + thus h=ceil((p+3)/2) <= (p+4)/2 and + th*GMP_NUMB_BITS <= (h-3) <= p/2 <= n/2*GMP_NUMB_BITS */ + MPFR_ASSERTN(sn > 0); + un -= sn; /* xn + rn - n */ + u += un; + + /* u will be shifted to the right, and after that we want that the low + pl bits are zero, thus we want now that the pl+1 bits are zero, + thus we round u to nearest at bit pl+1 of u[0] */ + cu = mpn_add_1 (u, u, sn, u[0] & (ONE << pl)); + /* mask bits 0..pl of u[0] */ + if (pl + 1 == GMP_NUMB_BITS) + u[0] = 0; + else + u[0] &= ~((ONE << (pl + 1)) - ONE); + + /* We already have filled {x + ln, xn = n - ln}, and we want to add or + subtract cu*B^sn + {u, sn} divided by two at position x. + sn = n - th, where th contains <= h-3 bits + ln = n - xn, where xn contains >= h bits + thus sn > ln. + Warning: ln might be zero. + */ + MPFR_ASSERTN(sn > ln); + /* we can have sn = ln + 2, for example with GMP_NUMB_BITS=32 and + p=62, then h=33, n=2, th=0, xn=2, thus sn=2 and ln=0. */ + MPFR_ASSERTN(sn == ln + 1 || sn == ln + 2); + /* the high sn-ln limbs of u will overlap the low part of {x+ln,xn} */ + if (ln > 0) + { + mpn_rshift (x, u, ln, 1); /* no carry out */ + x[ln - 1] |= mpn_rshift (u, u + ln, sn - ln, 1); + } + else /* ln = 0 */ + mpn_rshift (u, u, sn, 1); /* no carry out */ + /* incorporate possible carry out from rounding of u */ + u[sn - ln - 1] |= cu << (GMP_NUMB_BITS - 1); + /* we need to add or subtract the overlapping part {u, sn - ln} */ + if (neg == 0) + cy = mpn_add (x + ln, x + ln, xn, u, sn - ln); + else /* negative case */ + { + cy = mpn_sub (x + ln, x + ln, xn, u, sn - ln); + cy = -mpn_sub_1 (x + sn, x + sn, th, cy); /* n - sn = th */ + if (ln > 0) + { + MPFR_COM_N (x, x, ln); + cy = mpn_add_1 (x, x, n, ONE) - cy; + /* we also need to subtract 1 at x[ln] */ + cy -= mpn_sub_1 (x + ln, x + ln, xn, ONE); /* n - ln = xn */ + } + } + +#if 0 + /* if cu is non-zero (necessarily 1), add/subtract cu/2 at x[sn] */ + if (MPFR_UNLIKELY(cu != 0)) + { + abort(); + if (neg == 0) + cy += mpn_add_1 (x + sn - 1, x + sn - 1, th+1, MPFR_LIMB_HIGHBIT); + else + cy -= mpn_sub_1 (x + sn - 1, x + sn - 1, th+1, MPFR_LIMB_HIGHBIT); + } +#endif + + /* cy can be 1 when A=1, i.e., {a, n} = B^n. Setting X to + 1-ulp(1) satisties the error bound of 1 ulp. */ + if (MPFR_UNLIKELY(cy != 0)) + { + cy -= mpn_sub_1 (x, x, n, ONE << pl); + MPFR_ASSERTN(cy == 0); + } + + MPFR_TMP_FREE (marker); + } +} + +#ifdef MAIN +#include "cputime.h" + +#define N 1000000 + +/* test for mpfr_mpn_rec_sqrt */ +int +main (int argc, char *argv[]) +{ + mp_ptr X, A; + int i; + mp_prec_t p = atoi (argv[1]); + mp_size_t n; + mp_prec_t pl; + int st; + + if (argc != 2) + { + fprintf (stderr, "Usage: %s precision\n", argv[0]); + exit (1); + } + + n = 1 + (p - 1) / GMP_NUMB_BITS; + pl = n * GMP_NUMB_BITS - p; + + X = malloc (n * sizeof (mp_limb_t)); + A = malloc ((n + 1) * sizeof (mp_limb_t)); + +#ifdef TIMING + mpn_random2 (A, n); + A[n] = 1 + (lrand48 () % 3); + A[0] &= ~((ONE << pl) - ONE); + st = cputime (); + for (i = 0; i < N; i++) + mpfr_mpn_rec_sqrt (X, A, p); + st = cputime () - st; + printf ("time per call: %e ms\n", (double) st / (double) N); +#else + for (i = 0; i < N; i++) + { + mpn_random2 (A, n); + A[n] = 1 + (lrand48 () % 3); + A[0] &= ~((ONE << pl) - ONE); + // gmp_printf ("\nA=%Nd\n", A, n + 1); + mpfr_mpn_rec_sqrt (X, A, p); + // gmp_printf ("A=%Nd X=%Nd\n", A, n + 1, X, n); + check (X, A, p); + } +#endif + return 0; +} +#endif |