summaryrefslogtreecommitdiff
path: root/src/sqrt.c
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2017-01-12 10:16:40 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2017-01-12 10:16:40 +0000
commit44f74670c344cab411016f898a8f89b18773646a (patch)
treed6a605f35ea217fd9ec3b798bee87be881483d7b /src/sqrt.c
parent7a25dc395bac51db512ad78d40e477b1ee2ac557 (diff)
downloadmpfr-44f74670c344cab411016f898a8f89b18773646a.tar.gz
[src/sqrt.c] improved mpfr_sqrt2_approx()
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11187 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r--src/sqrt.c56
1 files changed, 34 insertions, 22 deletions
diff --git a/src/sqrt.c b/src/sqrt.c
index 60fe7420c..166d79a24 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -85,19 +85,33 @@ mpfr_sqrt1_approx (mp_limb_t n)
} \
while (0)
-/* Put in rp[1]*2^64+rp[0] an approximation of sqrt(2^128*n),
+/* Put in rp[1]*2^64+rp[0] an approximation of floor(sqrt(2^128*n)),
with 2^126 <= n := np[1]*2^64 + np[0] < 2^128.
- The error on {rp, 2} is less than 35 ulps, with {rp, 2} <= sqrt(2^128*n).
+ The error on {rp, 2} is at most 27 ulps, with {rp, 2} <= floor(sqrt(2^128*n)).
*/
static void
mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
{
- int i = np[1] >> 56;
+ int i;
mp_limb_t x, r1, r0, h, l, t;
const mp_limb_t *u;
const mp_limb_t magic = 0xda9fbe76c8b43800; /* ceil(0.854*2^64) */
- x = np[1] << 8 | (np[0] >> 56);
+ /* We round x upward to ensure {np,2} >= r1^2 below. Since we consider also the upper 8
+ bits of np[0] into x, we have to deal separately with the case np[1] = 2^64-1 and the
+ upper 8 bits of np[0] are all 1. */
+ l = np[0] + MPFR_LIMB_MASK(56);
+ h = np[1] + (l < np[0]);
+ i = h >> 56;
+ x = (h << 8) | (l >> 56);
+ if (MPFR_UNLIKELY(i == 0))
+ /* this happens when np[1] = 2^64-1, the upper 8 bits of np[0] are all 1, and the lower
+ 56 bits of np[0] are not all zero */
+ {
+ MPFR_ASSERTD(x == MPFR_LIMB_ZERO);
+ goto compute_r1;
+ }
+ MPFR_ASSERTD(64 <= i && i < 256);
u = V[i - 64];
umul_ppmm (h, l, u[8], x);
/* the truncation error on h is at most 1 here */
@@ -116,12 +130,14 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
truncation error on h+l/2^64 is <= 6/2^6 */
sub_ddmmss (h, l, u[0], u[1], h, l);
/* Since the mathematical error is < 0.412e-19*2^64, the total error on
- h + l/2^64 is less than 0.854; magic = ceil(0.854*2^64). */
+ h + l/2^64 is less than 0.854; magic = ceil(0.854*2^64). We subtract it,
+ while keeping x >= 0. */
x = h - (l < magic && h != 0);
- /* now 2^64 + x is an approximation of 2^96/sqrt(np[1]),
- with 2^64 + x < 2^96/sqrt(np[1]) */
-
+ /* now 2^64 + x is an approximation of 2^96/sqrt(np[1]+1),
+ with 2^64 + x <= 2^96/sqrt(np[1]+1) */
+
+ compute_r1:
umul_ppmm (r1, l, np[1], x);
r1 += np[1];
@@ -132,11 +148,13 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
r1 = MPFR_LIMB_HIGHBIT;
umul_ppmm (h, l, r1, r1);
+ MPFR_ASSERTD(h < np[1] || (h == np[1] && l <= np[0]));
sub_ddmmss (h, l, np[1], np[0], h, l);
+ /* divide by 2 */
l = (h << 63) | (l >> 1);
h = h >> 1;
- /* now add (2^64+x) * (h*2^64+l) / 2^64 to r0 */
+ /* now add (2^64+x) * (h*2^64+l) / 2^64 to [r1*2^64, 0] */
umul_ppmm (r0, t, x, l); /* x * l */
r0 += l;
@@ -147,16 +165,10 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
r1 += (r0 < x); /* add x */
}
- if (r1 & MPFR_LIMB_HIGHBIT)
- {
- rp[0] = r0;
- rp[1] = r1;
- }
- else /* overflow occurred in r1 */
- {
- rp[0] = ~MPFR_LIMB_ZERO;
- rp[1] = ~MPFR_LIMB_ZERO;
- }
+ MPFR_ASSERTD(r1 & MPFR_LIMB_HIGHBIT);
+
+ rp[0] = r0;
+ rp[1] = r1;
}
/* Special code for prec(r), prec(u) < GMP_NUMB_BITS. We cannot have
@@ -312,10 +324,10 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
mask = MPFR_LIMB_MASK(sh);
mpfr_sqrt2_approx (rp, np + 2);
- /* the error is less than 35 ulps on rp[0], with {rp, 2} smaller or equal
+ /* the error is at most 27 ulps on rp[0], with {rp, 2} smaller or equal
to the exact square root, thus we can round correctly except when the
- number formed by the last sh-1 bits of rp[0] is 0, -1, -2, ..., -34. */
- if (((rp[0] + 34) & (mask >> 1)) > 34)
+ number formed by the last sh-1 bits of rp[0] is 0, -1, -2, ..., -27. */
+ if (((rp[0] + 27) & (mask >> 1)) > 27)
sb = 1;
else
{