[src/sqrt.c] improved mpfr_sqrt2_approx()

git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11187 280ebfd0-de03-0410-8827-d642c229c3f4
author: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2017-01-12 10:16:40 +0000
committer: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2017-01-12 10:16:40 +0000
commit: 44f74670c344cab411016f898a8f89b18773646a (patch)
tree: d6a605f35ea217fd9ec3b798bee87be881483d7b /src/sqrt.c
parent: 7a25dc395bac51db512ad78d40e477b1ee2ac557 (diff)
download: mpfr-44f74670c344cab411016f898a8f89b18773646a.tar.gz
1 files changed, 34 insertions, 22 deletions
diff --git a/src/sqrt.c b/src/sqrt.c
index 60fe7420c..166d79a24 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -85,19 +85,33 @@ mpfr_sqrt1_approx (mp_limb_t n)
     }                                    \
   while (0)
 
-/* Put in rp[1]*2^64+rp[0] an approximation of sqrt(2^128*n),
+/* Put in rp[1]*2^64+rp[0] an approximation of floor(sqrt(2^128*n)),
    with 2^126 <= n := np[1]*2^64 + np[0] < 2^128.
-   The error on {rp, 2} is less than 35 ulps, with {rp, 2} <= sqrt(2^128*n).
+   The error on {rp, 2} is at most 27 ulps, with {rp, 2} <= floor(sqrt(2^128*n)).
 */
 static void
 mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
 {
-  int i = np[1] >> 56;
+  int i;
   mp_limb_t x, r1, r0, h, l, t;
   const mp_limb_t *u;
   const mp_limb_t magic = 0xda9fbe76c8b43800; /* ceil(0.854*2^64) */
 
-  x = np[1] << 8 | (np[0] >> 56);
+  /* We round x upward to ensure {np,2} >= r1^2 below. Since we consider also the upper 8
+     bits of np[0] into x, we have to deal separately with the case np[1] = 2^64-1 and the
+     upper 8 bits of np[0] are all 1. */
+  l = np[0] + MPFR_LIMB_MASK(56);
+  h = np[1] + (l < np[0]);
+  i = h >> 56;
+  x = (h << 8) | (l >> 56);
+  if (MPFR_UNLIKELY(i == 0))
+    /* this happens when np[1] = 2^64-1, the upper 8 bits of np[0] are all 1, and the lower
+       56 bits of np[0] are not all zero */
+    {
+      MPFR_ASSERTD(x == MPFR_LIMB_ZERO);
+      goto compute_r1;
+    }
+  MPFR_ASSERTD(64 <= i && i < 256);
   u = V[i - 64];
   umul_ppmm (h, l, u[8], x);
   /* the truncation error on h is at most 1 here */
@@ -116,12 +130,14 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
                                          truncation error on h+l/2^64 is <= 6/2^6 */
   sub_ddmmss (h, l, u[0], u[1], h, l);
   /* Since the mathematical error is < 0.412e-19*2^64, the total error on
-     h + l/2^64 is less than 0.854; magic = ceil(0.854*2^64). */
+     h + l/2^64 is less than 0.854; magic = ceil(0.854*2^64). We subtract it,
+     while keeping x >= 0. */
   x = h - (l < magic && h != 0);
 
-  /* now 2^64 + x is an approximation of 2^96/sqrt(np[1]),
-     with 2^64 + x < 2^96/sqrt(np[1]) */
-
+  /* now 2^64 + x is an approximation of 2^96/sqrt(np[1]+1),
+     with 2^64 + x <= 2^96/sqrt(np[1]+1) */
+  
+ compute_r1:
   umul_ppmm (r1, l, np[1], x);
   r1 += np[1];
 
@@ -132,11 +148,13 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
     r1 = MPFR_LIMB_HIGHBIT;
 
   umul_ppmm (h, l, r1, r1);
+  MPFR_ASSERTD(h < np[1] || (h == np[1] && l <= np[0]));
   sub_ddmmss (h, l, np[1], np[0], h, l);
+  /* divide by 2 */
   l = (h << 63) | (l >> 1);
   h = h >> 1;
 
-  /* now add (2^64+x) * (h*2^64+l) / 2^64 to r0 */
+  /* now add (2^64+x) * (h*2^64+l) / 2^64 to [r1*2^64, 0] */
 
   umul_ppmm (r0, t, x, l); /* x * l */
   r0 += l;
@@ -147,16 +165,10 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
       r1 += (r0 < x); /* add x */
     }
 
-  if (r1 & MPFR_LIMB_HIGHBIT)
-    {
-      rp[0] = r0;
-      rp[1] = r1;
-    }
-  else /* overflow occurred in r1 */
-    {
-      rp[0] = ~MPFR_LIMB_ZERO;
-      rp[1] = ~MPFR_LIMB_ZERO;
-    }
+  MPFR_ASSERTD(r1 & MPFR_LIMB_HIGHBIT);
+
+  rp[0] = r0;
+  rp[1] = r1;
 }
 
 /* Special code for prec(r), prec(u) < GMP_NUMB_BITS. We cannot have
@@ -312,10 +324,10 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
   mask = MPFR_LIMB_MASK(sh);
 
   mpfr_sqrt2_approx (rp, np + 2);
-  /* the error is less than 35 ulps on rp[0], with {rp, 2} smaller or equal
+  /* the error is at most 27 ulps on rp[0], with {rp, 2} smaller or equal
      to the exact square root, thus we can round correctly except when the
-     number formed by the last sh-1 bits of rp[0] is 0, -1, -2, ..., -34. */
-  if (((rp[0] + 34) & (mask >> 1)) > 34)
+     number formed by the last sh-1 bits of rp[0] is 0, -1, -2, ..., -27. */
+  if (((rp[0] + 27) & (mask >> 1)) > 27)
     sb = 1;
   else
     {
author	zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>	2017-01-12 10:16:40 +0000
committer	zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>	2017-01-12 10:16:40 +0000
commit	44f74670c344cab411016f898a8f89b18773646a (patch)
tree	d6a605f35ea217fd9ec3b798bee87be881483d7b /src/sqrt.c
parent	7a25dc395bac51db512ad78d40e477b1ee2ac557 (diff)
download	mpfr-44f74670c344cab411016f898a8f89b18773646a.tar.gz