diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-01-30 17:52:19 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-01-30 17:52:19 +0000 |
commit | 969313276b25aad478706270092e2fd56a6b1118 (patch) | |
tree | 8d67e1e4281d3064eb9c208258d4884eee9fa37e /src/sqrt.c | |
parent | 3ee9a48e4fcae769e6843c86acb6af08ae8926a2 (diff) | |
download | mpfr-969313276b25aad478706270092e2fd56a6b1118.tar.gz |
[src/sqrt.c] improved slow branch of mpfr_sqrt2
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11248 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r-- | src/sqrt.c | 58 |
1 file changed, 35 insertions, 23 deletions
diff --git a/src/sqrt.c b/src/sqrt.c index 90e5b76db..8e4af6657 100644 --- a/src/sqrt.c +++ b/src/sqrt.c @@ -126,7 +126,7 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) sb -= r0; } /* now we should have rb*2^64 + sb <= 2*r0 */ - MPFR_ASSERTN(rb == 0 || (rb == 1 && sb <= 2 * r0)); + MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0)); sb = rb | sb; } @@ -231,39 +231,51 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) mpfr_sqrt2_approx (rp, np + 2); /* with n = np[3]*2^64+np[2], we have: - {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26, - thus we can round correctly except when the number formed by the last sh-1 bits + {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26, thus we can round + correctly except when the number formed by the last sh-1 bits of rp[0] is in the range [-26, 4]. */ - if (((rp[0] + 26) & (mask >> 1)) > 30) + if (MPFR_LIKELY(((rp[0] + 26) & (mask >> 1)) > 30)) sb = 1; else { - mp_limb_t tp[4], cy; + mp_limb_t tp[4], h, l; np[0] = 0; - mpn_mul_n (tp, rp, rp, 2); + mpn_sqr (tp, rp, 2); /* since we know s - 26 <= r <= s + 4 and 0 <= n^2 - s <= 2*s, we have + -8*s-16 <= n - r^2 <= 54*s - 676, thus it suffices to compute + n - r^2 modulo 2^192 */ + mpn_sub_n (tp, np, tp, 3); + /* invariant: h:l = 2 * {rp, 2}, with upper bit implicit */ + h = (rp[1] << 1) | (rp[0] >> (GMP_NUMB_BITS - 1)); + l = rp[0] << 1; - while (mpn_cmp (tp, np, 4) > 0) /* approximation is too large */ + while ((mp_limb_signed_t) tp[2] < 0) /* approximation was too large */ { - mpn_sub_1 (rp, rp, 2, 1); - /* subtract 2*{rp,2}+1 to {tp,4} */ - cy = mpn_submul_1 (tp, rp, 2, 2); - mpn_sub_1 (tp + 2, tp + 2, 2, cy); - mpn_sub_1 (tp, tp, 4, 1); + /* subtract 1 to {rp, 2}, thus 2 to h:l */ + h -= (l <= MPFR_LIMB_ONE); + l -= 2; + /* add (1:h:l)+1 to {tp,3} */ + tp[0] += l + 1; + tp[1] += h + (tp[0] < l); + /* necessarily rp[1] has its most significant bit set */ + tp[2] += MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] < l)); } - /* now {tp, 4} <= {np, 4} */ - mpn_sub_n (tp, np, tp, 4); + /* now tp[2] >= 0 */ /* now we want {tp, 4} <= 2 * {rp, 2}, which implies tp[2] <= 1 */ - MPFR_ASSERTD(tp[3] == 0); - while (tp[2] > 1 || - (tp[2] == 1 && tp[1] > ((rp[1] << 1) | (rp[0] >> 63))) || - (tp[2] == 1 && tp[1] == ((rp[1] << 1) | (rp[0] >> 63)) && - tp[0] > (rp[0] << 1))) + while (tp[2] > 1 || (tp[2] == 1 && tp[1] > h) || + (tp[2] == 1 && tp[1] == h && tp[0] > l)) { - /* subtract 2*{rp,2}+1 to {tp,3} (we know tp[3] = 0) */ - tp[2] -= mpn_submul_1 (tp, rp, 2, 2); - mpn_sub_1 (tp, tp, 3, 1); - mpn_add_1 (rp, rp, 2, 1); + /* subtract (1:h:l)+1 from {tp,3} */ + tp[2] -= MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] <= l)); + tp[1] -= h + (tp[0] <= l); + tp[0] -= l + 1; + /* add 2 to h:l */ + l += 2; + h += (l <= MPFR_LIMB_ONE); } + /* restore {rp, 2} from h:l */ + rp[1] = MPFR_LIMB_HIGHBIT | (h >> 1); + rp[0] = (h << (GMP_NUMB_BITS - 1)) | (l >> 1); sb = tp[2] | tp[0] | tp[1]; } |