summaryrefslogtreecommitdiff
path: root/src/sqrt.c
diff options
context:
space:
mode:
author: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2017-01-30 17:52:19 +0000
committer: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2017-01-30 17:52:19 +0000
commit: 969313276b25aad478706270092e2fd56a6b1118 (patch)
tree: 8d67e1e4281d3064eb9c208258d4884eee9fa37e /src/sqrt.c
parent: 3ee9a48e4fcae769e6843c86acb6af08ae8926a2 (diff)
download: mpfr-969313276b25aad478706270092e2fd56a6b1118.tar.gz
[src/sqrt.c] improved slow branch of mpfr_sqrt2
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11248 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r-- src/sqrt.c | 58
1 file changed, 35 insertions(+), 23 deletions(-)
diff --git a/src/sqrt.c b/src/sqrt.c
index 90e5b76db..8e4af6657 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -126,7 +126,7 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
sb -= r0;
}
/* now we should have rb*2^64 + sb <= 2*r0 */
- MPFR_ASSERTN(rb == 0 || (rb == 1 && sb <= 2 * r0));
+ MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0));
sb = rb | sb;
}
@@ -231,39 +231,51 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
mpfr_sqrt2_approx (rp, np + 2);
/* with n = np[3]*2^64+np[2], we have:
- {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26,
- thus we can round correctly except when the number formed by the last sh-1 bits
+ {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26, thus we can round
+ correctly except when the number formed by the last sh-1 bits
of rp[0] is in the range [-26, 4]. */
- if (((rp[0] + 26) & (mask >> 1)) > 30)
+ if (MPFR_LIKELY(((rp[0] + 26) & (mask >> 1)) > 30))
sb = 1;
else
{
- mp_limb_t tp[4], cy;
+ mp_limb_t tp[4], h, l;
np[0] = 0;
- mpn_mul_n (tp, rp, rp, 2);
- while (mpn_cmp (tp, np, 4) > 0) /* approximation is too large */
+ mpn_sqr (tp, rp, 2);
+ /* since we know s - 26 <= r <= s + 4 and 0 <= n^2 - s <= 2*s, we have
+ -8*s-16 <= n - r^2 <= 54*s - 676, thus it suffices to compute
+ n - r^2 modulo 2^192 */
+ mpn_sub_n (tp, np, tp, 3);
+ /* invariant: h:l = 2 * {rp, 2}, with upper bit implicit */
+ h = (rp[1] << 1) | (rp[0] >> (GMP_NUMB_BITS - 1));
+ l = rp[0] << 1;
+ while ((mp_limb_signed_t) tp[2] < 0) /* approximation was too large */
{
- mpn_sub_1 (rp, rp, 2, 1);
- /* subtract 2*{rp,2}+1 to {tp,4} */
- cy = mpn_submul_1 (tp, rp, 2, 2);
- mpn_sub_1 (tp + 2, tp + 2, 2, cy);
- mpn_sub_1 (tp, tp, 4, 1);
+ /* subtract 1 to {rp, 2}, thus 2 to h:l */
+ h -= (l <= MPFR_LIMB_ONE);
+ l -= 2;
+ /* add (1:h:l)+1 to {tp,3} */
+ tp[0] += l + 1;
+ tp[1] += h + (tp[0] < l);
+ /* necessarily rp[1] has its most significant bit set */
+ tp[2] += MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] < l));
}
- /* now {tp, 4} <= {np, 4} */
- mpn_sub_n (tp, np, tp, 4);
+ /* now tp[2] >= 0 */
/* now we want {tp, 4} <= 2 * {rp, 2}, which implies tp[2] <= 1 */
- MPFR_ASSERTD(tp[3] == 0);
- while (tp[2] > 1 ||
- (tp[2] == 1 && tp[1] > ((rp[1] << 1) | (rp[0] >> 63))) ||
- (tp[2] == 1 && tp[1] == ((rp[1] << 1) | (rp[0] >> 63)) &&
- tp[0] > (rp[0] << 1)))
+ while (tp[2] > 1 || (tp[2] == 1 && tp[1] > h) ||
+ (tp[2] == 1 && tp[1] == h && tp[0] > l))
{
- /* subtract 2*{rp,2}+1 to {tp,3} (we know tp[3] = 0) */
- tp[2] -= mpn_submul_1 (tp, rp, 2, 2);
- mpn_sub_1 (tp, tp, 3, 1);
- mpn_add_1 (rp, rp, 2, 1);
+ /* subtract (1:h:l)+1 from {tp,3} */
+ tp[2] -= MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] <= l));
+ tp[1] -= h + (tp[0] <= l);
+ tp[0] -= l + 1;
+ /* add 2 to h:l */
+ l += 2;
+ h += (l <= MPFR_LIMB_ONE);
}
+ /* restore {rp, 2} from h:l */
+ rp[1] = MPFR_LIMB_HIGHBIT | (h >> 1);
+ rp[0] = (h << (GMP_NUMB_BITS - 1)) | (l >> 1);
sb = tp[2] | tp[0] | tp[1];
}