From 34b1d7160590c5c6fb674eefc959d0a88e723fa7 Mon Sep 17 00:00:00 2001 From: zimmerma Date: Thu, 15 Dec 2016 08:28:05 +0000 Subject: [sqrt.c] simplify code for GMP_NUMB_BITS = 64 git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11044 280ebfd0-de03-0410-8827-d642c229c3f4 --- src/sqrt.c | 108 +++++++++++++++++++------------------------------------------ 1 file changed, 33 insertions(+), 75 deletions(-) (limited to 'src/sqrt.c') diff --git a/src/sqrt.c b/src/sqrt.c index 1e9cd88fb..ad93aa155 100644 --- a/src/sqrt.c +++ b/src/sqrt.c @@ -253,6 +253,7 @@ mpn_sqrtrem2_approx (mp_limb_t n) } #endif /* GMP_NUMB_BITS == 64 */ +#if GMP_NUMB_BITS == 32 /* Given as input np[0] and np[1], with B/4 <= np[1] (where B = 2^GMP_NUMB_BITS), mpn_sqrtrem2 returns a value x, 0 <= x <= 1, and stores values s in sp[0] and r in rp[0] such that: @@ -408,6 +409,7 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np) rp[0] = t; return x; } +#endif /* Special code for prec(r), prec(u) < GMP_NUMB_BITS. We cannot have prec(u) = GMP_NUMB_BITS here, since when the exponent of u is odd, @@ -417,7 +419,7 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) { mpfr_prec_t p = MPFR_GET_PREC(r); mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = GMP_NUMB_BITS - p; - mp_limb_t u0, r0, rb, sb, mask = MPFR_LIMB_MASK(sh), sp[2]; + mp_limb_t u0, r0, rb, sb, mask = MPFR_LIMB_MASK(sh); mpfr_limb_ptr rp = MPFR_MANT(r); /* first make the exponent even */ @@ -436,16 +438,38 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) sb = 1; /* when we can round correctly with the approximation, the sticky bit is non-zero */ - /* since the error is at most 1 ulp, we can round correctly except when the last - sh-1 bits of r0+sb are <= sb (this includes the case where the last sh-1 bits of - r0 are 000...000 since we might have an exact result or not) */ - if (MPFR_UNLIKELY(((r0 + 1) & (mask >> 1)) <= 1)) -#endif + /* Since the exact square root is in [r0 - 0.5406, r0 + 0.5406], we can round + correctly except when the last sh-1 bits of r0 are 000...000. */ + if (MPFR_UNLIKELY((r0 & (mask >> 1)) == 0)) { - sp[1] = u0; - sp[0] = 0; - sb |= mpn_sqrtrem2 (&r0, &sb, sp); + umul_ppmm (rb, sb, r0, r0); + /* for the exact square root, we should have 0 <= (u0-rb)*2^64 - sb <= 2*r0 */ + if (rb > u0 || (rb == u0 && sb > 0)) /* r0 is too large */ + { + r0 --; + umul_ppmm (rb, sb, r0, r0); + } + /* if u0 <= rb + 1, then (u0-rb)*2^64 - sb <= 2^64 <= 2*r0 + if u0 >= rb + 3, then (u0-rb)*2^64 - sb > 2*2*64 > 2*r0 */ + else if (u0 > rb + 2 || (u0 == rb + 2 && -sb > 2 * r0)) + { + r0 ++; + umul_ppmm (rb, sb, r0, r0); + } + sub_ddmmss (rb, sb, u0, 0, rb, sb); + /* now we should have rb*2^64 + sb <= 2*r0 */ + MPFR_ASSERTN(rb == 0 || (rb == 1 && sb <= 2 * r0)); + sb = rb | sb; } +#else /* 32-bit variant. FIXME: write mpn_sqrtrem2_approx for GMP_NUMB_BITS=32, + then use the above code and get rid of mpn_sqrtrem2 */ + { + mp_limb_t sp[2]; + sp[1] = u0; + sp[0] = 0; + sb |= mpn_sqrtrem2 (&r0, &sb, sp); + } +#endif rb = r0 & (MPFR_LIMB_ONE << (sh - 1)); sb |= (r0 & mask) ^ rb; @@ -564,71 +588,6 @@ mpn_rsqrtrem2 (mpfr_limb_ptr xp, mpfr_limb_srcptr ap) xp[1] = xp[1] >> 24; } -#if 0 -/* this code seems to be slightly slower than the one below, for precision - 113 bits on a 64-bit machine, and in addition it requires GMP internals - (for __gmpn_invert_limb) */ -static mp_limb_t -mpn_sqrtrem4 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr ap) -{ - mp_limb_t x, r, t; - - x = mpn_sqrtrem2 (sp + 1, rp + 1, ap + 2, 0); - - /* now a1 = s1^2 + x*B + r1, with a1 = {ap+2,2}, s1 = sp[1], r1 = rp[1], - with 0 <= x*B + r1 <= 2*s1 */ - - /* use Newton's iteration for the square root, x' = 1/2 * (x + a/x), which - rewrites as: - - x' = x + (a - x^2)/(2x) - - Since x' - sqrt(a) = (x-sqrt(a))^2/(2x), we have x' >= sqrt(a). - If we round (a - x^2)/(2x) downwards, then we will still get - an upper approximation of floor(sqrt(a)). */ - - t = rp[1] << (GMP_NUMB_BITS - 1); - rp[1] = (x << (GMP_NUMB_BITS - 1)) | (rp[1] >> 1); /* rp[1] = floor((a-x^2)/2) */ - /* Since x*B + r1 <= 2*s1, we now have rp[1] <= s1, and since __udiv_qrnnd_preinv - requires its 3rd argument to be smaller than its 5th argument, we must - distinguish the case rp[1] == s1. */ - if (MPFR_UNLIKELY(rp[1] == sp[1])) - { - /* Necessarily t=0 since x*B + r1 <= 2*s1 initially. - Taking sp[0] = 2^GMP_NUMB_BITS (if representable) would be too large - since it would mean sp[1] = sp[1]+1, and the remainder rp[1] would - become negative. */ - sp[0] = MPFR_LIMB_MAX; - r = sp[1]; - } - else - { - r = __gmpn_invert_limb (sp[1]); - __udiv_qrnnd_preinv (sp[0], r, rp[1], t, sp[1], r); - } - - /* now sp[0] = floor((a-x^2)/(2x)) = floor((x*B+r1)/2/s1) */ - - /* r1*B = 2*s1*s0 + 2*r - a - s^2 = a1*B^2 + a0 - (s1*B+s0)^2 - = (s1^2+r1)*B^2 + a0 - s1^2*B^2 - 2*s1*s0*B - s0^2 - = r1*B^2 + a0 - 2*s1*s0*B - s0^2 - = 2*r*B + a0 - s0^2 */ - umul_ppmm (rp[1], rp[0], sp[0], sp[0]); /* s0^2 */ - t = -mpn_sub_n (rp, ap, rp, 2); /* a0 - s0^2 */ - t += 2 * (r >> (GMP_NUMB_BITS - 1)); - r <<= 1; - rp[1] += r; - t += rp[1] < r; - if ((mp_limb_signed_t) t < 0) - { - mpn_sub_1 (sp, sp, 2, 1); - t += mpn_addmul_1 (rp, sp, 2, 2); - t += mpn_add_1 (rp, rp, 2, 1); - } - return t; -} -#else /* Given as input ap[0-3], with B/4 <= ap[3] (where B = 2^GMP_NUMB_BITS), mpn_sqrtrem4 returns a value x, 0 <= x <= 1, and stores values s in sp[0-1] and r in rp[0-1] such that: @@ -747,7 +706,6 @@ mpn_sqrtrem4 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr ap) return b[2]; } -#endif /* Special code for GMP_NUMB_BITS < prec(r) < 2*GMP_NUMB_BITS, and GMP_NUMB_BITS < prec(u) <= 2*GMP_NUMB_BITS. -- cgit v1.2.1