summaryrefslogtreecommitdiff
path: root/src/sqrt.c
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2016-12-15 08:28:05 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2016-12-15 08:28:05 +0000
commit34b1d7160590c5c6fb674eefc959d0a88e723fa7 (patch)
treef836ac47e0bea753eeb297508b0bf6de582c90fb /src/sqrt.c
parentb7143fe294f1e96df7f4af7af3e819ff5c7beafc (diff)
downloadmpfr-34b1d7160590c5c6fb674eefc959d0a88e723fa7.tar.gz
[sqrt.c] simplify code for GMP_NUMB_BITS = 64
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11044 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r--src/sqrt.c108
1 files changed, 33 insertions, 75 deletions
diff --git a/src/sqrt.c b/src/sqrt.c
index 1e9cd88fb..ad93aa155 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -253,6 +253,7 @@ mpn_sqrtrem2_approx (mp_limb_t n)
}
#endif /* GMP_NUMB_BITS == 64 */
+#if GMP_NUMB_BITS == 32
/* Given as input np[0] and np[1], with B/4 <= np[1] (where B = 2^GMP_NUMB_BITS),
mpn_sqrtrem2 returns a value x, 0 <= x <= 1, and stores values s in sp[0] and
r in rp[0] such that:
@@ -408,6 +409,7 @@ mpn_sqrtrem2 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr np)
rp[0] = t;
return x;
}
+#endif
/* Special code for prec(r), prec(u) < GMP_NUMB_BITS. We cannot have
prec(u) = GMP_NUMB_BITS here, since when the exponent of u is odd,
@@ -417,7 +419,7 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
mpfr_prec_t p = MPFR_GET_PREC(r);
mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = GMP_NUMB_BITS - p;
- mp_limb_t u0, r0, rb, sb, mask = MPFR_LIMB_MASK(sh), sp[2];
+ mp_limb_t u0, r0, rb, sb, mask = MPFR_LIMB_MASK(sh);
mpfr_limb_ptr rp = MPFR_MANT(r);
/* first make the exponent even */
@@ -436,16 +438,38 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
sb = 1; /* when we can round correctly with the approximation, the sticky bit
is non-zero */
- /* since the error is at most 1 ulp, we can round correctly except when the last
- sh-1 bits of r0+sb are <= sb (this includes the case where the last sh-1 bits of
- r0 are 000...000 since we might have an exact result or not) */
- if (MPFR_UNLIKELY(((r0 + 1) & (mask >> 1)) <= 1))
-#endif
+ /* Since the exact square root is in [r0 - 0.5406, r0 + 0.5406], we can round
+ correctly except when the last sh-1 bits of r0 are 000...000. */
+ if (MPFR_UNLIKELY((r0 & (mask >> 1)) == 0))
{
- sp[1] = u0;
- sp[0] = 0;
- sb |= mpn_sqrtrem2 (&r0, &sb, sp);
+ umul_ppmm (rb, sb, r0, r0);
+ /* for the exact square root, we should have 0 <= (u0-rb)*2^64 - sb <= 2*r0 */
+ if (rb > u0 || (rb == u0 && sb > 0)) /* r0 is too large */
+ {
+ r0 --;
+ umul_ppmm (rb, sb, r0, r0);
+ }
+ /* if u0 <= rb + 1, then (u0-rb)*2^64 - sb <= 2^64 <= 2*r0
+ if u0 >= rb + 3, then (u0-rb)*2^64 - sb > 2*2*64 > 2*r0 */
+ else if (u0 > rb + 2 || (u0 == rb + 2 && -sb > 2 * r0))
+ {
+ r0 ++;
+ umul_ppmm (rb, sb, r0, r0);
+ }
+ sub_ddmmss (rb, sb, u0, 0, rb, sb);
+ /* now we should have rb*2^64 + sb <= 2*r0 */
+ MPFR_ASSERTN(rb == 0 || (rb == 1 && sb <= 2 * r0));
+ sb = rb | sb;
}
+#else /* 32-bit variant. FIXME: write mpn_sqrtrem2_approx for GMP_NUMB_BITS=32,
+ then use the above code and get rid of mpn_sqrtrem2 */
+ {
+ mp_limb_t sp[2];
+ sp[1] = u0;
+ sp[0] = 0;
+ sb |= mpn_sqrtrem2 (&r0, &sb, sp);
+ }
+#endif
rb = r0 & (MPFR_LIMB_ONE << (sh - 1));
sb |= (r0 & mask) ^ rb;
@@ -564,71 +588,6 @@ mpn_rsqrtrem2 (mpfr_limb_ptr xp, mpfr_limb_srcptr ap)
xp[1] = xp[1] >> 24;
}
-#if 0
-/* this code seems to be slightly slower than the one below, for precision
- 113 bits on a 64-bit machine, and in addition it requires GMP internals
- (for __gmpn_invert_limb) */
-static mp_limb_t
-mpn_sqrtrem4 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr ap)
-{
- mp_limb_t x, r, t;
-
- x = mpn_sqrtrem2 (sp + 1, rp + 1, ap + 2, 0);
-
- /* now a1 = s1^2 + x*B + r1, with a1 = {ap+2,2}, s1 = sp[1], r1 = rp[1],
- with 0 <= x*B + r1 <= 2*s1 */
-
- /* use Newton's iteration for the square root, x' = 1/2 * (x + a/x), which
- rewrites as:
-
- x' = x + (a - x^2)/(2x)
-
- Since x' - sqrt(a) = (x-sqrt(a))^2/(2x), we have x' >= sqrt(a).
- If we round (a - x^2)/(2x) downwards, then we will still get
- an upper approximation of floor(sqrt(a)). */
-
- t = rp[1] << (GMP_NUMB_BITS - 1);
- rp[1] = (x << (GMP_NUMB_BITS - 1)) | (rp[1] >> 1); /* rp[1] = floor((a-x^2)/2) */
- /* Since x*B + r1 <= 2*s1, we now have rp[1] <= s1, and since __udiv_qrnnd_preinv
- requires its 3rd argument to be smaller than its 5th argument, we must
- distinguish the case rp[1] == s1. */
- if (MPFR_UNLIKELY(rp[1] == sp[1]))
- {
- /* Necessarily t=0 since x*B + r1 <= 2*s1 initially.
- Taking sp[0] = 2^GMP_NUMB_BITS (if representable) would be too large
- since it would mean sp[1] = sp[1]+1, and the remainder rp[1] would
- become negative. */
- sp[0] = MPFR_LIMB_MAX;
- r = sp[1];
- }
- else
- {
- r = __gmpn_invert_limb (sp[1]);
- __udiv_qrnnd_preinv (sp[0], r, rp[1], t, sp[1], r);
- }
-
- /* now sp[0] = floor((a-x^2)/(2x)) = floor((x*B+r1)/2/s1) */
-
- /* r1*B = 2*s1*s0 + 2*r
- a - s^2 = a1*B^2 + a0 - (s1*B+s0)^2
- = (s1^2+r1)*B^2 + a0 - s1^2*B^2 - 2*s1*s0*B - s0^2
- = r1*B^2 + a0 - 2*s1*s0*B - s0^2
- = 2*r*B + a0 - s0^2 */
- umul_ppmm (rp[1], rp[0], sp[0], sp[0]); /* s0^2 */
- t = -mpn_sub_n (rp, ap, rp, 2); /* a0 - s0^2 */
- t += 2 * (r >> (GMP_NUMB_BITS - 1));
- r <<= 1;
- rp[1] += r;
- t += rp[1] < r;
- if ((mp_limb_signed_t) t < 0)
- {
- mpn_sub_1 (sp, sp, 2, 1);
- t += mpn_addmul_1 (rp, sp, 2, 2);
- t += mpn_add_1 (rp, rp, 2, 1);
- }
- return t;
-}
-#else
/* Given as input ap[0-3], with B/4 <= ap[3] (where B = 2^GMP_NUMB_BITS),
mpn_sqrtrem4 returns a value x, 0 <= x <= 1, and stores values s in sp[0-1] and
r in rp[0-1] such that:
@@ -747,7 +706,6 @@ mpn_sqrtrem4 (mpfr_limb_ptr sp, mpfr_limb_ptr rp, mpfr_limb_srcptr ap)
return b[2];
}
-#endif
/* Special code for GMP_NUMB_BITS < prec(r) < 2*GMP_NUMB_BITS,
and GMP_NUMB_BITS < prec(u) <= 2*GMP_NUMB_BITS.