diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-01-12 10:16:40 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-01-12 10:16:40 +0000 |
commit | 44f74670c344cab411016f898a8f89b18773646a (patch) | |
tree | d6a605f35ea217fd9ec3b798bee87be881483d7b /src/sqrt.c | |
parent | 7a25dc395bac51db512ad78d40e477b1ee2ac557 (diff) | |
download | mpfr-44f74670c344cab411016f898a8f89b18773646a.tar.gz |
[src/sqrt.c] improved mpfr_sqrt2_approx()
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11187 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r-- | src/sqrt.c | 56 |
1 files changed, 34 insertions, 22 deletions
diff --git a/src/sqrt.c b/src/sqrt.c index 60fe7420c..166d79a24 100644 --- a/src/sqrt.c +++ b/src/sqrt.c @@ -85,19 +85,33 @@ mpfr_sqrt1_approx (mp_limb_t n) } \ while (0) -/* Put in rp[1]*2^64+rp[0] an approximation of sqrt(2^128*n), +/* Put in rp[1]*2^64+rp[0] an approximation of floor(sqrt(2^128*n)), with 2^126 <= n := np[1]*2^64 + np[0] < 2^128. - The error on {rp, 2} is less than 35 ulps, with {rp, 2} <= sqrt(2^128*n). + The error on {rp, 2} is at most 27 ulps, with {rp, 2} <= floor(sqrt(2^128*n)). */ static void mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np) { - int i = np[1] >> 56; + int i; mp_limb_t x, r1, r0, h, l, t; const mp_limb_t *u; const mp_limb_t magic = 0xda9fbe76c8b43800; /* ceil(0.854*2^64) */ - x = np[1] << 8 | (np[0] >> 56); + /* We round x upward to ensure {np,2} >= r1^2 below. Since we consider also the upper 8 + bits of np[0] into x, we have to deal separately with the case np[1] = 2^64-1 and the + upper 8 bits of np[0] are all 1. */ + l = np[0] + MPFR_LIMB_MASK(56); + h = np[1] + (l < np[0]); + i = h >> 56; + x = (h << 8) | (l >> 56); + if (MPFR_UNLIKELY(i == 0)) + /* this happens when np[1] = 2^64-1, the upper 8 bits of np[0] are all 1, and the lower + 56 bits of np[0] are not all zero */ + { + MPFR_ASSERTD(x == MPFR_LIMB_ZERO); + goto compute_r1; + } + MPFR_ASSERTD(64 <= i && i < 256); u = V[i - 64]; umul_ppmm (h, l, u[8], x); /* the truncation error on h is at most 1 here */ @@ -116,12 +130,14 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np) truncation error on h+l/2^64 is <= 6/2^6 */ sub_ddmmss (h, l, u[0], u[1], h, l); /* Since the mathematical error is < 0.412e-19*2^64, the total error on - h + l/2^64 is less than 0.854; magic = ceil(0.854*2^64). */ + h + l/2^64 is less than 0.854; magic = ceil(0.854*2^64). We subtract it, + while keeping x >= 0. */ x = h - (l < magic && h != 0); - /* now 2^64 + x is an approximation of 2^96/sqrt(np[1]), - with 2^64 + x < 2^96/sqrt(np[1]) */ - + /* now 2^64 + x is an approximation of 2^96/sqrt(np[1]+1), + with 2^64 + x <= 2^96/sqrt(np[1]+1) */ + + compute_r1: umul_ppmm (r1, l, np[1], x); r1 += np[1]; @@ -132,11 +148,13 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np) r1 = MPFR_LIMB_HIGHBIT; umul_ppmm (h, l, r1, r1); + MPFR_ASSERTD(h < np[1] || (h == np[1] && l <= np[0])); sub_ddmmss (h, l, np[1], np[0], h, l); + /* divide by 2 */ l = (h << 63) | (l >> 1); h = h >> 1; - /* now add (2^64+x) * (h*2^64+l) / 2^64 to r0 */ + /* now add (2^64+x) * (h*2^64+l) / 2^64 to [r1*2^64, 0] */ umul_ppmm (r0, t, x, l); /* x * l */ r0 += l; @@ -147,16 +165,10 @@ mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np) r1 += (r0 < x); /* add x */ } - if (r1 & MPFR_LIMB_HIGHBIT) - { - rp[0] = r0; - rp[1] = r1; - } - else /* overflow occurred in r1 */ - { - rp[0] = ~MPFR_LIMB_ZERO; - rp[1] = ~MPFR_LIMB_ZERO; - } + MPFR_ASSERTD(r1 & MPFR_LIMB_HIGHBIT); + + rp[0] = r0; + rp[1] = r1; } /* Special code for prec(r), prec(u) < GMP_NUMB_BITS. We cannot have @@ -312,10 +324,10 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) mask = MPFR_LIMB_MASK(sh); mpfr_sqrt2_approx (rp, np + 2); - /* the error is less than 35 ulps on rp[0], with {rp, 2} smaller or equal + /* the error is at most 27 ulps on rp[0], with {rp, 2} smaller or equal to the exact square root, thus we can round correctly except when the - number formed by the last sh-1 bits of rp[0] is 0, -1, -2, ..., -34. */ - if (((rp[0] + 34) & (mask >> 1)) > 34) + number formed by the last sh-1 bits of rp[0] is 0, -1, -2, ..., -27. */ + if (((rp[0] + 27) & (mask >> 1)) > 27) sb = 1; else { |