author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-01-05 21:13:50 +0000
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-01-05 21:13:50 +0000
commit | d57e0486c2de1ba38e8125459f4fe45fc5ddfd36 | (patch)
tree | fefecf6e3af0576f110e5508d2ecdde5eef9bfe5 | /src/div.c
parent | cb6f5c7fc0a7081a4a70607d8d81198c55199b56 | (diff)
download | mpfr-d57e0486c2de1ba38e8125459f4fe45fc5ddfd36.tar.gz |
[src/div.c] faster division for 2 limbs
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11148 280ebfd0-de03-0410-8827-d642c229c3f4
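
The patch makes `mpfr_div_2` (the special code for two-limb significands) try an approximate quotient first: the new helper `mpfr_div2_approx` below computes it from a one-limb approximate reciprocal, and the exact limb-by-limb division is kept only as a fallback for the rare case where the approximation lands too close to a rounding boundary. Condensing the comments of the new function into formulas, with $B = 2^{\texttt{GMP\_NUMB\_BITS}}$ and $u = u_1 B + u_0 < d = d_1 B + d_0$, $d_1$ normalized:

$$x = \left\lfloor \frac{B^2-1}{d_1+1} \right\rfloor - B, \qquad y = u_1 + \left\lfloor \frac{u_1 x}{B} \right\rfloor, \qquad z = u - \left\lceil \frac{y\,d}{B} \right\rceil,$$

$$v = y B + \left\lfloor \frac{(B+x)\,z}{B} \right\rfloor, \qquad v \le \left\lfloor \frac{u B^2}{d} \right\rfloor \le v + 16.$$

The last identity restates the in-code comment `y*B + (B+x)*(z2*B+z1)/B`, and the bound of 16 is the one asserted in the function's header comment; it is what the new test `((q0 + 16) & (mask >> 1)) > 16` in `mpfr_div_2` relies on to decide that the approximation already yields the correctly rounded result.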
Diffstat (limited to 'src/div.c')
-rw-r--r-- | src/div.c | 63
1 file changed, 51 insertions, 12 deletions
```diff
--- a/src/div.c
+++ b/src/div.c
@@ -24,6 +24,8 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
    [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
        Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
        July 25-27, 2011, pages 7-14.
+   [2] Improved Division by Invariant Integers, Niels Möller and Torbjörn Granlund,
+       IEEE Transactions on Computers, volume 60, number 2, pages 165-175, 2011.
 */
 
 #define MPFR_NEED_LONGLONG_H
@@ -33,6 +35,45 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
 
 #include "invert_limb.h"
 
+/* Given u = u1*B+u0 < d = d1*B+d0 with d normalized (high bit of d1 set),
+   put in v = v1*B+v0 an approximation of floor(u*B^2/d), with:
+   B = 2^GMP_NUMB_BITS and v <= floor(u*B^2/d) <= v + 16. */
+static void
+mpfr_div2_approx (mp_ptr v1, mp_ptr v0, mp_limb_t u1, mp_limb_t u0,
+                  mp_limb_t d1, mp_limb_t d0)
+{
+  mp_limb_t x, y, dummy, z2, z1, z0;
+
+  if (MPFR_UNLIKELY(d1 + 1 == MPFR_LIMB_ZERO))
+    x = 0;
+  else
+    __gmpfr_invert_limb (x, d1 + 1); /* B + x = floor((B^2-1)/(d1+1)) */
+  umul_ppmm (y, dummy, u1, x);
+  y += u1;
+  /* now y = floor(B*u1/d1) with y < B*u1/d1, thus even when
+     u1=d1, y < B */
+  umul_ppmm (dummy, z0, y, d0);
+  umul_ppmm (z2, z1, y, d1);
+  z1 += dummy;
+  z2 += (z1 < dummy);
+  z1 += (z0 != 0);
+  z2 += (z1 == 0 && z0 != 0);
+  /* now z = z2*B+z1 = ceil(y*d/B), and should cancel with u */
+  sub_ddmmss (z2, z1, u1, u0, z2, z1);
+  *v1 = y;
+  /* y*B + (B+x)*(z2*B+z1)/B */
+  umul_ppmm (*v0, dummy, x, z1);
+  /* add z1 */
+  *v0 += z1;
+  *v1 += (*v0 < z1);
+  /* add (B+x)*z2 */
+  while (z2--)
+    {
+      *v0 += x;
+      *v1 += 1 + (*v0 < x);
+    }
+}
+
 /* special code for p=PREC(q) < GMP_NUMB_BITS,
    and PREC(u), PREC(v) <= GMP_NUMB_BITS */
 static int
@@ -158,10 +199,6 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
   mp_limb_t v1 = MPFR_MANT(v)[1], v0 = MPFR_MANT(v)[0];
   mp_limb_t q1, q0, r3, r2, r1, r0, l, t;
   int extra;
-#if GMP_NUMB_BITS == 64
-  mp_limb_t inv;
-  __gmpfr_invert_limb (inv, v1);
-#endif
 
   r3 = MPFR_MANT(u)[1];
   r2 = MPFR_MANT(u)[0];
@@ -171,6 +208,15 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
 
   MPFR_ASSERTD(r3 < v1 || (r3 == v1 && r2 < v0));
 
+  mpfr_div2_approx (&q1, &q0, r3, r2, v1, v0);
+  /* we know q1*B+q0 is smaller or equal to the exact quotient, with
+     difference at most 16 */
+  if (MPFR_LIKELY(((q0 + 16) & (mask >> 1)) > 16))
+    {
+      sb = 1; /* result is not exact when we can round with an approximation */
+      goto round_div2;
+    }
+
   /* now r3:r2 < v1:v0 */
   if (MPFR_UNLIKELY(r3 == v1)) /* can occur in some rare cases */
     {
@@ -198,11 +244,7 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
   else
     {
       /* divide r3:r2 by v1: requires r3 < v1 */
-#if GMP_NUMB_BITS == 64 /* use __gmpfr_invert_limb */
-      __udiv_qrnnd_preinv (q1, r2, r3, r2, v1, inv);
-#else
       udiv_qrnnd (q1, r2, r3, r2, v1);
-#endif
 
       /* u-extra*v = q1 * v1 + r2 */
       /* now subtract q1*v0 to r2:0 */
@@ -252,11 +294,7 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
   else
     {
       /* divide r2:r1 by v1: requires r2 < v1 */
-#if GMP_NUMB_BITS == 64 /* use __gmpfr_invert_limb */
-      __udiv_qrnnd_preinv (q0, r1, r2, r1, v1, inv);
-#else
       udiv_qrnnd (q0, r1, r2, r1, v1);
-#endif
 
       /* r2:r1 = q0*v1 + r1 */
 
@@ -287,6 +325,7 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
 
   sb = r1 | r0;
 
+ round_div2:
   if (extra)
     {
       qx ++;
```
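
To see the approximation at work outside MPFR, here is a self-contained sketch that transcribes the same sequence of operations at a 32-bit limb size, so that every double-limb product fits in `uint64_t` and the exact quotient floor(u*B^2/d) can be cross-checked with the GCC/Clang extension `unsigned __int128`. The limb width, the names `limb` and `div2_approx`, and the random driver are assumptions of this sketch, not MPFR code; the real function works on full `mp_limb_t` limbs through `umul_ppmm`, `sub_ddmmss` and `__gmpfr_invert_limb`.

```c
/* Illustrative transcription of mpfr_div2_approx at B = 2^32; all names and
   the limb width are assumptions of this sketch, not MPFR's actual code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uint32_t limb;            /* B = 2^32 in this sketch */

/* Given u = u1*B+u0 < d = d1*B+d0 with d1 normalized, put in v1:v0 an
   approximation v of floor(u*B^2/d) from below. */
static void
div2_approx (limb *v1, limb *v0, limb u1, limb u0, limb d1, limb d0)
{
  limb x, y, z2, z1, z0, hi;
  uint64_t t;

  /* B + x = floor((B^2-1)/(d1+1)); x = 0 when d1+1 wraps around */
  x = (d1 + 1 == 0) ? 0
    : (limb) (UINT64_MAX / ((uint64_t) d1 + 1) - ((uint64_t) 1 << 32));
  t = (uint64_t) u1 * x;          /* umul_ppmm (y, dummy, u1, x) */
  y = (limb) (t >> 32) + u1;      /* y ~ u1*(B+x)/B, fits in a limb */
  t = (uint64_t) y * d0;          /* umul_ppmm (dummy, z0, y, d0) */
  z0 = (limb) t;
  hi = (limb) (t >> 32);
  t = (uint64_t) y * d1;          /* umul_ppmm (z2, z1, y, d1) */
  z1 = (limb) t;
  z2 = (limb) (t >> 32);
  z1 += hi;
  z2 += (z1 < hi);
  z1 += (z0 != 0);                /* now z2:z1 = ceil(y*d/B) */
  z2 += (z1 == 0 && z0 != 0);
  /* sub_ddmmss: z2:z1 = u - ceil(y*d/B), nonnegative by construction */
  t = (((uint64_t) u1 << 32) | u0) - (((uint64_t) z2 << 32) | z1);
  z2 = (limb) (t >> 32);
  z1 = (limb) t;
  /* v = y*B + floor((B+x)*(z2*B+z1)/B) */
  *v1 = y;
  *v0 = (limb) (((uint64_t) x * z1) >> 32);
  *v0 += z1;
  *v1 += (*v0 < z1);
  while (z2--)                    /* add B+x for each unit of z2 */
    {
      *v0 += x;
      *v1 += 1 + (*v0 < x);
    }
}

int
main (void)
{
  uint64_t worst = 0;
  for (long i = 0; i < 1000000; i++)
    {
      limb d1 = ((limb) rand () << 16 ^ (limb) rand ()) | 0x80000000u;
      limb d0 = (limb) rand () << 16 ^ (limb) rand ();
      limb u1 = (limb) rand () << 16 ^ (limb) rand ();
      limb u0 = (limb) rand () << 16 ^ (limb) rand ();
      uint64_t u = ((uint64_t) u1 << 32) | u0;
      uint64_t d = ((uint64_t) d1 << 32) | d0;
      if (u >= d)
        continue;                 /* the precondition is u < d */
      limb v1, v0;
      div2_approx (&v1, &v0, u1, u0, d1, d0);
      uint64_t v = ((uint64_t) v1 << 32) | v0;
      uint64_t q = (uint64_t) (((unsigned __int128) u << 64) / d);
      assert (v <= q);            /* approximation from below */
      if (q - v > worst)
        worst = q - v;
    }
  printf ("largest observed q - v: %llu\n", (unsigned long long) worst);
  return 0;
}
```

The `assert` checks the "from below" half of the stated property, which should hold at any limb width; the upper bound of 16 in the patch is stated for MPFR's limb size, so the harness only reports the largest gap it observes rather than asserting a constant.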