path: root/src/div.c
author     zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>    2017-01-05 21:13:50 +0000
committer  zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>    2017-01-05 21:13:50 +0000
commit     d57e0486c2de1ba38e8125459f4fe45fc5ddfd36 (patch)
tree       fefecf6e3af0576f110e5508d2ecdde5eef9bfe5 /src/div.c
parent     cb6f5c7fc0a7081a4a70607d8d81198c55199b56 (diff)
download   mpfr-d57e0486c2de1ba38e8125459f4fe45fc5ddfd36.tar.gz
[src/div.c] faster division for 2 limbs
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11148 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/div.c')
-rw-r--r--  src/div.c  63
1 file changed, 51 insertions(+), 12 deletions(-)
diff --git a/src/div.c b/src/div.c
index 6d110d29c..0fedd85bd 100644
--- a/src/div.c
+++ b/src/div.c
@@ -24,6 +24,8 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
[1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
July 25-27, 2011, pages 7-14.
+ [2] Improved Division by Invariant Integers, Niels Möller and Torbjörn Granlund,
+ IEEE Transactions on Computers, volume 60, number 2, pages 165-175, 2011.
*/
#define MPFR_NEED_LONGLONG_H
@@ -33,6 +35,45 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
#include "invert_limb.h"
+/* Given u = u1*B+u0 < d = d1*B+d0 with d normalized (high bit of d1 set),
+ put in v = v1*B+v0 an approximation of floor(u*B^2/d), with:
+ B = 2^GMP_NUMB_BITS and v <= floor(u*B^2/d) <= v + 16. */
+static void
+mpfr_div2_approx (mp_ptr v1, mp_ptr v0, mp_limb_t u1, mp_limb_t u0,
+ mp_limb_t d1, mp_limb_t d0)
+{
+ mp_limb_t x, y, dummy, z2, z1, z0;
+
+ if (MPFR_UNLIKELY(d1 + 1 == MPFR_LIMB_ZERO))
+ x = 0;
+ else
+ __gmpfr_invert_limb (x, d1 + 1); /* B + x = floor((B^2-1)/(d1+1)) */
+ umul_ppmm (y, dummy, u1, x);
+ y += u1;
+ /* now y = floor(B*u1/d1) with y < B*u1/d1, thus even when
+ u1=d1, y < B */
+ umul_ppmm (dummy, z0, y, d0);
+ umul_ppmm (z2, z1, y, d1);
+ z1 += dummy;
+ z2 += (z1 < dummy);
+ z1 += (z0 != 0);
+ z2 += (z1 == 0 && z0 != 0);
+ /* now z = z2*B+z1 = ceil(y*d/B), and should cancel with u */
+ sub_ddmmss (z2, z1, u1, u0, z2, z1);
+ *v1 = y;
+ /* y*B + (B+x)*(z2*B+z1)/B */
+ umul_ppmm (*v0, dummy, x, z1);
+ /* add z1 */
+ *v0 += z1;
+ *v1 += (*v0 < z1);
+ /* add (B+x)*z2 */
+ while (z2--)
+ {
+ *v0 += x;
+ *v1 += 1 + (*v0 < x);
+ }
+}
+
/* special code for p=PREC(q) < GMP_NUMB_BITS,
and PREC(u), PREC(v) <= GMP_NUMB_BITS */
static int
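The new helper can be read at toy scale outside the patch. The sketch below is not MPFR code: it transliterates the same scheme to hypothetical 32-bit limbs (B = 2^32), with invert_limb32 standing in for __gmpfr_invert_limb and div2_approx32 mirroring mpfr_div2_approx, so that the exact quotient floor(u*B^2/d) fits in 64 bits and can be compared against the approximation using unsigned __int128 (a GCC/Clang extension). For a valid input (d normalized, u < d) the printed error should be small, illustrating the v <= floor(u*B^2/d) <= v + 16 claim above.

#include <stdint.h>
#include <stdio.h>

/* B + x = floor((B^2-1)/(d1+1)) for B = 2^32 and 2^31 <= d1 < 2^32-1;
   the quotient lies in [B, 2B), so the cast simply drops the implicit B.
   Stand-in for __gmpfr_invert_limb. */
static uint32_t
invert_limb32 (uint32_t d1)
{
  return (uint32_t) (UINT64_MAX / ((uint64_t) d1 + 1));
}

/* v = v1*B+v0 ~ floor(u*B^2/d) for u = u1*B+u0 < d = d1*B+d0, d1 >= 2^31 */
static void
div2_approx32 (uint32_t *v1, uint32_t *v0,
               uint32_t u1, uint32_t u0, uint32_t d1, uint32_t d0)
{
  uint32_t x, y, z2, z1, z0, hi;
  uint64_t p, uu, zz;

  x = (d1 == UINT32_MAX) ? 0 : invert_limb32 (d1);
  p = (uint64_t) u1 * x;
  y = (uint32_t) (p >> 32) + u1;          /* y ~ floor(B*u1/d1), y < B */

  p = (uint64_t) y * d0;                  /* y*d0 = hi*B + z0 */
  hi = (uint32_t) (p >> 32);
  z0 = (uint32_t) p;
  p = (uint64_t) y * d1;                  /* y*d1 = z2*B + z1 */
  z2 = (uint32_t) (p >> 32);
  z1 = (uint32_t) p;
  z1 += hi;                               /* z2:z1 += hi, with carry */
  z2 += (z1 < hi);
  z1 += (z0 != 0);                        /* take the ceiling */
  z2 += (z1 == 0 && z0 != 0);             /* now z2:z1 = ceil(y*d/B) */

  uu = ((uint64_t) u1 << 32) | u0;        /* z2:z1 = u - ceil(y*d/B) */
  zz = uu - (((uint64_t) z2 << 32) | z1);
  z2 = (uint32_t) (zz >> 32);
  z1 = (uint32_t) zz;

  *v1 = y;
  *v0 = (uint32_t) (((uint64_t) x * z1) >> 32);
  *v0 += z1;                              /* add (B+x)*z1/B */
  *v1 += (*v0 < z1);
  while (z2--)                            /* add (B+x)*z2 */
    {
      *v0 += x;
      *v1 += 1 + (*v0 < x);
    }
}

int
main (void)
{
  /* one arbitrary input with d normalized and u < d */
  uint32_t u1 = 0x9abcdef0, u0 = 0x12345678;
  uint32_t d1 = 0xfedcba98, d0 = 0x76543210;
  uint32_t v1, v0;
  unsigned __int128 u, d;
  uint64_t v, exact;

  div2_approx32 (&v1, &v0, u1, u0, d1, d0);
  u = ((unsigned __int128) u1 << 32) | u0;
  d = ((unsigned __int128) d1 << 32) | d0;
  exact = (uint64_t) ((u << 64) / d);     /* floor(u*B^2/d), fits 64 bits */
  v = ((uint64_t) v1 << 32) | v0;
  printf ("approx %llu  exact %llu  error %llu\n",
          (unsigned long long) v, (unsigned long long) exact,
          (unsigned long long) (exact - v));
  return 0;
}

The point of the scheme is that one precomputed reciprocal of d1+1 and a handful of multiplications replace the two per-limb divisions of the old path, which the later hunks keep only as a fallback.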
@@ -158,10 +199,6 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
mp_limb_t v1 = MPFR_MANT(v)[1], v0 = MPFR_MANT(v)[0];
mp_limb_t q1, q0, r3, r2, r1, r0, l, t;
int extra;
-#if GMP_NUMB_BITS == 64
- mp_limb_t inv;
- __gmpfr_invert_limb (inv, v1);
-#endif
r3 = MPFR_MANT(u)[1];
r2 = MPFR_MANT(u)[0];
@@ -171,6 +208,15 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
MPFR_ASSERTD(r3 < v1 || (r3 == v1 && r2 < v0));
+ mpfr_div2_approx (&q1, &q0, r3, r2, v1, v0);
+ /* we know q1*B+q0 is smaller or equal to the exact quotient, with
+ difference at most 16 */
+ if (MPFR_LIKELY(((q0 + 16) & (mask >> 1)) > 16))
+ {
+ sb = 1; /* result is not exact when we can round with an approximation */
+ goto round_div2;
+ }
+
/* now r3:r2 < v1:v0 */
if (MPFR_UNLIKELY(r3 == v1)) /* can occur in some rare cases */
{
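Why the test added in the previous hunk is safe to act on (a reading, not part of the patch): the approximation satisfies q1*B+q0 <= exact <= q1*B+q0 + 16, so the rounding to p bits can already be decided from q0 provided that adding any correction e in [0,16] can neither carry into the round bit nor leave the sticky bits at zero. Assuming mask selects, as elsewhere in mpfr_div_2, the low 2*GMP_NUMB_BITS - p bits of q0 (so that mask >> 1 covers the sticky bits below the round bit), the condition can be read as the hypothetical predicate below; when it holds, sb is set to 1 (the result is certainly inexact) and the code jumps straight to the rounding step at the new round_div2 label, skipping the exact division entirely.

#include <gmp.h>

/* Hypothetical helper, for illustration only: q0 is the low limb of an
   approximate quotient q with q <= exact <= q + 16, and mask selects the
   bits of q0 below the target precision.  If the bits of q0 + 16 under
   the round bit still exceed 16, then no e in [0,16] can carry out of
   that field, so the kept bits and the round bit of q0 already match the
   exact quotient, and its sticky bits are nonzero. */
static int
can_round_from_approx (mp_limb_t q0, mp_limb_t mask)
{
  return ((q0 + 16) & (mask >> 1)) > 16;
}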
@@ -198,11 +244,7 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
else
{
/* divide r3:r2 by v1: requires r3 < v1 */
-#if GMP_NUMB_BITS == 64 /* use __gmpfr_invert_limb */
- __udiv_qrnnd_preinv (q1, r2, r3, r2, v1, inv);
-#else
udiv_qrnnd (q1, r2, r3, r2, v1);
-#endif
/* u-extra*v = q1 * v1 + r2 */
/* now subtract q1*v0 to r2:0 */
@@ -252,11 +294,7 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
else
{
/* divide r2:r1 by v1: requires r2 < v1 */
-#if GMP_NUMB_BITS == 64 /* use __gmpfr_invert_limb */
- __udiv_qrnnd_preinv (q0, r1, r2, r1, v1, inv);
-#else
udiv_qrnnd (q0, r1, r2, r1, v1);
-#endif
/* r2:r1 = q0*v1 + r1 */
@@ -287,6 +325,7 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
sb = r1 | r0;
+ round_div2:
if (extra)
{
qx ++;