[src/mulders.c] added new function mpfr_divhigh_n() for short division

(not used yet), fixed comments and added error analysis in mpfr_mulhigh_n() and mpfr_sqrhigh_n() [src/sqrt.c] fixed ill-placed MPFR_TMP_MARK [src/mparam_h.in] added default MPFR_DIVHIGH_TAB for mpfr_divhigh_n() [src/round_p.c] typo [src/mpfr-impl.h] added prototype for mpfr_divhigh_n [src/mul.c] added comment, simplified code [tune/tuneup.c] added tuning for mpfr_divhigh_n(), increased MAX_STEPS to get a better tuning (will take longer), set tolerance to 1.0 git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7183 280ebfd0-de03-0410-8827-d642c229c3f4
author: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2010-09-29 16:03:04 +0000
committer: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2010-09-29 16:03:04 +0000
commit: 185793f681db0cb4b8fb4c03a072a79daa64da00 (patch)
tree: 3542ce4dbe4d259cc64ea9ed5ae542edd38ee789 /src/mulders.c
parent: 9cfe7f7883710e842da212956360ecd79ecc40a6 (diff)
download: mpfr-185793f681db0cb4b8fb4c03a072a79daa64da00.tar.gz
1 files changed, 78 insertions, 15 deletions
diff --git a/src/mulders.c b/src/mulders.c
index 6a6a04f64..057474165 100644
--- a/src/mulders.c
+++ b/src/mulders.c
@@ -37,19 +37,23 @@ static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
 #endif
 
 /* Put in  rp[n..2n-1] an approximation of the n high limbs
-   of {mp, n} * {np, n}.
-   The error is at worst of ln(n) for rp[n] and rp[n-1] is totally wrong */
+   of {up, n} * {vp, n}. The error is less than n ulps of rp[n]. */
 static void
 mpfr_mulhigh_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
 {
   mp_size_t i;
 
-  rp += n-1;
-  umul_ppmm (rp[1], rp[0], up[n-1], vp[0]);
+  rp += n - 1;
+  umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
+                                               which is less than B^n */
   for (i = 1 ; i < n ; i++)
-    rp[i+1] = mpn_addmul_1 (rp, up + (n - i - 1), i+1, vp[i]);
+    /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
+    rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
+  /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
 }
 
+/* Put in  rp[n..2n-1] an approximation of the n high limbs
+   of {np, n} * {mp, n}. The error is less than n ulps of rp[n]. */
 void
 mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
 {
@@ -59,22 +63,26 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
   MPFR_ASSERTD (k == -1 || k == 0 || (k > n/2 && k < n));
   if (k < 0)
-    mpn_mul_basecase (rp, np, n, mp, n);
+    mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */
   else if (k == 0)
-    mpfr_mulhigh_n_basecase (rp, np, mp, n);
+    mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */
   else if (n > MUL_FFT_THRESHOLD)
-    mpn_mul_n (rp, np, mp, n);
+    mpn_mul_n (rp, np, mp, n); /* result is exact, no error */
   else
     {
       mp_size_t l = n - k;
       mp_limb_t cy;
 
       mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */
-      mpfr_mulhigh_n (rp, np + k, mp, l);          /* fills rp[l-1..2l-1] */
+      mpfr_mulhigh_n (rp, np + k, mp, l);        /* fills rp[l-1..2l-1] */
       cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
-      mpfr_mulhigh_n (rp, np, mp + k, l);          /* fills rp[l-1..2l-1] */
+      mpfr_mulhigh_n (rp, np, mp + k, l);        /* fills rp[l-1..2l-1] */
       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
+      /* the neglected terms are in the two recursive calls to mpfr_mulhigh_n,
+         where in each case by induction the error is at most l ulps, thus
+         the total error is at most 2l ulps. Since k > n/2, l < n/2 which
+         gives an error < n ulps. */
     }
 }
 
@@ -85,6 +93,8 @@ static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB};
 #define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0]))
 #endif
 
+/* Put in  rp[n..2n-1] an approximation of the n high limbs
+   of {np, n}^2. The error is less than n ulps of rp[n]. */
 void
 mpfr_sqrhigh_n (mp_ptr rp, mp_srcptr np, mp_size_t n)
 {
@@ -109,13 +119,66 @@ mpfr_sqrhigh_n (mp_ptr rp, mp_srcptr np, mp_size_t n)
       mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
       mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
       /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
-      /* mpn_lshift is 30% to 50% faster than mpn_add_n on Core 2 */
-#if 0
-      cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
-#else
       cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
-#endif
       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
     }
 }
+
+#ifdef MPFR_DIVHIGH_TAB_SIZE
+static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE];
+#else
+static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB};
+#define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0]))
+#endif
+
+/* put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
+   with the most significant limb of the quotient as return value (0 or 1).
+   Assumes the most significant bit of D is set. Clobbers N.
+   THIS IS PRELIMINARY CODE, DO NOT USE IT.
+*/
+mp_limb_t
+mpfr_divhigh_n (mp_ptr qp, mp_ptr np, mp_ptr dp, mp_size_t n)
+{
+  mp_size_t k, l;
+  mp_limb_t qh, cy;
+  mp_ptr tp;
+  int count = 0;
+  MPFR_TMP_DECL(marker);
+
+  k = divhigh_ktab[n];
+  MPFR_ASSERTD ((n+1)/2 <= k && k <= n); /* we should have n/2 <= k <= n */
+
+  /* for k=0, we use a full division (mpn_divrem) */
+
+  if (k == n)
+    return mpn_divrem (qp, 0, np, 2 * n, dp, n);
+
+  MPFR_TMP_MARK (marker);
+  l = n - k;
+  /* first divide the most significant 2k limbs from N by the most significant
+     k limbs of D */
+  qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */
+
+  /* it remains {np,2l+k} = {np,n+l} as remainder */
+
+  /* now we have to subtract high(Q1)*D0 where Q1={qp+l,k} and D0={dp,l} */
+  tp = MPFR_TMP_ALLOC (2 * l * sizeof (mp_limb_t));
+  mpfr_mulhigh_n (tp, qp + k, dp, l);
+  /* we are only interested in the upper l limbs from {tp,2l} */
+  cy = mpn_sub_n (np + k, np + k, tp, 2 * l);
+  while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */
+    {
+      if (count++ >= 2)
+        abort();
+      qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE);
+      cy -= mpn_add_n (np + l, np + l, dp, n);
+    }
+
+  /* now it remains {np,n+l} to divide by D */
+  cy = mpfr_divhigh_n (qp, np + k, dp + k, l);
+  qh += mpn_add_1 (qp + l, qp + l, k, cy);
+  MPFR_TMP_FREE(marker);
+
+  return qh;
+}
author	zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>	2010-09-29 16:03:04 +0000
committer	zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>	2010-09-29 16:03:04 +0000
commit	185793f681db0cb4b8fb4c03a072a79daa64da00 (patch)
tree	3542ce4dbe4d259cc64ea9ed5ae542edd38ee789 /src/mulders.c
parent	9cfe7f7883710e842da212956360ecd79ecc40a6 (diff)
download	mpfr-185793f681db0cb4b8fb4c03a072a79daa64da00.tar.gz