summaryrefslogtreecommitdiff
path: root/src/mulders.c
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2010-09-29 16:03:04 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2010-09-29 16:03:04 +0000
commit185793f681db0cb4b8fb4c03a072a79daa64da00 (patch)
tree3542ce4dbe4d259cc64ea9ed5ae542edd38ee789 /src/mulders.c
parent9cfe7f7883710e842da212956360ecd79ecc40a6 (diff)
downloadmpfr-185793f681db0cb4b8fb4c03a072a79daa64da00.tar.gz
[src/mulders.c] added new function mpfr_divhigh_n() for short division
(not used yet), fixed comments and added error analysis in mpfr_mulhigh_n() and mpfr_sqrhigh_n() [src/sqrt.c] fixed ill-placed MPFR_TMP_MARK [src/mparam_h.in] added default MPFR_DIVHIGH_TAB for mpfr_divhigh_n() [src/round_p.c] typo [src/mpfr-impl.h] added prototype for mpfr_divhigh_n [src/mul.c] added comment, simplified code [tune/tuneup.c] added tuning for mpfr_divhigh_n(), increased MAX_STEPS to get a better tuning (will take longer), set tolerance to 1.0 git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7183 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/mulders.c')
-rw-r--r--src/mulders.c93
1 files changed, 78 insertions, 15 deletions
diff --git a/src/mulders.c b/src/mulders.c
index 6a6a04f64..057474165 100644
--- a/src/mulders.c
+++ b/src/mulders.c
@@ -37,19 +37,23 @@ static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
#endif
/* Put in rp[n..2n-1] an approximation of the n high limbs
- of {mp, n} * {np, n}.
- The error is at worst of ln(n) for rp[n] and rp[n-1] is totally wrong */
+ of {up, n} * {vp, n}. The error is less than n ulps of rp[n]. */
static void
mpfr_mulhigh_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
mp_size_t i;
- rp += n-1;
- umul_ppmm (rp[1], rp[0], up[n-1], vp[0]);
+ rp += n - 1;
+ umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
+ which is less than B^n */
for (i = 1 ; i < n ; i++)
- rp[i+1] = mpn_addmul_1 (rp, up + (n - i - 1), i+1, vp[i]);
+ /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
+ rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
+ /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
}
+/* Put in rp[n..2n-1] an approximation of the n high limbs
+ of {np, n} * {mp, n}. The error is less than n ulps of rp[n]. */
void
mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
{
@@ -59,22 +63,26 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
MPFR_ASSERTD (k == -1 || k == 0 || (k > n/2 && k < n));
if (k < 0)
- mpn_mul_basecase (rp, np, n, mp, n);
+ mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */
else if (k == 0)
- mpfr_mulhigh_n_basecase (rp, np, mp, n);
+ mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */
else if (n > MUL_FFT_THRESHOLD)
- mpn_mul_n (rp, np, mp, n);
+ mpn_mul_n (rp, np, mp, n); /* result is exact, no error */
else
{
mp_size_t l = n - k;
mp_limb_t cy;
mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */
- mpfr_mulhigh_n (rp, np + k, mp, l); /* fills rp[l-1..2l-1] */
+ mpfr_mulhigh_n (rp, np + k, mp, l); /* fills rp[l-1..2l-1] */
cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
- mpfr_mulhigh_n (rp, np, mp + k, l); /* fills rp[l-1..2l-1] */
+ mpfr_mulhigh_n (rp, np, mp + k, l); /* fills rp[l-1..2l-1] */
cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
+ /* the neglected terms are in the two recursive calls to mpfr_mulhigh_n,
+ where in each case by induction the error is at most l ulps, thus
+ the total error is at most 2l ulps. Since k > n/2, l < n/2 which
+ gives an error < n ulps. */
}
}
@@ -85,6 +93,8 @@ static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB};
#define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0]))
#endif
+/* Put in rp[n..2n-1] an approximation of the n high limbs
+ of {np, n}^2. The error is less than n ulps of rp[n]. */
void
mpfr_sqrhigh_n (mp_ptr rp, mp_srcptr np, mp_size_t n)
{
@@ -109,13 +119,66 @@ mpfr_sqrhigh_n (mp_ptr rp, mp_srcptr np, mp_size_t n)
mpn_sqr_n (rp + 2 * l, np + l, k); /* fills rp[2l..2n-1] */
mpfr_mulhigh_n (rp, np, np + k, l); /* fills rp[l-1..2l-1] */
/* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
- /* mpn_lshift is 30% to 50% faster than mpn_add_n on Core 2 */
-#if 0
- cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
-#else
cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
-#endif
cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
}
}
+
+#ifdef MPFR_DIVHIGH_TAB_SIZE
+static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE];
+#else
+static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB};
+#define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0]))
+#endif
+
+/* put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
+ with the most significant limb of the quotient as return value (0 or 1).
+ Assumes the most significant bit of D is set. Clobbers N.
+ THIS IS PRELIMINARY CODE, DO NOT USE IT.
+*/
+mp_limb_t
+mpfr_divhigh_n (mp_ptr qp, mp_ptr np, mp_ptr dp, mp_size_t n)
+{
+ mp_size_t k, l;
+ mp_limb_t qh, cy;
+ mp_ptr tp;
+ int count = 0;
+ MPFR_TMP_DECL(marker);
+
+ k = divhigh_ktab[n];
+ MPFR_ASSERTD ((n+1)/2 <= k && k <= n); /* we should have n/2 <= k <= n */
+
+ /* for k=0, we use a full division (mpn_divrem) */
+
+ if (k == n)
+ return mpn_divrem (qp, 0, np, 2 * n, dp, n);
+
+ MPFR_TMP_MARK (marker);
+ l = n - k;
+ /* first divide the most significant 2k limbs from N by the most significant
+ k limbs of D */
+ qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */
+
+ /* it remains {np,2l+k} = {np,n+l} as remainder */
+
+ /* now we have to subtract high(Q1)*D0 where Q1={qp+l,k} and D0={dp,l} */
+ tp = MPFR_TMP_ALLOC (2 * l * sizeof (mp_limb_t));
+ mpfr_mulhigh_n (tp, qp + k, dp, l);
+ /* we are only interested in the upper l limbs from {tp,2l} */
+ cy = mpn_sub_n (np + k, np + k, tp, 2 * l);
+ while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */
+ {
+ if (count++ >= 2)
+ abort();
+ qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE);
+ cy -= mpn_add_n (np + l, np + l, dp, n);
+ }
+
+ /* now it remains {np,n+l} to divide by D */
+ cy = mpfr_divhigh_n (qp, np + k, dp + k, l);
+ qh += mpn_add_1 (qp + l, qp + l, k, cy);
+ MPFR_TMP_FREE(marker);
+
+ return qh;
+}