diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2010-09-29 16:03:04 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2010-09-29 16:03:04 +0000 |
commit | 185793f681db0cb4b8fb4c03a072a79daa64da00 (patch) | |
tree | 3542ce4dbe4d259cc64ea9ed5ae542edd38ee789 /src/mulders.c | |
parent | 9cfe7f7883710e842da212956360ecd79ecc40a6 (diff) | |
download | mpfr-185793f681db0cb4b8fb4c03a072a79daa64da00.tar.gz |
[src/mulders.c] added new function mpfr_divhigh_n() for short division
(not used yet), fixed comments and added error analysis
in mpfr_mulhigh_n() and mpfr_sqrhigh_n()
[src/sqrt.c] fixed ill-placed MPFR_TMP_MARK
[src/mparam_h.in] added default MPFR_DIVHIGH_TAB for mpfr_divhigh_n()
[src/round_p.c] typo
[src/mpfr-impl.h] added prototype for mpfr_divhigh_n
[src/mul.c] added comment, simplified code
[tune/tuneup.c] added tuning for mpfr_divhigh_n(), increased MAX_STEPS to get
a better tuning (will take longer), set tolerance to 1.0
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7183 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/mulders.c')
-rw-r--r-- | src/mulders.c | 93 |
1 files changed, 78 insertions, 15 deletions
diff --git a/src/mulders.c b/src/mulders.c index 6a6a04f64..057474165 100644 --- a/src/mulders.c +++ b/src/mulders.c @@ -37,19 +37,23 @@ static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB}; #endif /* Put in rp[n..2n-1] an approximation of the n high limbs - of {mp, n} * {np, n}. - The error is at worst of ln(n) for rp[n] and rp[n-1] is totally wrong */ + of {up, n} * {vp, n}. The error is less than n ulps of rp[n]. */ static void mpfr_mulhigh_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mp_size_t i; - rp += n-1; - umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); + rp += n - 1; + umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0], + which is less than B^n */ for (i = 1 ; i < n ; i++) - rp[i+1] = mpn_addmul_1 (rp, up + (n - i - 1), i+1, vp[i]); + /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */ + rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]); + /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */ } +/* Put in rp[n..2n-1] an approximation of the n high limbs + of {np, n} * {mp, n}. The error is less than n ulps of rp[n]. */ void mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) { @@ -59,22 +63,26 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4); MPFR_ASSERTD (k == -1 || k == 0 || (k > n/2 && k < n)); if (k < 0) - mpn_mul_basecase (rp, np, n, mp, n); + mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */ else if (k == 0) - mpfr_mulhigh_n_basecase (rp, np, mp, n); + mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */ else if (n > MUL_FFT_THRESHOLD) - mpn_mul_n (rp, np, mp, n); + mpn_mul_n (rp, np, mp, n); /* result is exact, no error */ else { mp_size_t l = n - k; mp_limb_t cy; mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */ - mpfr_mulhigh_n (rp, np + k, mp, l); /* fills rp[l-1..2l-1] */ + mpfr_mulhigh_n (rp, np + k, mp, l); /* fills rp[l-1..2l-1] */ cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); - mpfr_mulhigh_n (rp, np, mp + k, l); /* fills rp[l-1..2l-1] */ + mpfr_mulhigh_n (rp, np, mp + k, l); /* fills rp[l-1..2l-1] */ cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */ + /* the neglected terms are in the two recursive calls to mpfr_mulhigh_n, + where in each case by induction the error is at most l ulps, thus + the total error is at most 2l ulps. Since k > n/2, l < n/2 which + gives an error < n ulps. */ } } @@ -85,6 +93,8 @@ static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB}; #define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0])) #endif +/* Put in rp[n..2n-1] an approximation of the n high limbs + of {np, n}^2. The error is less than n ulps of rp[n]. */ void mpfr_sqrhigh_n (mp_ptr rp, mp_srcptr np, mp_size_t n) { @@ -109,13 +119,66 @@ mpfr_sqrhigh_n (mp_ptr rp, mp_srcptr np, mp_size_t n) mpn_sqr_n (rp + 2 * l, np + l, k); /* fills rp[2l..2n-1] */ mpfr_mulhigh_n (rp, np, np + k, l); /* fills rp[l-1..2l-1] */ /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */ - /* mpn_lshift is 30% to 50% faster than mpn_add_n on Core 2 */ -#if 0 - cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); -#else cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1); -#endif cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1); mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */ } } + +#ifdef MPFR_DIVHIGH_TAB_SIZE +static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE]; +#else +static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB}; +#define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0])) +#endif + +/* put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n}, + with the most significant limb of the quotient as return value (0 or 1). + Assumes the most significant bit of D is set. Clobbers N. + THIS IS PRELIMINARY CODE, DO NOT USE IT. +*/ +mp_limb_t +mpfr_divhigh_n (mp_ptr qp, mp_ptr np, mp_ptr dp, mp_size_t n) +{ + mp_size_t k, l; + mp_limb_t qh, cy; + mp_ptr tp; + int count = 0; + MPFR_TMP_DECL(marker); + + k = divhigh_ktab[n]; + MPFR_ASSERTD ((n+1)/2 <= k && k <= n); /* we should have n/2 <= k <= n */ + + /* for k=0, we use a full division (mpn_divrem) */ + + if (k == n) + return mpn_divrem (qp, 0, np, 2 * n, dp, n); + + MPFR_TMP_MARK (marker); + l = n - k; + /* first divide the most significant 2k limbs from N by the most significant + k limbs of D */ + qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */ + + /* it remains {np,2l+k} = {np,n+l} as remainder */ + + /* now we have to subtract high(Q1)*D0 where Q1={qp+l,k} and D0={dp,l} */ + tp = MPFR_TMP_ALLOC (2 * l * sizeof (mp_limb_t)); + mpfr_mulhigh_n (tp, qp + k, dp, l); + /* we are only interested in the upper l limbs from {tp,2l} */ + cy = mpn_sub_n (np + k, np + k, tp, 2 * l); + while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */ + { + if (count++ >= 2) + abort(); + qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE); + cy -= mpn_add_n (np + l, np + l, dp, n); + } + + /* now it remains {np,n+l} to divide by D */ + cy = mpfr_divhigh_n (qp, np + k, dp + k, l); + qh += mpn_add_1 (qp + l, qp + l, k, cy); + MPFR_TMP_FREE(marker); + + return qh; +} |