diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2010-09-24 18:27:25 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2010-09-24 18:27:25 +0000 |
commit | cb1da31f3a0f2b5f6285240e2597b488240481ef (patch) | |
tree | 8e214229ae7cc3f910e06f9e073289e735d2a286 /src | |
parent | f4e772a8ee554dda49f92576cfcfc991360a19f0 (diff) | |
download | mpfr-cb1da31f3a0f2b5f6285240e2597b488240481ef.tar.gz |
now use Mulders' algorithm also for mpfr_sqr, provides nice speed improvement
in all functions that perform squarings
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7166 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src')
-rw-r--r-- | src/mparam_h.in | 5 | ||||
-rw-r--r-- | src/mul.c | 27 | ||||
-rw-r--r-- | src/sqr.c | 3 |
3 files changed, 24 insertions, 11 deletions
diff --git a/src/mparam_h.in b/src/mparam_h.in index cb1a8169a..9c4a88d45 100644 --- a/src/mparam_h.in +++ b/src/mparam_h.in @@ -1409,7 +1409,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., # define MPFR_SQRHIGH_TAB -1,-1,-1,-1,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0 #endif #ifndef MPFR_MUL_THRESHOLD -# define MPFR_MUL_THRESHOLD 40 +# define MPFR_MUL_THRESHOLD 20 +#endif +#ifndef MPFR_SQR_THRESHOLD +# define MPFR_SQR_THRESHOLD 20 #endif #ifndef MPFR_EXP_2_THRESHOLD # define MPFR_EXP_2_THRESHOLD 100 /* bits */ @@ -347,7 +347,8 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) else /* Mulders' mulhigh. Disable if squaring, since it is not tuned for such a case */ - if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c)) + if (MPFR_UNLIKELY ((bn > MPFR_MUL_THRESHOLD && b != c) + || (bn > MPFR_SQR_THRESHOLD && b == c))) { mp_limb_t *bp, *cp; mp_size_t n; @@ -426,14 +427,17 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) bp[0] = 0; MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n); } - if (cn > n) - cp --; /* FIXME: Could this happen? */ - else - { - cp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t)); - cp[0] = 0; - MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n); - } + if (b != c) + { + if (cn > n) + cp --; /* FIXME: Could this happen? */ + else + { + cp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t)); + cp[0] = 0; + MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n); + } + } /* We will compute with one extra limb */ n++; p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2); @@ -448,7 +452,10 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) } MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", MPFR_PREC (a), p)); /* Compute an approximation of the product of b and c */ - mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n); + if (b != c) + mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n); + else + mpfr_sqrhigh_n (tmp + k - 2 * n, bp, n); /* now tmp[0]..tmp[k-1] contains the product of both mantissa, with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */ b1 = tmp[k-1] >> (GMP_NUMB_BITS - 1); /* msb from the product */ @@ -60,6 +60,9 @@ mpfr_sqr (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode) tn = 1 + (2 * bq - 1) / GMP_NUMB_BITS; /* number of limbs of square, 2*bn or 2*bn-1 */ + if (MPFR_UNLIKELY(bn > MPFR_SQR_THRESHOLD)) + return mpfr_mul (a, b, b, rnd_mode); + MPFR_TMP_MARK(marker); tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) 2 * bn * BYTES_PER_MP_LIMB); |