summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2010-09-24 18:27:25 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2010-09-24 18:27:25 +0000
commitcb1da31f3a0f2b5f6285240e2597b488240481ef (patch)
tree8e214229ae7cc3f910e06f9e073289e735d2a286 /src
parentf4e772a8ee554dda49f92576cfcfc991360a19f0 (diff)
downloadmpfr-cb1da31f3a0f2b5f6285240e2597b488240481ef.tar.gz
now use Mulders' algorithm also for mpfr_sqr, provides nice speed improvement
in all functions that perform squarings git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7166 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src')
-rw-r--r--src/mparam_h.in5
-rw-r--r--src/mul.c27
-rw-r--r--src/sqr.c3
3 files changed, 24 insertions, 11 deletions
diff --git a/src/mparam_h.in b/src/mparam_h.in
index cb1a8169a..9c4a88d45 100644
--- a/src/mparam_h.in
+++ b/src/mparam_h.in
@@ -1409,7 +1409,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
# define MPFR_SQRHIGH_TAB -1,-1,-1,-1,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0
#endif
#ifndef MPFR_MUL_THRESHOLD
-# define MPFR_MUL_THRESHOLD 40
+# define MPFR_MUL_THRESHOLD 20
+#endif
+#ifndef MPFR_SQR_THRESHOLD
+# define MPFR_SQR_THRESHOLD 20
#endif
#ifndef MPFR_EXP_2_THRESHOLD
# define MPFR_EXP_2_THRESHOLD 100 /* bits */
diff --git a/src/mul.c b/src/mul.c
index a02ad788f..4a4715462 100644
--- a/src/mul.c
+++ b/src/mul.c
@@ -347,7 +347,8 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
else
/* Mulders' mulhigh. Disable if squaring, since it is not tuned for
such a case */
- if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c))
+ if (MPFR_UNLIKELY ((bn > MPFR_MUL_THRESHOLD && b != c)
+ || (bn > MPFR_SQR_THRESHOLD && b == c)))
{
mp_limb_t *bp, *cp;
mp_size_t n;
@@ -426,14 +427,17 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
bp[0] = 0;
MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n);
}
- if (cn > n)
- cp --; /* FIXME: Could this happen? */
- else
- {
- cp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
- cp[0] = 0;
- MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
- }
+ if (b != c)
+ {
+ if (cn > n)
+ cp --; /* FIXME: Could this happen? */
+ else
+ {
+ cp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
+ cp[0] = 0;
+ MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
+ }
+ }
/* We will compute with one extra limb */
n++;
p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
@@ -448,7 +452,10 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
}
MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", MPFR_PREC (a), p));
/* Compute an approximation of the product of b and c */
- mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
+ if (b != c)
+ mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
+ else
+ mpfr_sqrhigh_n (tmp + k - 2 * n, bp, n);
/* now tmp[0]..tmp[k-1] contains the product of both mantissa,
with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
b1 = tmp[k-1] >> (GMP_NUMB_BITS - 1); /* msb from the product */
diff --git a/src/sqr.c b/src/sqr.c
index 2ac7d0b2f..0db9803e5 100644
--- a/src/sqr.c
+++ b/src/sqr.c
@@ -60,6 +60,9 @@ mpfr_sqr (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
tn = 1 + (2 * bq - 1) / GMP_NUMB_BITS; /* number of limbs of square,
2*bn or 2*bn-1 */
+ if (MPFR_UNLIKELY(bn > MPFR_SQR_THRESHOLD))
+ return mpfr_mul (a, b, b, rnd_mode);
+
MPFR_TMP_MARK(marker);
tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) 2 * bn * BYTES_PER_MP_LIMB);