summaryrefslogtreecommitdiff
path: root/tune
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2010-09-24 18:27:25 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2010-09-24 18:27:25 +0000
commitcb1da31f3a0f2b5f6285240e2597b488240481ef (patch)
tree8e214229ae7cc3f910e06f9e073289e735d2a286 /tune
parentf4e772a8ee554dda49f92576cfcfc991360a19f0 (diff)
downloadmpfr-cb1da31f3a0f2b5f6285240e2597b488240481ef.tar.gz
now use Mulders' algorithm also for mpfr_sqr, provides nice speed improvement
in all functions that perform squarings git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7166 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'tune')
-rw-r--r--tune/tuneup.c61
1 files changed, 57 insertions, 4 deletions
diff --git a/tune/tuneup.c b/tune/tuneup.c
index c60bd5c50..8e37ca14b 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -32,10 +32,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
int verbose;
+/* template for an unary function */
/* s->size: precision of both input and output
s->xp : Mantissa of first input
s->yp : mantissa of second input */
-
#define SPEED_MPFR_FUNC(mean_fun) \
do \
{ \
@@ -113,6 +113,7 @@ int verbose;
} \
while (0)
+/* template for a function like mpfr_mul */
#define SPEED_MPFR_OP(mean_fun) \
do \
{ \
@@ -154,6 +155,44 @@ int verbose;
} \
while (0)
+/* special template for mpfr_mul(a,b,b) */
+#define SPEED_MPFR_SQR(mean_fun) \
+ do \
+ { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ mpfr_t w, x; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, 0); \
+ \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (w, x, x, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+ } \
+ while (0)
+
/* s->size: precision of both input and output
s->xp : Mantissa of first input
s->r : exponent
@@ -234,18 +273,24 @@ speed_mpfr_sincos (struct speed_params *s)
SPEED_MPFR_FUNC2 (mpfr_sin_cos);
}
-/* Setup mpfr_mul */
+/* Setup mpfr_mul and mpfr_sqr */
mpfr_prec_t mpfr_mul_threshold;
+mpfr_prec_t mpfr_sqr_threshold;
#undef MPFR_MUL_THRESHOLD
#define MPFR_MUL_THRESHOLD mpfr_mul_threshold
+#undef MPFR_SQR_THRESHOLD
+#define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
#include "mul.c"
static double
speed_mpfr_mul (struct speed_params *s)
{
SPEED_MPFR_OP (mpfr_mul);
}
-
-
+static double
+speed_mpfr_sqr (struct speed_params *s)
+{
+ SPEED_MPFR_SQR (mpfr_mul);
+}
/************************************************
* Common functions (inspired by GMP function) *
@@ -913,6 +958,14 @@ all (const char *filename)
fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
(unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
+ /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
+ if (verbose)
+ printf ("Tuning mpfr_sqr...\n");
+ tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
+ 2*GMP_NUMB_BITS+1);
+ fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
+ (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
+
/* Tune mpfr_exp_2 */
if (verbose)
printf ("Tuning mpfr_exp_2...\n");