summaryrefslogtreecommitdiff
path: root/tune
diff options
context:
space:
mode:
authorvlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2010-08-17 09:10:13 +0000
committervlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2010-08-17 09:10:13 +0000
commitc9583bdfe064e1069828e518533f7bc29a8fdddb (patch)
tree2400842d4095628b8486fbeabaf7bc7b8af4ed02 /tune
parent50ac5b5985174201c7fa6e20496cd2b096107001 (diff)
downloadmpfr-c9583bdfe064e1069828e518533f7bc29a8fdddb.tar.gz
Source reorganization. In short:
* Added directories and moved related files into them: - src for the MPFR source files (to build the library). - doc for documentation files (except INSTALL, README...). - tools for various tools (scripts) and mbench. - tune for tuneup-related source files. - other for other source files (not distributed in tarballs). Existing directories: - tests for the source files of the test suite (make check). - examples for examples. - m4 for m4 files. * Renamed configure.in to configure.ac. * Added/updated Makefile.am files where needed. * Updated acinclude.m4 and configure.ac (AC_CONFIG_FILES line). * Updated the documentation (INSTALL, README, doc/README.dev and doc/mpfr.texi). * Updated NEWS and TODO. * Updated the scripts now in tools. The following script was used: #!/usr/bin/env zsh svn mkdir doc other src tools tune svn mv ${${(M)$(sed -n '/libmpfr_la_SOURCES/,/[^\]$/p' \ Makefile.am):#*.[ch]}:#get_patches.c} mparam_h.in \ round_raw_generic.c jyn_asympt.c src svn mv mbench check_inits_clears coverage get_patches.sh mpfrlint \ nightly-test update-patchv update-version tools svn mv bidimensional_sample.c speed.c tuneup.c tune svn mv *.{c,h} other svn mv FAQ.html README.dev algorithm* faq.xsl fdl.texi mpfr.texi \ update-faq doc svn mv configure.in configure.ac svn cp Makefile.am src/Makefile.am svn rm replace_all [Modifying some files, see above] svn add doc/Makefile.am svn add tune/Makefile.am git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7087 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'tune')
-rw-r--r--tune/Makefile.am38
-rw-r--r--tune/bidimensional_sample.c468
-rw-r--r--tune/speed.c283
-rw-r--r--tune/tuneup.c1017
4 files changed, 1806 insertions, 0 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
new file mode 100644
index 000000000..a39af71cd
--- /dev/null
+++ b/tune/Makefile.am
@@ -0,0 +1,38 @@
+# Copyright 2010 Free Software Foundation, Inc.
+# This Makefile.am is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+EXTRA_PROGRAMS = tuneup speed bidimensional_sample
+
+tuneup_SOURCES = tuneup.c
+tuneup_LDADD = -lspeed $(top_builddir)/src/libmpfr.la
+tuneup_LDFLAGS = -static
+
+speed_SOURCES = speed.c
+speed_LDADD = -lspeed $(top_builddir)/src/libmpfr.la
+speed_LDFLAGS = -static
+
+bidimensional_sample_SOURCES = bidimensional_sample.c
+bidimensional_sample_LDADD = -lspeed $(top_builddir)/src/libmpfr.la
+bidimensional_sample_LDFLAGS = -static
+
+INCLUDES = -I$(top_srcdir)/src -I$(top_builddir)/src
+
+tune:
+ $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
+ ./tuneup$(EXEEXT) -v
+ mv mparam.h $(top_builddir)/src/
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) clean
+ cd $(top_builddir)/src && $(MAKE) $(AM_MAKEFLAGS) libmpfr.la
+
+$(top_builddir)/src/libmpfr.la:
+ cd $(top_builddir)/src && $(MAKE) $(AM_MAKEFLAGS) libmpfr.la
+
+CLEANFILES = $(EXTRA_PROGRAMS) mparam.h
diff --git a/tune/bidimensional_sample.c b/tune/bidimensional_sample.c
new file mode 100644
index 000000000..f738e5b9c
--- /dev/null
+++ b/tune/bidimensional_sample.c
@@ -0,0 +1,468 @@
+/* [Description]
+
+Copyright 2010 Free Software Foundation, Inc.
+Contributed by the Arenaire and Caramel projects, INRIA.
+
+This file is part of the GNU MPFR Library.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
+http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <stdlib.h>
+#include <time.h>
+
+#define MPFR_NEED_LONGLONG_H
+#include "mpfr-impl.h"
+
+#undef _PROTO
+#define _PROTO __GMP_PROTO
+#include "speed.h"
+
+/* Let f be a function for which we have several implementations f1, f2... */
+/* We wish to have a quick overview of which implementation is the best */
+/* in function of the point x where f(x) is computed and of the prectision */
+/* prec requested by the user. */
+/* This is performed by drawing a 2D graphic with color indicating which */
+/* method is the best. */
+/* For building this graphic, the following structur is used (see the core */
+/* of generate_2D_sample for an explanation of each field. */
+struct speed_params2D
+{
+ /* x-window: [min_x, max_x] or [2^min_x, 2^max_x] */
+ /* or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x] */
+ /* depending on the value of logarithmic_scale_x */
+ double min_x;
+ double max_x;
+
+ /* prec-window: [min_prec, max_prec] */
+ mpfr_prec_t min_prec;
+ mpfr_prec_t max_prec;
+
+ /* number of sample points for the x-axis and the prec-axis */
+ int nb_points_x;
+ int nb_points_prec;
+
+ /* should the sample points be logarithmically scaled or not */
+ int logarithmic_scale_x;
+ int logarithmic_scale_prec;
+
+ /* list of functions g1, g2... measuring the execution time of f1, f2... */
+ /* when given a point x and a precision prec stored in s. */
+ /* We use s->xp to store the significant of x, s->r to store its exponent */
+ /* s->align_xp to store its sign, and s->size to store prec. */
+ double (**speed_funcs) (struct speed_params *s);
+};
+
+/* Given an array t of nb_functions double indicating the timings of several */
+/* implementations, return i, such that t[i] is the best timing. */
+int
+find_best (double *t, int nb_functions)
+{
+ int i, ibest;
+ double best;
+
+ if (nb_functions<=0)
+ {
+ fprintf (stderr, "There is no function\n");
+ abort ();
+ }
+
+ ibest = 0;
+ best = t[0];
+ for (i=1; i<nb_functions; i++)
+ {
+ if (t[i]<best)
+ {
+ best = t[i];
+ ibest = i;
+ }
+ }
+
+ return ibest;
+}
+
+void generate_2D_sample (FILE *output, struct speed_params2D param)
+{
+ mpfr_t temp;
+ double incr_prec;
+ mpfr_t incr_x;
+ mpfr_t x, x2;
+ double prec;
+ struct speed_params s;
+ int i;
+ int test;
+ int nb_functions;
+ double *t; /* store the timing of each implementation */
+
+ /* We first determine how many implementations we have */
+ nb_functions = 0;
+ while (param.speed_funcs[nb_functions] != NULL)
+ nb_functions++;
+
+ t = malloc (nb_functions * sizeof (double));
+ if (t == NULL)
+ {
+ fprintf (stderr, "Can't allocate memory.\n");
+ abort ();
+ }
+
+
+ mpfr_init2 (temp, MPFR_SMALL_PRECISION);
+
+ /* The precision is sampled from min_prec to max_prec with */
+ /* approximately nb_points_prec points. If logarithmic_scale_prec */
+ /* is true, the precision is multiplied by incr_prec at each */
+ /* step. Otherwise, incr_prec is added at each step. */
+ if (param.logarithmic_scale_prec)
+ {
+ mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
+ mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
+ mpfr_root (temp, temp,
+ (unsigned long int)param.nb_points_prec, MPFR_RNDU);
+ incr_prec = mpfr_get_d (temp, MPFR_RNDU);
+ }
+ else
+ {
+ incr_prec = (double)param.max_prec - (double)param.min_prec;
+ incr_prec = incr_prec/((double)param.nb_points_prec);
+ }
+
+ /* The points x are sampled according to the following rule: */
+ /* If logarithmic_scale_x = 0: */
+ /* nb_points_x points are equally distributed between min_x and max_x */
+ /* If logarithmic_scale_x = 1: */
+ /* nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At */
+ /* each step, the current point is multiplied by incr_x. */
+ /* If logarithmic_scale_x = -1: */
+ /* nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x) */
+ /* (at each step, the current point is divided by incr_x); and */
+ /* nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x) */
+ /* (at each step, the current point is multiplied by incr_x). */
+ mpfr_init2 (incr_x, param.max_prec);
+ if (param.logarithmic_scale_x == 0)
+ {
+ mpfr_set_d (incr_x,
+ (param.max_x - param.min_x)/(double)param.nb_points_x,
+ MPFR_RNDU);
+ }
+ else if (param.logarithmic_scale_x == -1)
+ {
+ mpfr_set_d (incr_x,
+ 2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
+ MPFR_RNDU);
+ mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
+ }
+ else
+ { /* other values of param.logarithmic_scale_x are considered as 1 */
+ mpfr_set_d (incr_x,
+ (param.max_x - param.min_x)/(double)param.nb_points_x,
+ MPFR_RNDU);
+ mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
+ }
+
+ /* Main loop */
+ mpfr_init2 (x, param.max_prec);
+ mpfr_init2 (x2, param.max_prec);
+ prec = (double)param.min_prec;
+ while (prec <= param.max_prec)
+ {
+ printf ("prec = %d\n", (int)prec);
+ if (param.logarithmic_scale_x == 0)
+ mpfr_set_d (temp, param.min_x, MPFR_RNDU);
+ else if (param.logarithmic_scale_x == -1)
+ {
+ mpfr_set_d (temp, param.max_x, MPFR_RNDD);
+ mpfr_exp2 (temp, temp, MPFR_RNDD);
+ mpfr_neg (temp, temp, MPFR_RNDU);
+ }
+ else
+ {
+ mpfr_set_d (temp, param.min_x, MPFR_RNDD);
+ mpfr_exp2 (temp, temp, MPFR_RNDD);
+ }
+
+ /* We perturb x a little bit, in order to avoid trailing zeros that */
+ /* might change the behavior of algorithms. */
+ mpfr_const_pi (x, MPFR_RNDN);
+ mpfr_div_2ui (x, x, 7, MPFR_RNDN);
+ mpfr_add_ui (x, x, 1, MPFR_RNDN);
+ mpfr_mul (x, x, temp, MPFR_RNDN);
+
+ test = 1;
+ while (test)
+ {
+ mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
+ mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);
+
+ s.r = (mp_limb_t)mpfr_get_exp (x);
+ s.size = (mpfr_prec_t)prec;
+ s.align_xp = (mpfr_sgn (x) > 0)?1:2;
+ mpfr_set_prec (x2, (mpfr_prec_t)prec);
+ mpfr_set (x2, x, MPFR_RNDU);
+ s.xp = x2->_mpfr_d;
+
+ for (i=0; i<nb_functions; i++)
+ {
+ t[i] = speed_measure (param.speed_funcs[i], &s);
+ mpfr_fprintf (output, "%e\t", t[i]);
+ }
+ fprintf (output, "%d\n", 1 + find_best (t, nb_functions));
+
+ if (param.logarithmic_scale_x == 0)
+ {
+ mpfr_add (x, x, incr_x, MPFR_RNDU);
+ if (mpfr_cmp_d (x, param.max_x) > 0)
+ test=0;
+ }
+ else
+ {
+ if (mpfr_sgn (x) < 0 )
+ { /* if x<0, it means that logarithmic_scale_x=-1 */
+ mpfr_div (x, x, incr_x, MPFR_RNDU);
+ mpfr_abs (temp, x, MPFR_RNDD);
+ mpfr_log2 (temp, temp, MPFR_RNDD);
+ if (mpfr_cmp_d (temp, param.min_x) < 0)
+ mpfr_neg (x, x, MPFR_RNDN);
+ }
+ else
+ {
+ mpfr_mul (x, x, incr_x, MPFR_RNDU);
+ mpfr_set (temp, x, MPFR_RNDD);
+ mpfr_log2 (temp, temp, MPFR_RNDD);
+ if (mpfr_cmp_d (temp, param.max_x) > 0)
+ test=0;
+ }
+ }
+ }
+
+ prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
+ : (prec + incr_prec) );
+ fprintf (output, "\n");
+ }
+
+ free (t);
+ mpfr_clear (incr_x);
+ mpfr_clear (x);
+ mpfr_clear (x2);
+ mpfr_clear (temp);
+
+ return;
+}
+
+#define SPEED_MPFR_FUNC_2D(mean_func) \
+ do \
+ { \
+ double t; \
+ unsigned i; \
+ mpfr_t w, x; \
+ mp_size_t size; \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, (mpfr_exp_t) s->r); \
+ if (s->align_xp == 2) MPFR_SET_NEG (x); \
+ \
+ mpfr_init2 (w, s->size); \
+ speed_starttime (); \
+ i = s->reps; \
+ \
+ do \
+ mean_func (w, x, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpfr_clear (w); \
+ return t; \
+ } \
+ while (0)
+
+mpfr_prec_t mpfr_exp_2_threshold;
+mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD;
+#undef MPFR_EXP_2_THRESHOLD
+#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
+#include "exp_2.c"
+
+double
+timing_exp1 (struct speed_params *s)
+{
+ mpfr_exp_2_threshold = s->size+1;
+ SPEED_MPFR_FUNC_2D (mpfr_exp_2);
+}
+
+double
+timing_exp2 (struct speed_params *s)
+{
+ mpfr_exp_2_threshold = s->size-1;
+ SPEED_MPFR_FUNC_2D (mpfr_exp_2);
+}
+
+double
+timing_exp3 (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC_2D (mpfr_exp_3);
+}
+
+
+#include "ai.c"
+double
+timing_ai1 (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC_2D (mpfr_ai1);
+}
+
+double
+timing_ai2 (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC_2D (mpfr_ai2);
+}
+
+/* These functions are for testing purpose only */
+/* They are used to draw which method is actually used */
+double
+virtual_timing_ai1 (struct speed_params *s)
+{
+ double t;
+ unsigned i;
+ mpfr_t w, x;
+ mp_size_t size;
+ mpfr_t temp1, temp2;
+
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
+
+ size = (s->size-1)/GMP_NUMB_BITS+1;
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
+ MPFR_TMP_INIT1 (s->xp, x, s->size);
+ MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
+ if (s->align_xp == 2) MPFR_SET_NEG (x);
+
+ mpfr_init2 (w, s->size);
+ speed_starttime ();
+ i = s->reps;
+
+ mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
+ mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
+
+ mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
+ mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
+ mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
+
+ if (MPFR_IS_NEG (x))
+ mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
+ else
+ mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
+
+ mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
+
+ if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
+ t = 1000.;
+ else
+ t = 1.;
+
+ mpfr_clear (temp1);
+ mpfr_clear (temp2);
+
+ return t;
+}
+
+double
+virtual_timing_ai2 (struct speed_params *s)
+{
+ double t;
+ unsigned i;
+ mpfr_t w, x;
+ mp_size_t size;
+ mpfr_t temp1, temp2;
+
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
+
+ size = (s->size-1)/GMP_NUMB_BITS+1;
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
+ MPFR_TMP_INIT1 (s->xp, x, s->size);
+ MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
+ if (s->align_xp == 2) MPFR_SET_NEG (x);
+
+ mpfr_init2 (w, s->size);
+ speed_starttime ();
+ i = s->reps;
+
+ mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
+ mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
+
+ mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
+ mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
+ mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
+
+ if (MPFR_IS_NEG (x))
+ mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
+ else
+ mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
+
+ mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
+
+ if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
+ t = 1.;
+ else
+ t = 1000.;
+
+ mpfr_clear (temp1);
+ mpfr_clear (temp2);
+
+ return t;
+}
+
+int
+main (void)
+{
+ FILE *output;
+ struct speed_params2D param;
+ double (*speed_funcs[3]) (struct speed_params *s);
+
+ /* char filename[256] = "virtual_timing_ai.dat"; */
+ /* speed_funcs[0] = virtual_timing_ai1; */
+ /* speed_funcs[1] = virtual_timing_ai2; */
+
+ char filename[256] = "airy.dat";
+ speed_funcs[0] = timing_ai1;
+ speed_funcs[1] = timing_ai2;
+
+ speed_funcs[2] = NULL;
+ output = fopen (filename, "w");
+ if (output == NULL)
+ {
+ fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
+ abort ();
+ }
+ param.min_x = -80;
+ param.max_x = 60;
+ param.min_prec = 50;
+ param.max_prec = 1500;
+ param.nb_points_x = 200;
+ param.nb_points_prec = 200;
+ param.logarithmic_scale_x = 0;
+ param.logarithmic_scale_prec = 0;
+ param.speed_funcs = speed_funcs;
+
+ generate_2D_sample (output, param);
+
+ fclose (output);
+ mpfr_free_cache ();
+ return 0;
+}
diff --git a/tune/speed.c b/tune/speed.c
new file mode 100644
index 000000000..80969aedf
--- /dev/null
+++ b/tune/speed.c
@@ -0,0 +1,283 @@
+/* Tune various threshold of MPFR
+
+Copyright 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Contributed by the Arenaire and Caramel projects, INRIA.
+
+This file is part of the GNU MPFR Library.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
+http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <stdlib.h>
+#include <time.h>
+
+#define MPFR_NEED_LONGLONG_H
+#include "mpfr-impl.h"
+
+/* extracted from mulders.c */
+#ifdef MPFR_MULHIGH_TAB_SIZE
+static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
+#else
+static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
+#define MPFR_MULHIGH_TAB_SIZE \
+ ((mp_size_t) (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])))
+#endif
+
+#undef _PROTO
+#define _PROTO __GMP_PROTO
+#include "speed.h"
+
+int verbose;
+
+/* s->size: precision of both input and output
+ s->xp : Mantissa of first input
+ s->yp : mantissa of second input */
+
+#define SPEED_MPFR_FUNC(mean_fun) do { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ mpfr_t w, x; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, 0); \
+ \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (w, x, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+} while (0)
+
+#define SPEED_MPFR_OP(mean_fun) do { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ mpfr_t w, x, y; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, 0); \
+ s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->yp, y, s->size); \
+ MPFR_SET_EXP (y, 0); \
+ \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_src (s, s->yp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (w, x, y, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+} while (0)
+
+
+/* First we include all the functions we want to tune inside this program.
+ We can't use GNU MPFR library since the THRESHOLD can't vary */
+
+/* Setup mpfr_mul */
+mpfr_prec_t mpfr_mul_threshold = MPFR_MUL_THRESHOLD;
+static double speed_mpfr_mul (struct speed_params *s) {
+ SPEED_MPFR_OP (mpfr_mul);
+}
+
+
+
+/************************************************
+ * Common functions (inspired by GMP function) *
+ ************************************************/
+#define THRESHOLD_WINDOW 16
+#define THRESHOLD_FINAL_WINDOW 128
+static double domeasure (mpfr_prec_t *threshold,
+ double (*func) (struct speed_params *),
+ mpfr_prec_t p)
+{
+ struct speed_params s;
+ mp_size_t size;
+ double t;
+
+ s.align_xp = s.align_yp = s.align_wp = 64;
+ s.size = p;
+ size = (p - 1)/GMP_NUMB_BITS+1;
+ s.xp = malloc (2*size*sizeof (mp_limb_t));
+ if (s.xp == NULL)
+ {
+ fprintf (stderr, "Can't allocate memory.\n");
+ abort ();
+ }
+ mpn_random (s.xp, size);
+ s.yp = s.xp + size;
+ mpn_random (s.yp, size);
+ t = speed_measure (func, &s);
+ if (t == -1.0)
+ {
+ fprintf (stderr, "Failed to measure function!\n");
+ abort ();
+ }
+ free (s.xp);
+ return t;
+}
+
+/* Tune a function with a simple THRESHOLD
+ The function doesn't depend on another threshold.
+ It assumes that it uses algo1 if p < THRESHOLD
+ and algo2 otherwise.
+ if algo2 is better for low prec, and algo1 better for high prec,
+ the behaviour of this function is undefined. */
+static void
+tune_simple_func (mpfr_prec_t *threshold,
+ double (*func) (struct speed_params *),
+ mpfr_prec_t pstart, mpfr_prec_t pend)
+{
+ double measure;
+ mpfr_prec_t p = pstart;
+ mp_size_t k, n;
+
+ while (p <= pend)
+ {
+ measure = domeasure (threshold, func, p);
+ printf ("prec=%lu mpfr_mul=%e ", p, measure);
+ n = 1 + (p - 1) / GMP_NUMB_BITS;
+ if (n <= MPFR_MUL_THRESHOLD)
+ k = MUL_FFT_THRESHOLD + 1;
+ else if (n < MPFR_MULHIGH_TAB_SIZE)
+ k = mulhigh_ktab[n];
+ else
+ k = 2*n/3;
+ if (k < 0)
+ printf ("[mpn_mul_basecase]\n");
+ else if (k == 0)
+ printf ("[mpfr_mulhigh_n_basecase]\n");
+ else if (k > MUL_FFT_THRESHOLD)
+ printf ("[mpn_mul_n]\n");
+ else
+ printf ("[mpfr_mulhigh_n]\n");
+ p = p + p / 10;
+ }
+}
+
+/*******************************************************
+ * Tune all the threshold of MPFR *
+ * Warning: tune the function in their dependent order!*
+ *******************************************************/
+static void
+all (void)
+{
+ FILE *f = stdout;
+ time_t start_time, end_time;
+ struct tm *tp;
+
+ speed_time_init ();
+ if (verbose) {
+ printf ("Using: %s\n", speed_time_string);
+ printf ("speed_precision %d", speed_precision);
+ if (speed_unittime == 1.0)
+ printf (", speed_unittime 1 cycle");
+ else
+ printf (", speed_unittime %.2e secs", speed_unittime);
+ if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
+ printf (", CPU freq unknown\n");
+ else
+ printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
+ }
+
+ time (&start_time);
+ tp = localtime (&start_time);
+ fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
+ tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
+
+#ifdef __ICC
+ fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
+#elif defined(__GNUC__)
+ fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
+#elif defined (__SUNPRO_C)
+ fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
+#elif defined (__sgi) && defined (_COMPILER_VERSION)
+ fprintf (f, "MIPSpro C %d.%d.%d */\n",
+ _COMPILER_VERSION / 100,
+ _COMPILER_VERSION / 10 % 10,
+ _COMPILER_VERSION % 10);
+#elif defined (__DECC) && defined (__DECC_VER)
+ fprintf (f, "DEC C %d */\n", __DECC_VER);
+#else
+ fprintf (f, "system compiler */\n");
+#endif
+ fprintf (f, "\n");
+
+ /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
+ if (verbose)
+ printf ("Measuring mpfr_mul with mpfr_mul_threshold=%lu...\n",
+ mpfr_mul_threshold);
+ tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
+ 2*GMP_NUMB_BITS+1, 1000);
+
+ /* End of tuning */
+ time (&end_time);
+ if (verbose)
+ printf ("Complete (took %ld seconds).\n", end_time - start_time);
+}
+
+
+/* Main function */
+int main (int argc, char *argv[])
+{
+ /* Unbuffered so if output is redirected to a file it isn't lost if the
+ program is killed part way through. */
+ setbuf (stdout, NULL);
+ setbuf (stderr, NULL);
+
+ verbose = argc > 1;
+
+ if (verbose)
+ printf ("Tuning MPFR (Coffee time?)...\n");
+
+ all ();
+
+ return 0;
+}
diff --git a/tune/tuneup.c b/tune/tuneup.c
new file mode 100644
index 000000000..c60bd5c50
--- /dev/null
+++ b/tune/tuneup.c
@@ -0,0 +1,1017 @@
+/* Tune various threshold of MPFR
+
+Copyright 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Contributed by the Arenaire and Caramel projects, INRIA.
+
+This file is part of the GNU MPFR Library.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
+http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <stdlib.h>
+#include <time.h>
+
+#define MPFR_NEED_LONGLONG_H
+#include "mpfr-impl.h"
+
+#undef _PROTO
+#define _PROTO __GMP_PROTO
+#include "speed.h"
+
+int verbose;
+
+/* s->size: precision of both input and output
+ s->xp : Mantissa of first input
+ s->yp : mantissa of second input */
+
+#define SPEED_MPFR_FUNC(mean_fun) \
+ do \
+ { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ mpfr_t w, x; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, 0); \
+ \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (w, x, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+ } \
+ while (0)
+
+/* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
+#define SPEED_MPFR_FUNC2(mean_fun) \
+ do \
+ { \
+ unsigned i; \
+ mp_ptr vp, wp; \
+ double t; \
+ mpfr_t v, w, x; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, 0); \
+ \
+ MPFR_TMP_INIT (vp, v, s->size, size); \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_dst (s, vp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (v, w, x, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+ } \
+ while (0)
+
+#define SPEED_MPFR_OP(mean_fun) \
+ do \
+ { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ mpfr_t w, x, y; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, 0); \
+ s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->yp, y, s->size); \
+ MPFR_SET_EXP (y, 0); \
+ \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_src (s, s->yp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (w, x, y, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+ } \
+ while (0)
+
+/* s->size: precision of both input and output
+ s->xp : Mantissa of first input
+ s->r : exponent
+ s->align_xp : sign (1 means positive, 2 means negative)
+*/
+#define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \
+ do \
+ { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ mpfr_t w, x; \
+ mp_size_t size; \
+ MPFR_TMP_DECL (marker); \
+ \
+ SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
+ SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
+ MPFR_TMP_MARK (marker); \
+ \
+ size = (s->size-1)/GMP_NUMB_BITS+1; \
+ s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
+ MPFR_TMP_INIT1 (s->xp, x, s->size); \
+ MPFR_SET_EXP (x, s->r); \
+ if (s->align_xp == 2) MPFR_SET_NEG (x); \
+ \
+ MPFR_TMP_INIT (wp, w, s->size, size); \
+ \
+ speed_operand_src (s, s->xp, size); \
+ speed_operand_dst (s, wp, size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (w, x, MPFR_RNDN); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ MPFR_TMP_FREE (marker); \
+ return t; \
+ } \
+ while (0)
+
+/* First we include all the functions we want to tune inside this program.
+ We can't use the GNU MPFR library since the thresholds are fixed macros. */
+
+/* Setup mpfr_exp_2 */
+mpfr_prec_t mpfr_exp_2_threshold;
+#undef MPFR_EXP_2_THRESHOLD
+#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
+#include "exp_2.c"
+static double
+speed_mpfr_exp_2 (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC (mpfr_exp_2);
+}
+
+/* Setup mpfr_exp */
+mpfr_prec_t mpfr_exp_threshold;
+#undef MPFR_EXP_THRESHOLD
+#define MPFR_EXP_THRESHOLD mpfr_exp_threshold
+#include "exp.c"
+static double
+speed_mpfr_exp (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC (mpfr_exp);
+}
+
+/* Setup mpfr_sin_cos */
+mpfr_prec_t mpfr_sincos_threshold;
+#undef MPFR_SINCOS_THRESHOLD
+#define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
+#include "sin_cos.c"
+#include "cos.c"
+static double
+speed_mpfr_sincos (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC2 (mpfr_sin_cos);
+}
+
+/* Setup mpfr_mul */
+mpfr_prec_t mpfr_mul_threshold;
+#undef MPFR_MUL_THRESHOLD
+#define MPFR_MUL_THRESHOLD mpfr_mul_threshold
+#include "mul.c"
+static double
+speed_mpfr_mul (struct speed_params *s)
+{
+ SPEED_MPFR_OP (mpfr_mul);
+}
+
+
+
+/************************************************
+ * Common functions (inspired by GMP function) *
+ ************************************************/
+static int
+analyze_data (double *dat, int ndat)
+{
+ double x, min_x;
+ int j, min_j;
+
+ x = 0.0;
+ for (j = 0; j < ndat; j++)
+ if (dat[j] > 0.0)
+ x += dat[j];
+
+ min_x = x;
+ min_j = 0;
+
+ for (j = 0; j < ndat; x -= dat[j], j++)
+ {
+ if (x < min_x)
+ {
+ min_x = x;
+ min_j = j;
+ }
+ }
+ return min_j;
+}
+
+#define THRESHOLD_WINDOW 16
+#define THRESHOLD_FINAL_WINDOW 128
+static double
+domeasure (mpfr_prec_t *threshold,
+ double (*func) (struct speed_params *),
+ mpfr_prec_t p)
+{
+ struct speed_params s;
+ mp_size_t size;
+ double t1, t2, d;
+
+ s.align_xp = s.align_yp = s.align_wp = 64;
+ s.size = p;
+ size = (p - 1)/GMP_NUMB_BITS+1;
+ s.xp = malloc (2*size*sizeof (mp_limb_t));
+ if (s.xp == NULL)
+ {
+ fprintf (stderr, "Can't allocate memory.\n");
+ abort ();
+ }
+ mpn_random (s.xp, size);
+ s.yp = s.xp + size;
+ mpn_random (s.yp, size);
+ *threshold = MPFR_PREC_MAX;
+ t1 = speed_measure (func, &s);
+ if (t1 == -1.0)
+ {
+ fprintf (stderr, "Failed to measure function 1!\n");
+ abort ();
+ }
+ *threshold = 1;
+ t2 = speed_measure (func, &s);
+ if (t2 == -1.0)
+ {
+ fprintf (stderr, "Failed to measure function 2!\n");
+ abort ();
+ }
+ free (s.xp);
+ /* t1 is the time of the first algo (used for low prec) */
+ if (t2 >= t1)
+ d = (t2 - t1) / t2;
+ else
+ d = (t2 - t1) / t1;
+ /* d > 0 if we have to use algo 1.
+ d < 0 if we have to use algo 2 */
+ return d;
+}
+
+/* Performs measures when both the precision and the point of evaluation
+ shall vary. s.yp is ignored and not initialized.
+ It assumes that func depends on three thresholds with a boundary of the
+ form threshold1*x + threshold2*p = some scaling factor, if x<0,
+ and threshold3*x + threshold2*p = some scaling factor, if x>=0.
+*/
+static double
+domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
+ double (*func) (struct speed_params *),
+ mpfr_prec_t p,
+ mpfr_t x)
+{
+ struct speed_params s;
+ mp_size_t size;
+ double t1, t2, d;
+ mpfr_t xtmp;
+
+ if (MPFR_IS_SINGULAR (x))
+ {
+ mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
+ abort ();
+ }
+ if (MPFR_IS_NEG (x))
+ s.align_xp = 2;
+ else
+ s.align_xp = 1;
+
+ s.align_yp = s.align_wp = 64;
+ s.size = p;
+ size = (p - 1)/GMP_NUMB_BITS+1;
+
+ mpfr_init2 (xtmp, p);
+ mpn_random (xtmp->_mpfr_d, size);
+ xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
+ MPFR_SET_EXP (xtmp, -53);
+ mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
+ mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */
+ /* where perturb ~ 2^(-53) is */
+ /* randomly chosen. */
+ s.xp = xtmp->_mpfr_d;
+ s.r = MPFR_GET_EXP (xtmp);
+
+ *threshold1 = 0;
+ *threshold2 = 0;
+ *threshold3 = 0;
+ t1 = speed_measure (func, &s);
+ if (t1 == -1.0)
+ {
+ fprintf (stderr, "Failed to measure function 1!\n");
+ abort ();
+ }
+
+ if (MPFR_IS_NEG (x))
+ *threshold1 = INT_MIN;
+ else
+ *threshold3 = INT_MAX;
+ *threshold2 = INT_MAX;
+ t2 = speed_measure (func, &s);
+ if (t2 == -1.0)
+ {
+ fprintf (stderr, "Failed to measure function 2!\n");
+ abort ();
+ }
+
+ /* t1 is the time of the first algo (used for low prec) */
+ if (t2 >= t1)
+ d = (t2 - t1) / t2;
+ else
+ d = (t2 - t1) / t1;
+ /* d > 0 if we have to use algo 1.
+ d < 0 if we have to use algo 2 */
+ mpfr_clear (xtmp);
+ return d;
+}
+
+/* Tune a function with a simple THRESHOLD
+ The function doesn't depend on another threshold.
+ It assumes that it uses algo1 if p < THRESHOLD
+ and algo2 otherwise.
+ if algo2 is better for low prec, and algo1 better for high prec,
+ the behaviour of this function is undefined. */
+static void
+tune_simple_func (mpfr_prec_t *threshold,
+ double (*func) (struct speed_params *),
+ mpfr_prec_t pstart)
+{
+ double measure[THRESHOLD_FINAL_WINDOW+1];
+ double d;
+ mpfr_prec_t pstep;
+ int i, numpos, numneg, try;
+ mpfr_prec_t pmin, pmax, p;
+
+ /* first look for a lower bound within 10% */
+ pmin = p = pstart;
+ d = domeasure (threshold, func, pmin);
+ if (d < 0.0)
+ {
+ if (verbose)
+ printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
+ (unsigned long) pmin);
+ *threshold = MPFR_PREC_MIN;
+ return;
+ }
+ if (d >= 1.00)
+ for (;;)
+ {
+ d = domeasure (threshold, func, pmin);
+ if (d < 1.00)
+ break;
+ p = pmin;
+ pmin += pmin/2;
+ }
+ pmin = p;
+ for (;;)
+ {
+ d = domeasure (threshold, func, pmin);
+ if (d < 0.10)
+ break;
+ pmin += GMP_NUMB_BITS;
+ }
+
+ /* then look for an upper bound within 20% */
+ pmax = pmin * 2;
+ for (;;)
+ {
+ d = domeasure (threshold, func, pmax);
+ if (d < -0.20)
+ break;
+ pmax += pmin / 2; /* don't increase too rapidly */
+ }
+
+ /* The threshold is between pmin and pmax. Affine them */
+ try = 0;
+ while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
+ {
+ pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
+ if (verbose)
+ printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
+ p = (pmin + pmax) / 2;
+ for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
+ {
+ measure[i] = domeasure (threshold, func,
+ p+(i-THRESHOLD_WINDOW/2)*pstep);
+ if (measure[i] > 0)
+ numpos ++;
+ else if (measure[i] < 0)
+ numneg ++;
+ }
+ if (numpos > numneg)
+ /* We use more often algo 1 than algo 2 */
+ pmin = p - THRESHOLD_WINDOW/2*pstep;
+ else if (numpos < numneg)
+ pmax = p + THRESHOLD_WINDOW/2*pstep;
+ else
+ /* numpos == numneg ... */
+ if (++ try > 2)
+ {
+ *threshold = p;
+ if (verbose)
+ printf ("Quick find: %lu\n", *threshold);
+ return ;
+ }
+ }
+
+ /* Final tune... */
+ if (verbose)
+ printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
+ for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
+ measure[i] = domeasure (threshold, func, pmin+i);
+ i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
+ *threshold = pmin + i;
+ if (verbose)
+ printf ("%lu\n", *threshold);
+ return;
+}
+
+/* Tune a function which behavior depends on both p and x,
+ in a given direction.
+ It assumes that for (x,p) close to zero, algo1 is used
+ and algo2 is used when (x,p) is far from zero.
+ If algo2 is better for low prec, and algo1 better for high prec,
+ the behaviour of this function is undefined.
+ This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
+ until it finds a point on the boundary. It returns ell.
+ */
+static void
+tune_simple_func_in_some_direction (long int *threshold1,
+ long int *threshold2,
+ long int *threshold3,
+ double (*func) (struct speed_params *),
+ mpfr_prec_t pstart,
+ int dirx, int dirp,
+ mpfr_t xres, mpfr_prec_t *pres)
+{
+ double measure[THRESHOLD_FINAL_WINDOW+1];
+ double d;
+ mpfr_prec_t pstep;
+ int i, numpos, numneg, try;
+ mpfr_prec_t pmin, pmax, p;
+ mpfr_t xmin, xmax, x;
+ mpfr_t ratio;
+
+ mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
+ mpfr_set_si (ratio, dirx, MPFR_RNDN);
+ mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
+
+ mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
+ mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
+ mpfr_init2 (x, MPFR_SMALL_PRECISION);
+
+ /* first look for a lower bound within 10% */
+ pmin = p = pstart;
+ mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
+ mpfr_set (x, xmin, MPFR_RNDN);
+
+ d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
+ if (d < 0.0)
+ {
+ if (verbose)
+ printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
+ (unsigned long) pmin);
+ *pres = MPFR_PREC_MIN;
+ mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
+ mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
+ return;
+ }
+ if (d >= 1.00)
+ for (;;)
+ {
+ d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
+ if (d < 1.00)
+ break;
+ p = pmin;
+ mpfr_set (x, xmin, MPFR_RNDN);
+ pmin += pmin/2;
+ mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
+ }
+ pmin = p;
+ mpfr_set (xmin, x, MPFR_RNDN);
+ for (;;)
+ {
+ d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
+ if (d < 0.10)
+ break;
+ pmin += GMP_NUMB_BITS;
+ mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
+ }
+
+ /* then look for an upper bound within 20% */
+ pmax = pmin * 2;
+ mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
+ for (;;)
+ {
+ d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
+ if (d < -0.20)
+ break;
+ pmax += pmin / 2; /* don't increase too rapidly */
+ mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
+ }
+
+ /* The threshold is between pmin and pmax. Affine them */
+ try = 0;
+ while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
+ {
+ pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
+ if (verbose)
+ printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
+ p = (pmin + pmax) / 2;
+ mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
+ for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
+ {
+ *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
+ mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
+ measure[i] = domeasure2 (threshold1, threshold2, threshold3,
+ func, *pres, xres);
+ if (measure[i] > 0)
+ numpos ++;
+ else if (measure[i] < 0)
+ numneg ++;
+ }
+ if (numpos > numneg)
+ {
+ /* We use more often algo 1 than algo 2 */
+ pmin = p - THRESHOLD_WINDOW/2*pstep;
+ mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
+ }
+ else if (numpos < numneg)
+ {
+ pmax = p + THRESHOLD_WINDOW/2*pstep;
+ mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
+ }
+ else
+ /* numpos == numneg ... */
+ if (++ try > 2)
+ {
+ *pres = p;
+ mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
+ if (verbose)
+ printf ("Quick find: %lu\n", *pres);
+ mpfr_clear (ratio);
+ mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
+ return ;
+ }
+ }
+
+ /* Final tune... */
+ if (verbose)
+ printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
+ for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
+ {
+ *pres = pmin+i;
+ mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
+ measure[i] = domeasure2 (threshold1, threshold2, threshold3,
+ func, *pres, xres);
+ }
+ i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
+ *pres = pmin + i;
+ mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
+ if (verbose)
+ printf ("%lu\n", *pres);
+ mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
+ return;
+}
+
+/************************************
+ * Tune Mulders' mulhigh function *
+ ************************************/
+#define TOLERANCE 1.02
+#ifndef MPFR_MULHIGH_SIZE
+# define MPFR_MULHIGH_SIZE 1024
+#endif
+#ifndef MPFR_SQRHIGH_SIZE
+# define MPFR_SQRHIGH_SIZE 1024
+#endif
+#define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
+#define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
+#include "mulders.c"
+
+static double
+speed_mpfr_mulhigh (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
+}
+
+static double
+speed_mpfr_sqrhigh (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
+}
+
+#define MAX_STEPS 32 /* maximum number of values of k tried for a given n */
+
+/* Tune size N */
+static mp_size_t
+tune_mulders_upto (mp_size_t n)
+{
+ struct speed_params s;
+ mp_size_t k, kbest, step;
+ double t, tbest;
+ MPFR_TMP_DECL (marker);
+
+ if (n == 0)
+ return -1;
+
+ MPFR_TMP_MARK (marker);
+ s.align_xp = s.align_yp = s.align_wp = 64;
+ s.size = n;
+ s.xp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t));
+ s.yp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t));
+ mpn_random (s.xp, n);
+ mpn_random (s.yp, n);
+
+ /* Check k == -1, mpn_mul_basecase */
+ mulhigh_ktab[n] = -1;
+ kbest = -1;
+ tbest = speed_measure (speed_mpfr_mulhigh, &s);
+
+ /* Check k == 0, mpn_mulhigh_n_basecase */
+ mulhigh_ktab[n] = 0;
+ t = speed_measure (speed_mpfr_mulhigh, &s);
+ if (t * TOLERANCE < tbest)
+ kbest = 0, tbest = t;
+
+ /* Check Mulders with cutoff point k */
+ step = 1 + n / (2 * MAX_STEPS);
+ for (k = n / 2 + 1 ; k < n ; k += step)
+ {
+ mulhigh_ktab[n] = k;
+ t = speed_measure (speed_mpfr_mulhigh, &s);
+ if (t * TOLERANCE < tbest)
+ kbest = k, tbest = t;
+ }
+
+ mulhigh_ktab[n] = kbest;
+
+ MPFR_TMP_FREE (marker);
+ return kbest;
+}
+
+/* Tune size N */
+static mp_size_t
+tune_sqr_mulders_upto (mp_size_t n)
+{
+ struct speed_params s;
+ mp_size_t k, kbest, step;
+ double t, tbest;
+ MPFR_TMP_DECL (marker);
+
+ if (n == 0)
+ return -1;
+
+ MPFR_TMP_MARK (marker);
+ s.align_xp = s.align_wp = 64;
+ s.size = n;
+ s.xp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t));
+ mpn_random (s.xp, n);
+
+ /* Check k == -1, mpn_sqr_basecase */
+ sqrhigh_ktab[n] = -1;
+ kbest = -1;
+ tbest = speed_measure (speed_mpfr_sqrhigh, &s);
+
+ /* Check k == 0, mpfr_mulhigh_n_basecase */
+ sqrhigh_ktab[n] = 0;
+ t = speed_measure (speed_mpfr_sqrhigh, &s);
+ if (t * TOLERANCE < tbest)
+ kbest = 0, tbest = t;
+
+ /* Check Mulders */
+ step = 1 + n / (2 * MAX_STEPS);
+ for (k = n / 2 + 1 ; k < n ; k += step)
+ {
+ sqrhigh_ktab[n] = k;
+ t = speed_measure (speed_mpfr_sqrhigh, &s);
+ if (t * TOLERANCE < tbest)
+ kbest = k, tbest = t;
+ }
+
+ sqrhigh_ktab[n] = kbest;
+
+ MPFR_TMP_FREE (marker);
+ return kbest;
+}
+
+static void
+tune_mulders (FILE *f)
+{
+ mp_size_t k;
+
+ if (verbose)
+ printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
+ fprintf (f, "#define MPFR_MULHIGH_TAB \\\n ");
+ for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
+ {
+ fprintf (f, "%d", (int) tune_mulders_upto (k));
+ if (k != MPFR_MULHIGH_TAB_SIZE-1)
+ fputc (',', f);
+ if ((k+1) % 16 == 0)
+ fprintf (f, " \\\n ");
+ if (verbose)
+ putchar ('.');
+ }
+ fprintf (f, " \n");
+ if (verbose)
+ putchar ('\n');
+}
+
+static void
+tune_sqr_mulders (FILE *f)
+{
+ mp_size_t k;
+
+ if (verbose)
+ printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
+ fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n ");
+ for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
+ {
+ fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
+ if (k != MPFR_SQRHIGH_TAB_SIZE-1)
+ fputc (',', f);
+ if ((k+1) % 16 == 0)
+ fprintf (f, " \\\n ");
+ if (verbose)
+ putchar ('.');
+ }
+ fprintf (f, " \n");
+ if (verbose)
+ putchar ('\n');
+}
+
+/*******************************************************
+ * Tuning functions for mpfr_ai *
+ *******************************************************/
+
+long int mpfr_ai_threshold1;
+long int mpfr_ai_threshold2;
+long int mpfr_ai_threshold3;
+#undef MPFR_AI_THRESHOLD1
+#define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
+#undef MPFR_AI_THRESHOLD2
+#define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
+#undef MPFR_AI_THRESHOLD3
+#define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
+
+#include "ai.c"
+
+static double
+speed_mpfr_ai (struct speed_params *s)
+{
+ SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
+}
+
+
+/*******************************************************
+ * Tune all the threshold of MPFR *
+ * Warning: tune the function in their dependent order!*
+ *******************************************************/
+static void
+all (const char *filename)
+{
+ FILE *f;
+ time_t start_time, end_time;
+ struct tm *tp;
+ mpfr_t x1, x2, x3, tmp1, tmp2;
+ mpfr_prec_t p1, p2, p3;
+
+ f = fopen (filename, "w");
+ if (f == NULL)
+ {
+ fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
+ abort ();
+ }
+
+ speed_time_init ();
+ if (verbose)
+ {
+ printf ("Using: %s\n", speed_time_string);
+ printf ("speed_precision %d", speed_precision);
+ if (speed_unittime == 1.0)
+ printf (", speed_unittime 1 cycle");
+ else
+ printf (", speed_unittime %.2e secs", speed_unittime);
+ if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
+ printf (", CPU freq unknown\n");
+ else
+ printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
+ }
+
+ time (&start_time);
+ tp = localtime (&start_time);
+ fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
+ tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
+
+#ifdef __ICC
+ fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
+#elif defined(__GNUC__)
+#ifdef __GNUC_PATCHLEVEL__
+ fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
+ __GNUC_PATCHLEVEL__);
+#else
+ fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
+#endif
+#elif defined (__SUNPRO_C)
+ fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
+#elif defined (__sgi) && defined (_COMPILER_VERSION)
+ fprintf (f, "MIPSpro C %d.%d.%d */\n",
+ _COMPILER_VERSION / 100,
+ _COMPILER_VERSION / 10 % 10,
+ _COMPILER_VERSION % 10);
+#elif defined (__DECC) && defined (__DECC_VER)
+ fprintf (f, "DEC C %d */\n", __DECC_VER);
+#else
+ fprintf (f, "system compiler */\n");
+#endif
+ fprintf (f, "\n");
+
+ /* Tune mulhigh */
+ tune_mulders (f);
+
+ /* Tune sqrhigh */
+ tune_sqr_mulders (f);
+
+ /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
+ if (verbose)
+ printf ("Tuning mpfr_mul...\n");
+ tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
+ 2*GMP_NUMB_BITS+1);
+ fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
+ (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
+
+ /* Tune mpfr_exp_2 */
+ if (verbose)
+ printf ("Tuning mpfr_exp_2...\n");
+ tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2,
+ MPFR_PREC_MIN);
+ mpfr_exp_2_threshold = MAX (GMP_NUMB_BITS, mpfr_exp_2_threshold);
+ fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
+ (unsigned long) mpfr_exp_2_threshold);
+
+ /* Tune mpfr_exp */
+ if (verbose)
+ printf ("Tuning mpfr_exp...\n");
+ tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
+ MPFR_PREC_MIN+3*GMP_NUMB_BITS);
+ fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
+ (unsigned long) mpfr_exp_threshold);
+
+ /* Tune mpfr_sin_cos */
+ if (verbose)
+ printf ("Tuning mpfr_sin_cos...\n");
+ tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
+ MPFR_PREC_MIN+3*GMP_NUMB_BITS);
+ fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
+ (unsigned long) mpfr_sincos_threshold);
+
+ /* Tune mpfr_ai */
+ if (verbose)
+ printf ("Tuning mpfr_ai...\n");
+ mpfr_init2 (x1, MPFR_SMALL_PRECISION);
+ mpfr_init2 (x2, MPFR_SMALL_PRECISION);
+ mpfr_init2 (x3, MPFR_SMALL_PRECISION);
+ mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
+ mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
+
+ tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
+ &mpfr_ai_threshold3, speed_mpfr_ai,
+ MPFR_PREC_MIN+GMP_NUMB_BITS,
+ -60, 200, x1, &p1);
+ tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
+ &mpfr_ai_threshold3, speed_mpfr_ai,
+ MPFR_PREC_MIN+GMP_NUMB_BITS,
+ -20, 500, x2, &p2);
+ tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
+ &mpfr_ai_threshold3, speed_mpfr_ai,
+ MPFR_PREC_MIN+GMP_NUMB_BITS,
+ 40, 200, x3, &p3);
+
+ mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
+ mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
+ mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
+ mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
+
+ mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
+ mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
+ mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
+ mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
+
+ mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
+ mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
+ mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
+
+ mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
+ mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
+ mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
+ mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
+ mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
+
+ fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld\n", mpfr_ai_threshold1);
+ fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
+ fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
+
+ mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
+ mpfr_clear (tmp1); mpfr_clear (tmp2);
+
+ /* End of tuning */
+ time (&end_time);
+ fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
+ end_time - start_time);
+ if (verbose)
+ printf ("Complete (took %ld seconds).\n", end_time - start_time);
+
+ fclose (f);
+}
+
+
+/* Main function */
+int main (int argc, char *argv[])
+{
+ /* Unbuffered so if output is redirected to a file it isn't lost if the
+ program is killed part way through. */
+ setbuf (stdout, NULL);
+ setbuf (stderr, NULL);
+
+ verbose = argc > 1;
+
+ if (verbose)
+ printf ("Tuning MPFR (Coffee time?)...\n");
+
+ all ("mparam.h");
+
+ return 0;
+}