diff options
author | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2010-08-17 09:10:13 +0000 |
---|---|---|
committer | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2010-08-17 09:10:13 +0000 |
commit | c9583bdfe064e1069828e518533f7bc29a8fdddb (patch) | |
tree | 2400842d4095628b8486fbeabaf7bc7b8af4ed02 /tune | |
parent | 50ac5b5985174201c7fa6e20496cd2b096107001 (diff) | |
download | mpfr-c9583bdfe064e1069828e518533f7bc29a8fdddb.tar.gz |
Source reorganization. In short:
* Added directories and moved related files into them:
- src for the MPFR source files (to build the library).
- doc for documentation files (except INSTALL, README...).
- tools for various tools (scripts) and mbench.
- tune for tuneup-related source files.
- other for other source files (not distributed in tarballs).
Existing directories:
- tests for the source files of the test suite (make check).
- examples for examples.
- m4 for m4 files.
* Renamed configure.in to configure.ac.
* Added/updated Makefile.am files where needed.
* Updated acinclude.m4 and configure.ac (AC_CONFIG_FILES line).
* Updated the documentation (INSTALL, README, doc/README.dev and
doc/mpfr.texi).
* Updated NEWS and TODO.
* Updated the scripts now in tools.
The following script was used:
#!/usr/bin/env zsh
svn mkdir doc other src tools tune
svn mv ${${(M)$(sed -n '/libmpfr_la_SOURCES/,/[^\]$/p' \
Makefile.am):#*.[ch]}:#get_patches.c} mparam_h.in \
round_raw_generic.c jyn_asympt.c src
svn mv mbench check_inits_clears coverage get_patches.sh mpfrlint \
nightly-test update-patchv update-version tools
svn mv bidimensional_sample.c speed.c tuneup.c tune
svn mv *.{c,h} other
svn mv FAQ.html README.dev algorithm* faq.xsl fdl.texi mpfr.texi \
update-faq doc
svn mv configure.in configure.ac
svn cp Makefile.am src/Makefile.am
svn rm replace_all
[Modifying some files, see above]
svn add doc/Makefile.am
svn add tune/Makefile.am
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@7087 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'tune')
-rw-r--r-- | tune/Makefile.am | 38 | ||||
-rw-r--r-- | tune/bidimensional_sample.c | 468 | ||||
-rw-r--r-- | tune/speed.c | 283 | ||||
-rw-r--r-- | tune/tuneup.c | 1017 |
4 files changed, 1806 insertions, 0 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am new file mode 100644 index 000000000..a39af71cd --- /dev/null +++ b/tune/Makefile.am @@ -0,0 +1,38 @@ +# Copyright 2010 Free Software Foundation, Inc. +# This Makefile.am is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +EXTRA_PROGRAMS = tuneup speed bidimensional_sample + +tuneup_SOURCES = tuneup.c +tuneup_LDADD = -lspeed $(top_builddir)/src/libmpfr.la +tuneup_LDFLAGS = -static + +speed_SOURCES = speed.c +speed_LDADD = -lspeed $(top_builddir)/src/libmpfr.la +speed_LDFLAGS = -static + +bidimensional_sample_SOURCES = bidimensional_sample.c +bidimensional_sample_LDADD = -lspeed $(top_builddir)/src/libmpfr.la +bidimensional_sample_LDFLAGS = -static + +INCLUDES = -I$(top_srcdir)/src -I$(top_builddir)/src + +tune: + $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT) + ./tuneup$(EXEEXT) -v + mv mparam.h $(top_builddir)/src/ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) clean + cd $(top_builddir)/src && $(MAKE) $(AM_MAKEFLAGS) libmpfr.la + +$(top_builddir)/src/libmpfr.la: + cd $(top_builddir)/src && $(MAKE) $(AM_MAKEFLAGS) libmpfr.la + +CLEANFILES = $(EXTRA_PROGRAMS) mparam.h diff --git a/tune/bidimensional_sample.c b/tune/bidimensional_sample.c new file mode 100644 index 000000000..f738e5b9c --- /dev/null +++ b/tune/bidimensional_sample.c @@ -0,0 +1,468 @@ +/* [Description] + +Copyright 2010 Free Software Foundation, Inc. +Contributed by the Arenaire and Caramel projects, INRIA. + +This file is part of the GNU MPFR Library. + +The GNU MPFR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MPFR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see +http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include <stdlib.h> +#include <time.h> + +#define MPFR_NEED_LONGLONG_H +#include "mpfr-impl.h" + +#undef _PROTO +#define _PROTO __GMP_PROTO +#include "speed.h" + +/* Let f be a function for which we have several implementations f1, f2... */ +/* We wish to have a quick overview of which implementation is the best */ +/* in function of the point x where f(x) is computed and of the prectision */ +/* prec requested by the user. */ +/* This is performed by drawing a 2D graphic with color indicating which */ +/* method is the best. */ +/* For building this graphic, the following structur is used (see the core */ +/* of generate_2D_sample for an explanation of each field. */ +struct speed_params2D +{ + /* x-window: [min_x, max_x] or [2^min_x, 2^max_x] */ + /* or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x] */ + /* depending on the value of logarithmic_scale_x */ + double min_x; + double max_x; + + /* prec-window: [min_prec, max_prec] */ + mpfr_prec_t min_prec; + mpfr_prec_t max_prec; + + /* number of sample points for the x-axis and the prec-axis */ + int nb_points_x; + int nb_points_prec; + + /* should the sample points be logarithmically scaled or not */ + int logarithmic_scale_x; + int logarithmic_scale_prec; + + /* list of functions g1, g2... measuring the execution time of f1, f2... */ + /* when given a point x and a precision prec stored in s. */ + /* We use s->xp to store the significant of x, s->r to store its exponent */ + /* s->align_xp to store its sign, and s->size to store prec. */ + double (**speed_funcs) (struct speed_params *s); +}; + +/* Given an array t of nb_functions double indicating the timings of several */ +/* implementations, return i, such that t[i] is the best timing. */ +int +find_best (double *t, int nb_functions) +{ + int i, ibest; + double best; + + if (nb_functions<=0) + { + fprintf (stderr, "There is no function\n"); + abort (); + } + + ibest = 0; + best = t[0]; + for (i=1; i<nb_functions; i++) + { + if (t[i]<best) + { + best = t[i]; + ibest = i; + } + } + + return ibest; +} + +void generate_2D_sample (FILE *output, struct speed_params2D param) +{ + mpfr_t temp; + double incr_prec; + mpfr_t incr_x; + mpfr_t x, x2; + double prec; + struct speed_params s; + int i; + int test; + int nb_functions; + double *t; /* store the timing of each implementation */ + + /* We first determine how many implementations we have */ + nb_functions = 0; + while (param.speed_funcs[nb_functions] != NULL) + nb_functions++; + + t = malloc (nb_functions * sizeof (double)); + if (t == NULL) + { + fprintf (stderr, "Can't allocate memory.\n"); + abort (); + } + + + mpfr_init2 (temp, MPFR_SMALL_PRECISION); + + /* The precision is sampled from min_prec to max_prec with */ + /* approximately nb_points_prec points. If logarithmic_scale_prec */ + /* is true, the precision is multiplied by incr_prec at each */ + /* step. Otherwise, incr_prec is added at each step. */ + if (param.logarithmic_scale_prec) + { + mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU); + mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU); + mpfr_root (temp, temp, + (unsigned long int)param.nb_points_prec, MPFR_RNDU); + incr_prec = mpfr_get_d (temp, MPFR_RNDU); + } + else + { + incr_prec = (double)param.max_prec - (double)param.min_prec; + incr_prec = incr_prec/((double)param.nb_points_prec); + } + + /* The points x are sampled according to the following rule: */ + /* If logarithmic_scale_x = 0: */ + /* nb_points_x points are equally distributed between min_x and max_x */ + /* If logarithmic_scale_x = 1: */ + /* nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At */ + /* each step, the current point is multiplied by incr_x. */ + /* If logarithmic_scale_x = -1: */ + /* nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x) */ + /* (at each step, the current point is divided by incr_x); and */ + /* nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x) */ + /* (at each step, the current point is multiplied by incr_x). */ + mpfr_init2 (incr_x, param.max_prec); + if (param.logarithmic_scale_x == 0) + { + mpfr_set_d (incr_x, + (param.max_x - param.min_x)/(double)param.nb_points_x, + MPFR_RNDU); + } + else if (param.logarithmic_scale_x == -1) + { + mpfr_set_d (incr_x, + 2.*(param.max_x - param.min_x)/(double)param.nb_points_x, + MPFR_RNDU); + mpfr_exp2 (incr_x, incr_x, MPFR_RNDU); + } + else + { /* other values of param.logarithmic_scale_x are considered as 1 */ + mpfr_set_d (incr_x, + (param.max_x - param.min_x)/(double)param.nb_points_x, + MPFR_RNDU); + mpfr_exp2 (incr_x, incr_x, MPFR_RNDU); + } + + /* Main loop */ + mpfr_init2 (x, param.max_prec); + mpfr_init2 (x2, param.max_prec); + prec = (double)param.min_prec; + while (prec <= param.max_prec) + { + printf ("prec = %d\n", (int)prec); + if (param.logarithmic_scale_x == 0) + mpfr_set_d (temp, param.min_x, MPFR_RNDU); + else if (param.logarithmic_scale_x == -1) + { + mpfr_set_d (temp, param.max_x, MPFR_RNDD); + mpfr_exp2 (temp, temp, MPFR_RNDD); + mpfr_neg (temp, temp, MPFR_RNDU); + } + else + { + mpfr_set_d (temp, param.min_x, MPFR_RNDD); + mpfr_exp2 (temp, temp, MPFR_RNDD); + } + + /* We perturb x a little bit, in order to avoid trailing zeros that */ + /* might change the behavior of algorithms. */ + mpfr_const_pi (x, MPFR_RNDN); + mpfr_div_2ui (x, x, 7, MPFR_RNDN); + mpfr_add_ui (x, x, 1, MPFR_RNDN); + mpfr_mul (x, x, temp, MPFR_RNDN); + + test = 1; + while (test) + { + mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN)); + mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec); + + s.r = (mp_limb_t)mpfr_get_exp (x); + s.size = (mpfr_prec_t)prec; + s.align_xp = (mpfr_sgn (x) > 0)?1:2; + mpfr_set_prec (x2, (mpfr_prec_t)prec); + mpfr_set (x2, x, MPFR_RNDU); + s.xp = x2->_mpfr_d; + + for (i=0; i<nb_functions; i++) + { + t[i] = speed_measure (param.speed_funcs[i], &s); + mpfr_fprintf (output, "%e\t", t[i]); + } + fprintf (output, "%d\n", 1 + find_best (t, nb_functions)); + + if (param.logarithmic_scale_x == 0) + { + mpfr_add (x, x, incr_x, MPFR_RNDU); + if (mpfr_cmp_d (x, param.max_x) > 0) + test=0; + } + else + { + if (mpfr_sgn (x) < 0 ) + { /* if x<0, it means that logarithmic_scale_x=-1 */ + mpfr_div (x, x, incr_x, MPFR_RNDU); + mpfr_abs (temp, x, MPFR_RNDD); + mpfr_log2 (temp, temp, MPFR_RNDD); + if (mpfr_cmp_d (temp, param.min_x) < 0) + mpfr_neg (x, x, MPFR_RNDN); + } + else + { + mpfr_mul (x, x, incr_x, MPFR_RNDU); + mpfr_set (temp, x, MPFR_RNDD); + mpfr_log2 (temp, temp, MPFR_RNDD); + if (mpfr_cmp_d (temp, param.max_x) > 0) + test=0; + } + } + } + + prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec) + : (prec + incr_prec) ); + fprintf (output, "\n"); + } + + free (t); + mpfr_clear (incr_x); + mpfr_clear (x); + mpfr_clear (x2); + mpfr_clear (temp); + + return; +} + +#define SPEED_MPFR_FUNC_2D(mean_func) \ + do \ + { \ + double t; \ + unsigned i; \ + mpfr_t w, x; \ + mp_size_t size; \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, (mpfr_exp_t) s->r); \ + if (s->align_xp == 2) MPFR_SET_NEG (x); \ + \ + mpfr_init2 (w, s->size); \ + speed_starttime (); \ + i = s->reps; \ + \ + do \ + mean_func (w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + mpfr_clear (w); \ + return t; \ + } \ + while (0) + +mpfr_prec_t mpfr_exp_2_threshold; +mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD; +#undef MPFR_EXP_2_THRESHOLD +#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold +#include "exp_2.c" + +double +timing_exp1 (struct speed_params *s) +{ + mpfr_exp_2_threshold = s->size+1; + SPEED_MPFR_FUNC_2D (mpfr_exp_2); +} + +double +timing_exp2 (struct speed_params *s) +{ + mpfr_exp_2_threshold = s->size-1; + SPEED_MPFR_FUNC_2D (mpfr_exp_2); +} + +double +timing_exp3 (struct speed_params *s) +{ + SPEED_MPFR_FUNC_2D (mpfr_exp_3); +} + + +#include "ai.c" +double +timing_ai1 (struct speed_params *s) +{ + SPEED_MPFR_FUNC_2D (mpfr_ai1); +} + +double +timing_ai2 (struct speed_params *s) +{ + SPEED_MPFR_FUNC_2D (mpfr_ai2); +} + +/* These functions are for testing purpose only */ +/* They are used to draw which method is actually used */ +double +virtual_timing_ai1 (struct speed_params *s) +{ + double t; + unsigned i; + mpfr_t w, x; + mp_size_t size; + mpfr_t temp1, temp2; + + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); + + size = (s->size-1)/GMP_NUMB_BITS+1; + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; + MPFR_TMP_INIT1 (s->xp, x, s->size); + MPFR_SET_EXP (x, (mpfr_exp_t) s->r); + if (s->align_xp == 2) MPFR_SET_NEG (x); + + mpfr_init2 (w, s->size); + speed_starttime (); + i = s->reps; + + mpfr_init2 (temp1, MPFR_SMALL_PRECISION); + mpfr_init2 (temp2, MPFR_SMALL_PRECISION); + + mpfr_set (temp1, x, MPFR_SMALL_PRECISION); + mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); + mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN); + + if (MPFR_IS_NEG (x)) + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); + else + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); + + mpfr_add (temp1, temp1, temp2, MPFR_RNDN); + + if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) + t = 1000.; + else + t = 1.; + + mpfr_clear (temp1); + mpfr_clear (temp2); + + return t; +} + +double +virtual_timing_ai2 (struct speed_params *s) +{ + double t; + unsigned i; + mpfr_t w, x; + mp_size_t size; + mpfr_t temp1, temp2; + + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); + + size = (s->size-1)/GMP_NUMB_BITS+1; + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; + MPFR_TMP_INIT1 (s->xp, x, s->size); + MPFR_SET_EXP (x, (mpfr_exp_t) s->r); + if (s->align_xp == 2) MPFR_SET_NEG (x); + + mpfr_init2 (w, s->size); + speed_starttime (); + i = s->reps; + + mpfr_init2 (temp1, MPFR_SMALL_PRECISION); + mpfr_init2 (temp2, MPFR_SMALL_PRECISION); + + mpfr_set (temp1, x, MPFR_SMALL_PRECISION); + mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); + mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN); + + if (MPFR_IS_NEG (x)) + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); + else + mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); + + mpfr_add (temp1, temp1, temp2, MPFR_RNDN); + + if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) + t = 1.; + else + t = 1000.; + + mpfr_clear (temp1); + mpfr_clear (temp2); + + return t; +} + +int +main (void) +{ + FILE *output; + struct speed_params2D param; + double (*speed_funcs[3]) (struct speed_params *s); + + /* char filename[256] = "virtual_timing_ai.dat"; */ + /* speed_funcs[0] = virtual_timing_ai1; */ + /* speed_funcs[1] = virtual_timing_ai2; */ + + char filename[256] = "airy.dat"; + speed_funcs[0] = timing_ai1; + speed_funcs[1] = timing_ai2; + + speed_funcs[2] = NULL; + output = fopen (filename, "w"); + if (output == NULL) + { + fprintf (stderr, "Can't open file '%s' for writing.\n", filename); + abort (); + } + param.min_x = -80; + param.max_x = 60; + param.min_prec = 50; + param.max_prec = 1500; + param.nb_points_x = 200; + param.nb_points_prec = 200; + param.logarithmic_scale_x = 0; + param.logarithmic_scale_prec = 0; + param.speed_funcs = speed_funcs; + + generate_2D_sample (output, param); + + fclose (output); + mpfr_free_cache (); + return 0; +} diff --git a/tune/speed.c b/tune/speed.c new file mode 100644 index 000000000..80969aedf --- /dev/null +++ b/tune/speed.c @@ -0,0 +1,283 @@ +/* Tune various threshold of MPFR + +Copyright 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +Contributed by the Arenaire and Caramel projects, INRIA. + +This file is part of the GNU MPFR Library. + +The GNU MPFR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MPFR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see +http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include <stdlib.h> +#include <time.h> + +#define MPFR_NEED_LONGLONG_H +#include "mpfr-impl.h" + +/* extracted from mulders.c */ +#ifdef MPFR_MULHIGH_TAB_SIZE +static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE]; +#else +static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB}; +#define MPFR_MULHIGH_TAB_SIZE \ + ((mp_size_t) (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0]))) +#endif + +#undef _PROTO +#define _PROTO __GMP_PROTO +#include "speed.h" + +int verbose; + +/* s->size: precision of both input and output + s->xp : Mantissa of first input + s->yp : mantissa of second input */ + +#define SPEED_MPFR_FUNC(mean_fun) do { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ +} while (0) + +#define SPEED_MPFR_OP(mean_fun) do { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x, y; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->yp, y, s->size); \ + MPFR_SET_EXP (y, 0); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_src (s, s->yp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, y, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ +} while (0) + + +/* First we include all the functions we want to tune inside this program. + We can't use GNU MPFR library since the THRESHOLD can't vary */ + +/* Setup mpfr_mul */ +mpfr_prec_t mpfr_mul_threshold = MPFR_MUL_THRESHOLD; +static double speed_mpfr_mul (struct speed_params *s) { + SPEED_MPFR_OP (mpfr_mul); +} + + + +/************************************************ + * Common functions (inspired by GMP function) * + ************************************************/ +#define THRESHOLD_WINDOW 16 +#define THRESHOLD_FINAL_WINDOW 128 +static double domeasure (mpfr_prec_t *threshold, + double (*func) (struct speed_params *), + mpfr_prec_t p) +{ + struct speed_params s; + mp_size_t size; + double t; + + s.align_xp = s.align_yp = s.align_wp = 64; + s.size = p; + size = (p - 1)/GMP_NUMB_BITS+1; + s.xp = malloc (2*size*sizeof (mp_limb_t)); + if (s.xp == NULL) + { + fprintf (stderr, "Can't allocate memory.\n"); + abort (); + } + mpn_random (s.xp, size); + s.yp = s.xp + size; + mpn_random (s.yp, size); + t = speed_measure (func, &s); + if (t == -1.0) + { + fprintf (stderr, "Failed to measure function!\n"); + abort (); + } + free (s.xp); + return t; +} + +/* Tune a function with a simple THRESHOLD + The function doesn't depend on another threshold. + It assumes that it uses algo1 if p < THRESHOLD + and algo2 otherwise. + if algo2 is better for low prec, and algo1 better for high prec, + the behaviour of this function is undefined. */ +static void +tune_simple_func (mpfr_prec_t *threshold, + double (*func) (struct speed_params *), + mpfr_prec_t pstart, mpfr_prec_t pend) +{ + double measure; + mpfr_prec_t p = pstart; + mp_size_t k, n; + + while (p <= pend) + { + measure = domeasure (threshold, func, p); + printf ("prec=%lu mpfr_mul=%e ", p, measure); + n = 1 + (p - 1) / GMP_NUMB_BITS; + if (n <= MPFR_MUL_THRESHOLD) + k = MUL_FFT_THRESHOLD + 1; + else if (n < MPFR_MULHIGH_TAB_SIZE) + k = mulhigh_ktab[n]; + else + k = 2*n/3; + if (k < 0) + printf ("[mpn_mul_basecase]\n"); + else if (k == 0) + printf ("[mpfr_mulhigh_n_basecase]\n"); + else if (k > MUL_FFT_THRESHOLD) + printf ("[mpn_mul_n]\n"); + else + printf ("[mpfr_mulhigh_n]\n"); + p = p + p / 10; + } +} + +/******************************************************* + * Tune all the threshold of MPFR * + * Warning: tune the function in their dependent order!* + *******************************************************/ +static void +all (void) +{ + FILE *f = stdout; + time_t start_time, end_time; + struct tm *tp; + + speed_time_init (); + if (verbose) { + printf ("Using: %s\n", speed_time_string); + printf ("speed_precision %d", speed_precision); + if (speed_unittime == 1.0) + printf (", speed_unittime 1 cycle"); + else + printf (", speed_unittime %.2e secs", speed_unittime); + if (speed_cycletime == 1.0 || speed_cycletime == 0.0) + printf (", CPU freq unknown\n"); + else + printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime); + } + + time (&start_time); + tp = localtime (&start_time); + fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ", + tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday); + +#ifdef __ICC + fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10); +#elif defined(__GNUC__) + fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__); +#elif defined (__SUNPRO_C) + fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100); +#elif defined (__sgi) && defined (_COMPILER_VERSION) + fprintf (f, "MIPSpro C %d.%d.%d */\n", + _COMPILER_VERSION / 100, + _COMPILER_VERSION / 10 % 10, + _COMPILER_VERSION % 10); +#elif defined (__DECC) && defined (__DECC_VER) + fprintf (f, "DEC C %d */\n", __DECC_VER); +#else + fprintf (f, "system compiler */\n"); +#endif + fprintf (f, "\n"); + + /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */ + if (verbose) + printf ("Measuring mpfr_mul with mpfr_mul_threshold=%lu...\n", + mpfr_mul_threshold); + tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul, + 2*GMP_NUMB_BITS+1, 1000); + + /* End of tuning */ + time (&end_time); + if (verbose) + printf ("Complete (took %ld seconds).\n", end_time - start_time); +} + + +/* Main function */ +int main (int argc, char *argv[]) +{ + /* Unbuffered so if output is redirected to a file it isn't lost if the + program is killed part way through. */ + setbuf (stdout, NULL); + setbuf (stderr, NULL); + + verbose = argc > 1; + + if (verbose) + printf ("Tuning MPFR (Coffee time?)...\n"); + + all (); + + return 0; +} diff --git a/tune/tuneup.c b/tune/tuneup.c new file mode 100644 index 000000000..c60bd5c50 --- /dev/null +++ b/tune/tuneup.c @@ -0,0 +1,1017 @@ +/* Tune various threshold of MPFR + +Copyright 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +Contributed by the Arenaire and Caramel projects, INRIA. + +This file is part of the GNU MPFR Library. + +The GNU MPFR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MPFR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see +http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include <stdlib.h> +#include <time.h> + +#define MPFR_NEED_LONGLONG_H +#include "mpfr-impl.h" + +#undef _PROTO +#define _PROTO __GMP_PROTO +#include "speed.h" + +int verbose; + +/* s->size: precision of both input and output + s->xp : Mantissa of first input + s->yp : mantissa of second input */ + +#define SPEED_MPFR_FUNC(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) + +/* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */ +#define SPEED_MPFR_FUNC2(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr vp, wp; \ + double t; \ + mpfr_t v, w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + \ + MPFR_TMP_INIT (vp, v, s->size, size); \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, vp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (v, w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) + +#define SPEED_MPFR_OP(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x, y; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, 0); \ + s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->yp, y, s->size); \ + MPFR_SET_EXP (y, 0); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_src (s, s->yp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, y, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) + +/* s->size: precision of both input and output + s->xp : Mantissa of first input + s->r : exponent + s->align_xp : sign (1 means positive, 2 means negative) +*/ +#define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \ + do \ + { \ + unsigned i; \ + mp_ptr wp; \ + double t; \ + mpfr_t w, x; \ + mp_size_t size; \ + MPFR_TMP_DECL (marker); \ + \ + SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ + SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ + MPFR_TMP_MARK (marker); \ + \ + size = (s->size-1)/GMP_NUMB_BITS+1; \ + s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ + MPFR_TMP_INIT1 (s->xp, x, s->size); \ + MPFR_SET_EXP (x, s->r); \ + if (s->align_xp == 2) MPFR_SET_NEG (x); \ + \ + MPFR_TMP_INIT (wp, w, s->size, size); \ + \ + speed_operand_src (s, s->xp, size); \ + speed_operand_dst (s, wp, size); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + mean_fun (w, x, MPFR_RNDN); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + MPFR_TMP_FREE (marker); \ + return t; \ + } \ + while (0) + +/* First we include all the functions we want to tune inside this program. + We can't use the GNU MPFR library since the thresholds are fixed macros. */ + +/* Setup mpfr_exp_2 */ +mpfr_prec_t mpfr_exp_2_threshold; +#undef MPFR_EXP_2_THRESHOLD +#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold +#include "exp_2.c" +static double +speed_mpfr_exp_2 (struct speed_params *s) +{ + SPEED_MPFR_FUNC (mpfr_exp_2); +} + +/* Setup mpfr_exp */ +mpfr_prec_t mpfr_exp_threshold; +#undef MPFR_EXP_THRESHOLD +#define MPFR_EXP_THRESHOLD mpfr_exp_threshold +#include "exp.c" +static double +speed_mpfr_exp (struct speed_params *s) +{ + SPEED_MPFR_FUNC (mpfr_exp); +} + +/* Setup mpfr_sin_cos */ +mpfr_prec_t mpfr_sincos_threshold; +#undef MPFR_SINCOS_THRESHOLD +#define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold +#include "sin_cos.c" +#include "cos.c" +static double +speed_mpfr_sincos (struct speed_params *s) +{ + SPEED_MPFR_FUNC2 (mpfr_sin_cos); +} + +/* Setup mpfr_mul */ +mpfr_prec_t mpfr_mul_threshold; +#undef MPFR_MUL_THRESHOLD +#define MPFR_MUL_THRESHOLD mpfr_mul_threshold +#include "mul.c" +static double +speed_mpfr_mul (struct speed_params *s) +{ + SPEED_MPFR_OP (mpfr_mul); +} + + + +/************************************************ + * Common functions (inspired by GMP function) * + ************************************************/ +static int +analyze_data (double *dat, int ndat) +{ + double x, min_x; + int j, min_j; + + x = 0.0; + for (j = 0; j < ndat; j++) + if (dat[j] > 0.0) + x += dat[j]; + + min_x = x; + min_j = 0; + + for (j = 0; j < ndat; x -= dat[j], j++) + { + if (x < min_x) + { + min_x = x; + min_j = j; + } + } + return min_j; +} + +#define THRESHOLD_WINDOW 16 +#define THRESHOLD_FINAL_WINDOW 128 +static double +domeasure (mpfr_prec_t *threshold, + double (*func) (struct speed_params *), + mpfr_prec_t p) +{ + struct speed_params s; + mp_size_t size; + double t1, t2, d; + + s.align_xp = s.align_yp = s.align_wp = 64; + s.size = p; + size = (p - 1)/GMP_NUMB_BITS+1; + s.xp = malloc (2*size*sizeof (mp_limb_t)); + if (s.xp == NULL) + { + fprintf (stderr, "Can't allocate memory.\n"); + abort (); + } + mpn_random (s.xp, size); + s.yp = s.xp + size; + mpn_random (s.yp, size); + *threshold = MPFR_PREC_MAX; + t1 = speed_measure (func, &s); + if (t1 == -1.0) + { + fprintf (stderr, "Failed to measure function 1!\n"); + abort (); + } + *threshold = 1; + t2 = speed_measure (func, &s); + if (t2 == -1.0) + { + fprintf (stderr, "Failed to measure function 2!\n"); + abort (); + } + free (s.xp); + /* t1 is the time of the first algo (used for low prec) */ + if (t2 >= t1) + d = (t2 - t1) / t2; + else + d = (t2 - t1) / t1; + /* d > 0 if we have to use algo 1. + d < 0 if we have to use algo 2 */ + return d; +} + +/* Performs measures when both the precision and the point of evaluation + shall vary. s.yp is ignored and not initialized. + It assumes that func depends on three thresholds with a boundary of the + form threshold1*x + threshold2*p = some scaling factor, if x<0, + and threshold3*x + threshold2*p = some scaling factor, if x>=0. +*/ +static double +domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3, + double (*func) (struct speed_params *), + mpfr_prec_t p, + mpfr_t x) +{ + struct speed_params s; + mp_size_t size; + double t1, t2, d; + mpfr_t xtmp; + + if (MPFR_IS_SINGULAR (x)) + { + mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n"); + abort (); + } + if (MPFR_IS_NEG (x)) + s.align_xp = 2; + else + s.align_xp = 1; + + s.align_yp = s.align_wp = 64; + s.size = p; + size = (p - 1)/GMP_NUMB_BITS+1; + + mpfr_init2 (xtmp, p); + mpn_random (xtmp->_mpfr_d, size); + xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT; + MPFR_SET_EXP (xtmp, -53); + mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN); + mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */ + /* where perturb ~ 2^(-53) is */ + /* randomly chosen. */ + s.xp = xtmp->_mpfr_d; + s.r = MPFR_GET_EXP (xtmp); + + *threshold1 = 0; + *threshold2 = 0; + *threshold3 = 0; + t1 = speed_measure (func, &s); + if (t1 == -1.0) + { + fprintf (stderr, "Failed to measure function 1!\n"); + abort (); + } + + if (MPFR_IS_NEG (x)) + *threshold1 = INT_MIN; + else + *threshold3 = INT_MAX; + *threshold2 = INT_MAX; + t2 = speed_measure (func, &s); + if (t2 == -1.0) + { + fprintf (stderr, "Failed to measure function 2!\n"); + abort (); + } + + /* t1 is the time of the first algo (used for low prec) */ + if (t2 >= t1) + d = (t2 - t1) / t2; + else + d = (t2 - t1) / t1; + /* d > 0 if we have to use algo 1. + d < 0 if we have to use algo 2 */ + mpfr_clear (xtmp); + return d; +} + +/* Tune a function with a simple THRESHOLD + The function doesn't depend on another threshold. + It assumes that it uses algo1 if p < THRESHOLD + and algo2 otherwise. + if algo2 is better for low prec, and algo1 better for high prec, + the behaviour of this function is undefined. */ +static void +tune_simple_func (mpfr_prec_t *threshold, + double (*func) (struct speed_params *), + mpfr_prec_t pstart) +{ + double measure[THRESHOLD_FINAL_WINDOW+1]; + double d; + mpfr_prec_t pstep; + int i, numpos, numneg, try; + mpfr_prec_t pmin, pmax, p; + + /* first look for a lower bound within 10% */ + pmin = p = pstart; + d = domeasure (threshold, func, pmin); + if (d < 0.0) + { + if (verbose) + printf ("Oops: even for %lu, algo 2 seems to be faster!\n", + (unsigned long) pmin); + *threshold = MPFR_PREC_MIN; + return; + } + if (d >= 1.00) + for (;;) + { + d = domeasure (threshold, func, pmin); + if (d < 1.00) + break; + p = pmin; + pmin += pmin/2; + } + pmin = p; + for (;;) + { + d = domeasure (threshold, func, pmin); + if (d < 0.10) + break; + pmin += GMP_NUMB_BITS; + } + + /* then look for an upper bound within 20% */ + pmax = pmin * 2; + for (;;) + { + d = domeasure (threshold, func, pmax); + if (d < -0.20) + break; + pmax += pmin / 2; /* don't increase too rapidly */ + } + + /* The threshold is between pmin and pmax. Affine them */ + try = 0; + while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW) + { + pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1); + if (verbose) + printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep); + p = (pmin + pmax) / 2; + for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) + { + measure[i] = domeasure (threshold, func, + p+(i-THRESHOLD_WINDOW/2)*pstep); + if (measure[i] > 0) + numpos ++; + else if (measure[i] < 0) + numneg ++; + } + if (numpos > numneg) + /* We use more often algo 1 than algo 2 */ + pmin = p - THRESHOLD_WINDOW/2*pstep; + else if (numpos < numneg) + pmax = p + THRESHOLD_WINDOW/2*pstep; + else + /* numpos == numneg ... */ + if (++ try > 2) + { + *threshold = p; + if (verbose) + printf ("Quick find: %lu\n", *threshold); + return ; + } + } + + /* Final tune... */ + if (verbose) + printf ("Finalizing in [%lu, %lu]... ", pmin, pmax); + for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++) + measure[i] = domeasure (threshold, func, pmin+i); + i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1); + *threshold = pmin + i; + if (verbose) + printf ("%lu\n", *threshold); + return; +} + +/* Tune a function which behavior depends on both p and x, + in a given direction. + It assumes that for (x,p) close to zero, algo1 is used + and algo2 is used when (x,p) is far from zero. + If algo2 is better for low prec, and algo1 better for high prec, + the behaviour of this function is undefined. + This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp) + until it finds a point on the boundary. It returns ell. + */ +static void +tune_simple_func_in_some_direction (long int *threshold1, + long int *threshold2, + long int *threshold3, + double (*func) (struct speed_params *), + mpfr_prec_t pstart, + int dirx, int dirp, + mpfr_t xres, mpfr_prec_t *pres) +{ + double measure[THRESHOLD_FINAL_WINDOW+1]; + double d; + mpfr_prec_t pstep; + int i, numpos, numneg, try; + mpfr_prec_t pmin, pmax, p; + mpfr_t xmin, xmax, x; + mpfr_t ratio; + + mpfr_init2 (ratio, MPFR_SMALL_PRECISION); + mpfr_set_si (ratio, dirx, MPFR_RNDN); + mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN); + + mpfr_init2 (xmin, MPFR_SMALL_PRECISION); + mpfr_init2 (xmax, MPFR_SMALL_PRECISION); + mpfr_init2 (x, MPFR_SMALL_PRECISION); + + /* first look for a lower bound within 10% */ + pmin = p = pstart; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + mpfr_set (x, xmin, MPFR_RNDN); + + d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); + if (d < 0.0) + { + if (verbose) + printf ("Oops: even for %lu, algo 2 seems to be faster!\n", + (unsigned long) pmin); + *pres = MPFR_PREC_MIN; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); + return; + } + if (d >= 1.00) + for (;;) + { + d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); + if (d < 1.00) + break; + p = pmin; + mpfr_set (x, xmin, MPFR_RNDN); + pmin += pmin/2; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + } + pmin = p; + mpfr_set (xmin, x, MPFR_RNDN); + for (;;) + { + d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); + if (d < 0.10) + break; + pmin += GMP_NUMB_BITS; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + } + + /* then look for an upper bound within 20% */ + pmax = pmin * 2; + mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); + for (;;) + { + d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax); + if (d < -0.20) + break; + pmax += pmin / 2; /* don't increase too rapidly */ + mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); + } + + /* The threshold is between pmin and pmax. Affine them */ + try = 0; + while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW) + { + pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1); + if (verbose) + printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep); + p = (pmin + pmax) / 2; + mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN); + for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) + { + *pres = p+(i-THRESHOLD_WINDOW/2)*pstep; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + measure[i] = domeasure2 (threshold1, threshold2, threshold3, + func, *pres, xres); + if (measure[i] > 0) + numpos ++; + else if (measure[i] < 0) + numneg ++; + } + if (numpos > numneg) + { + /* We use more often algo 1 than algo 2 */ + pmin = p - THRESHOLD_WINDOW/2*pstep; + mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); + } + else if (numpos < numneg) + { + pmax = p + THRESHOLD_WINDOW/2*pstep; + mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); + } + else + /* numpos == numneg ... */ + if (++ try > 2) + { + *pres = p; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + if (verbose) + printf ("Quick find: %lu\n", *pres); + mpfr_clear (ratio); + mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); + return ; + } + } + + /* Final tune... */ + if (verbose) + printf ("Finalizing in [%lu, %lu]... ", pmin, pmax); + for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++) + { + *pres = pmin+i; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + measure[i] = domeasure2 (threshold1, threshold2, threshold3, + func, *pres, xres); + } + i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1); + *pres = pmin + i; + mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); + if (verbose) + printf ("%lu\n", *pres); + mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); + return; +} + +/************************************ + * Tune Mulders' mulhigh function * + ************************************/ +#define TOLERANCE 1.02 +#ifndef MPFR_MULHIGH_SIZE +# define MPFR_MULHIGH_SIZE 1024 +#endif +#ifndef MPFR_SQRHIGH_SIZE +# define MPFR_SQRHIGH_SIZE 1024 +#endif +#define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE +#define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE +#include "mulders.c" + +static double +speed_mpfr_mulhigh (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n); +} + +static double +speed_mpfr_sqrhigh (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n); +} + +#define MAX_STEPS 32 /* maximum number of values of k tried for a given n */ + +/* Tune size N */ +static mp_size_t +tune_mulders_upto (mp_size_t n) +{ + struct speed_params s; + mp_size_t k, kbest, step; + double t, tbest; + MPFR_TMP_DECL (marker); + + if (n == 0) + return -1; + + MPFR_TMP_MARK (marker); + s.align_xp = s.align_yp = s.align_wp = 64; + s.size = n; + s.xp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t)); + s.yp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t)); + mpn_random (s.xp, n); + mpn_random (s.yp, n); + + /* Check k == -1, mpn_mul_basecase */ + mulhigh_ktab[n] = -1; + kbest = -1; + tbest = speed_measure (speed_mpfr_mulhigh, &s); + + /* Check k == 0, mpn_mulhigh_n_basecase */ + mulhigh_ktab[n] = 0; + t = speed_measure (speed_mpfr_mulhigh, &s); + if (t * TOLERANCE < tbest) + kbest = 0, tbest = t; + + /* Check Mulders with cutoff point k */ + step = 1 + n / (2 * MAX_STEPS); + for (k = n / 2 + 1 ; k < n ; k += step) + { + mulhigh_ktab[n] = k; + t = speed_measure (speed_mpfr_mulhigh, &s); + if (t * TOLERANCE < tbest) + kbest = k, tbest = t; + } + + mulhigh_ktab[n] = kbest; + + MPFR_TMP_FREE (marker); + return kbest; +} + +/* Tune size N */ +static mp_size_t +tune_sqr_mulders_upto (mp_size_t n) +{ + struct speed_params s; + mp_size_t k, kbest, step; + double t, tbest; + MPFR_TMP_DECL (marker); + + if (n == 0) + return -1; + + MPFR_TMP_MARK (marker); + s.align_xp = s.align_wp = 64; + s.size = n; + s.xp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t)); + mpn_random (s.xp, n); + + /* Check k == -1, mpn_sqr_basecase */ + sqrhigh_ktab[n] = -1; + kbest = -1; + tbest = speed_measure (speed_mpfr_sqrhigh, &s); + + /* Check k == 0, mpfr_mulhigh_n_basecase */ + sqrhigh_ktab[n] = 0; + t = speed_measure (speed_mpfr_sqrhigh, &s); + if (t * TOLERANCE < tbest) + kbest = 0, tbest = t; + + /* Check Mulders */ + step = 1 + n / (2 * MAX_STEPS); + for (k = n / 2 + 1 ; k < n ; k += step) + { + sqrhigh_ktab[n] = k; + t = speed_measure (speed_mpfr_sqrhigh, &s); + if (t * TOLERANCE < tbest) + kbest = k, tbest = t; + } + + sqrhigh_ktab[n] = kbest; + + MPFR_TMP_FREE (marker); + return kbest; +} + +static void +tune_mulders (FILE *f) +{ + mp_size_t k; + + if (verbose) + printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE); + fprintf (f, "#define MPFR_MULHIGH_TAB \\\n "); + for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++) + { + fprintf (f, "%d", (int) tune_mulders_upto (k)); + if (k != MPFR_MULHIGH_TAB_SIZE-1) + fputc (',', f); + if ((k+1) % 16 == 0) + fprintf (f, " \\\n "); + if (verbose) + putchar ('.'); + } + fprintf (f, " \n"); + if (verbose) + putchar ('\n'); +} + +static void +tune_sqr_mulders (FILE *f) +{ + mp_size_t k; + + if (verbose) + printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE); + fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n "); + for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++) + { + fprintf (f, "%d", (int) tune_sqr_mulders_upto (k)); + if (k != MPFR_SQRHIGH_TAB_SIZE-1) + fputc (',', f); + if ((k+1) % 16 == 0) + fprintf (f, " \\\n "); + if (verbose) + putchar ('.'); + } + fprintf (f, " \n"); + if (verbose) + putchar ('\n'); +} + +/******************************************************* + * Tuning functions for mpfr_ai * + *******************************************************/ + +long int mpfr_ai_threshold1; +long int mpfr_ai_threshold2; +long int mpfr_ai_threshold3; +#undef MPFR_AI_THRESHOLD1 +#define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1 +#undef MPFR_AI_THRESHOLD2 +#define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2 +#undef MPFR_AI_THRESHOLD3 +#define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3 + +#include "ai.c" + +static double +speed_mpfr_ai (struct speed_params *s) +{ + SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai); +} + + +/******************************************************* + * Tune all the threshold of MPFR * + * Warning: tune the function in their dependent order!* + *******************************************************/ +static void +all (const char *filename) +{ + FILE *f; + time_t start_time, end_time; + struct tm *tp; + mpfr_t x1, x2, x3, tmp1, tmp2; + mpfr_prec_t p1, p2, p3; + + f = fopen (filename, "w"); + if (f == NULL) + { + fprintf (stderr, "Can't open file '%s' for writing.\n", filename); + abort (); + } + + speed_time_init (); + if (verbose) + { + printf ("Using: %s\n", speed_time_string); + printf ("speed_precision %d", speed_precision); + if (speed_unittime == 1.0) + printf (", speed_unittime 1 cycle"); + else + printf (", speed_unittime %.2e secs", speed_unittime); + if (speed_cycletime == 1.0 || speed_cycletime == 0.0) + printf (", CPU freq unknown\n"); + else + printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime); + } + + time (&start_time); + tp = localtime (&start_time); + fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ", + tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday); + +#ifdef __ICC + fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10); +#elif defined(__GNUC__) +#ifdef __GNUC_PATCHLEVEL__ + fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__, + __GNUC_PATCHLEVEL__); +#else + fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__); +#endif +#elif defined (__SUNPRO_C) + fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100); +#elif defined (__sgi) && defined (_COMPILER_VERSION) + fprintf (f, "MIPSpro C %d.%d.%d */\n", + _COMPILER_VERSION / 100, + _COMPILER_VERSION / 10 % 10, + _COMPILER_VERSION % 10); +#elif defined (__DECC) && defined (__DECC_VER) + fprintf (f, "DEC C %d */\n", __DECC_VER); +#else + fprintf (f, "system compiler */\n"); +#endif + fprintf (f, "\n"); + + /* Tune mulhigh */ + tune_mulders (f); + + /* Tune sqrhigh */ + tune_sqr_mulders (f); + + /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */ + if (verbose) + printf ("Tuning mpfr_mul...\n"); + tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul, + 2*GMP_NUMB_BITS+1); + fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n", + (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1); + + /* Tune mpfr_exp_2 */ + if (verbose) + printf ("Tuning mpfr_exp_2...\n"); + tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, + MPFR_PREC_MIN); + mpfr_exp_2_threshold = MAX (GMP_NUMB_BITS, mpfr_exp_2_threshold); + fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n", + (unsigned long) mpfr_exp_2_threshold); + + /* Tune mpfr_exp */ + if (verbose) + printf ("Tuning mpfr_exp...\n"); + tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp, + MPFR_PREC_MIN+3*GMP_NUMB_BITS); + fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n", + (unsigned long) mpfr_exp_threshold); + + /* Tune mpfr_sin_cos */ + if (verbose) + printf ("Tuning mpfr_sin_cos...\n"); + tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos, + MPFR_PREC_MIN+3*GMP_NUMB_BITS); + fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n", + (unsigned long) mpfr_sincos_threshold); + + /* Tune mpfr_ai */ + if (verbose) + printf ("Tuning mpfr_ai...\n"); + mpfr_init2 (x1, MPFR_SMALL_PRECISION); + mpfr_init2 (x2, MPFR_SMALL_PRECISION); + mpfr_init2 (x3, MPFR_SMALL_PRECISION); + mpfr_init2 (tmp1, MPFR_SMALL_PRECISION); + mpfr_init2 (tmp2, MPFR_SMALL_PRECISION); + + tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, + &mpfr_ai_threshold3, speed_mpfr_ai, + MPFR_PREC_MIN+GMP_NUMB_BITS, + -60, 200, x1, &p1); + tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, + &mpfr_ai_threshold3, speed_mpfr_ai, + MPFR_PREC_MIN+GMP_NUMB_BITS, + -20, 500, x2, &p2); + tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, + &mpfr_ai_threshold3, speed_mpfr_ai, + MPFR_PREC_MIN+GMP_NUMB_BITS, + 40, 200, x3, &p3); + + mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN); + mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN); + mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN); + mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN); + + mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN); + mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN); + mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN); + mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN); + + mpfr_sub (tmp2, x2, x1, MPFR_RNDN); + mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN); + mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN); + + mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN); + mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN); + mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN); + mpfr_div (tmp1, tmp1, x3, MPFR_RNDN); + mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN); + + fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld\n", mpfr_ai_threshold1); + fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2); + fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3); + + mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3); + mpfr_clear (tmp1); mpfr_clear (tmp2); + + /* End of tuning */ + time (&end_time); + fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n", + end_time - start_time); + if (verbose) + printf ("Complete (took %ld seconds).\n", end_time - start_time); + + fclose (f); +} + + +/* Main function */ +int main (int argc, char *argv[]) +{ + /* Unbuffered so if output is redirected to a file it isn't lost if the + program is killed part way through. */ + setbuf (stdout, NULL); + setbuf (stderr, NULL); + + verbose = argc > 1; + + if (verbose) + printf ("Tuning MPFR (Coffee time?)...\n"); + + all ("mparam.h"); + + return 0; +} |