diff options
author | pelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4> | 2005-03-07 11:40:39 +0000 |
---|---|---|
committer | pelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4> | 2005-03-07 11:40:39 +0000 |
commit | 521e97255c79f994149ca11670025c620b903705 (patch) | |
tree | 683c362f13e3593ba4143091a34205f92b82b958 | |
parent | ca8aadf5c9ea5351151e8c7425d27388a4d7e5dc (diff) | |
download | mpfr-521e97255c79f994149ca11670025c620b903705.tar.gz |
Add tune for mpfr_mulhigh
Add --with-mulhigh-size option at configure time.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3363 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | configure.in | 4 | ||||
-rw-r--r-- | mpfr-gmp.h | 5 | ||||
-rw-r--r-- | mpfr-impl.h | 3 | ||||
-rw-r--r-- | mulders.c | 175 | ||||
-rw-r--r-- | tuneup.c | 105 |
6 files changed, 139 insertions, 155 deletions
diff --git a/Makefile.am b/Makefile.am index aa990149d..e6e3e5671 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,7 +7,7 @@ include_HEADERS = mpfr.h mpf2mpfr.h lib_LTLIBRARIES = libmpfr.la -libmpfr_la_SOURCES = mpfr.h mpf2mpfr.h mpfr-gmp.h mpfr-impl.h mpfr-longlong.h mpfr-test.h log_b2.h exceptions.c extract.c uceil_exp2.c uceil_log2.c ufloor_log2.c add.c add1.c add_one_ulp.c add_ui.c agm.c clear.c cmp.c cmp_abs.c cmp_si.c cmp_ui.c comparisons.c div_2exp.c div_2si.c div_2ui.c div.c div_ui.c dump.c eq.c exp10.c exp2.c exp3.c exp.c frac.c get_d.c get_exp.c get_str.c init.c inp_str.c isinteger.c isinf.c isnan.c isnum.c const_log2.c log.c mul_2exp.c mul_2si.c mul_2ui.c mul.c mul_ui.c neg.c next.c out_str.c const_pi.c pow.c pow_si.c pow_ui.c print_raw.c print_rnd_mode.c random2.c random.c reldiff.c round_prec.c set.c setmax.c setmin.c set_d.c set_dfl_prec.c set_exp.c set_rnd.c set_f.c set_prc_raw.c set_prec.c set_q.c set_si.c set_str.c set_str_raw.c set_ui.c set_z.c sqrt.c sqrt_ui.c sub.c sub1.c sub_one_ulp.c sub_ui.c rint.c ui_div.c ui_sub.c urandomb.c get_z_exp.c swap.c factorial.c cosh.c sinh.c tanh.c acosh.c asinh.c atanh.c atan.c cmp2.c exp_2.c asin.c const_euler.c cos.c sin.c tan.c fma.c hypot.c log1p.c expm1.c log2.c log10.c ui_pow.c ui_pow_ui.c minmax.c dim.c copysign.c gmp_op.c init2.c acos.c sin_cos.c set_nan.c set_inf.c powerof2.c gamma.c set_ld.c get_ld.c cbrt.c volatile.c fits_s.h fits_sshort.c fits_sint.c fits_slong.c fits_u.h fits_ushort.c fits_uint.c fits_ulong.c fits_uintmax.c fits_intmax.c get_si.c get_ui.c zeta.c cmp_d.c erf.c inits.c inits2.c clears.c sgn.c check.c sub1sp.c version.c mpn_exp.c mpfr-gmp.c mp_clz_tab.c sum.c add1sp.c free_cache.c si_op.c cmp_ld.c set_ui_2exp.c set_si_2exp.c set_uj.c set_sj.c get_sj.c get_uj.c get_z.c iszero.c cache.c sqr.c int_ceil_log2.c isqrt.c strtofr.c pow_z.c logging.c +libmpfr_la_SOURCES = mpfr.h mpf2mpfr.h mpfr-gmp.h mpfr-impl.h mpfr-longlong.h mpfr-test.h log_b2.h exceptions.c extract.c uceil_exp2.c uceil_log2.c ufloor_log2.c add.c add1.c add_one_ulp.c add_ui.c agm.c clear.c cmp.c cmp_abs.c cmp_si.c cmp_ui.c comparisons.c div_2exp.c div_2si.c div_2ui.c div.c div_ui.c dump.c eq.c exp10.c exp2.c exp3.c exp.c frac.c get_d.c get_exp.c get_str.c init.c inp_str.c isinteger.c isinf.c isnan.c isnum.c const_log2.c log.c mul_2exp.c mul_2si.c mul_2ui.c mul.c mul_ui.c neg.c next.c out_str.c const_pi.c pow.c pow_si.c pow_ui.c print_raw.c print_rnd_mode.c random2.c random.c reldiff.c round_prec.c set.c setmax.c setmin.c set_d.c set_dfl_prec.c set_exp.c set_rnd.c set_f.c set_prc_raw.c set_prec.c set_q.c set_si.c set_str.c set_str_raw.c set_ui.c set_z.c sqrt.c sqrt_ui.c sub.c sub1.c sub_one_ulp.c sub_ui.c rint.c ui_div.c ui_sub.c urandomb.c get_z_exp.c swap.c factorial.c cosh.c sinh.c tanh.c acosh.c asinh.c atanh.c atan.c cmp2.c exp_2.c asin.c const_euler.c cos.c sin.c tan.c fma.c hypot.c log1p.c expm1.c log2.c log10.c ui_pow.c ui_pow_ui.c minmax.c dim.c copysign.c gmp_op.c init2.c acos.c sin_cos.c set_nan.c set_inf.c powerof2.c gamma.c set_ld.c get_ld.c cbrt.c volatile.c fits_s.h fits_sshort.c fits_sint.c fits_slong.c fits_u.h fits_ushort.c fits_uint.c fits_ulong.c fits_uintmax.c fits_intmax.c get_si.c get_ui.c zeta.c cmp_d.c erf.c inits.c inits2.c clears.c sgn.c check.c sub1sp.c version.c mpn_exp.c mpfr-gmp.c mp_clz_tab.c sum.c add1sp.c free_cache.c si_op.c cmp_ld.c set_ui_2exp.c set_si_2exp.c set_uj.c set_sj.c get_sj.c get_uj.c get_z.c iszero.c cache.c sqr.c int_ceil_log2.c isqrt.c strtofr.c pow_z.c logging.c mulders.c libmpfr_la_LIBADD = @LIBOBJS@ diff --git a/configure.in b/configure.in index d1c1bef3e..07d6106fb 100644 --- a/configure.in +++ b/configure.in @@ -55,6 +55,10 @@ AC_ARG_WITH(irix64, [ --with-irix64=on/off Irix 32/64 bits support ], with_irix64=$withval, with_irix64=off) +AC_ARG_WITH(mulhigh_size, + [ --with-mulhigh-size=NUM Internal threshold table for mulhigh], + AC_DEFINE_UNQUOTED([MPFR_MULHIGH_SIZE],$withval, [Mulhigh size])) + AC_ARG_ENABLE(assert, [ --enable-assert enable ASSERT checking [[default=no]]], [ case $enableval in diff --git a/mpfr-gmp.h b/mpfr-gmp.h index 6ded1fb85..d01e62d13 100644 --- a/mpfr-gmp.h +++ b/mpfr-gmp.h @@ -127,10 +127,13 @@ extern "C" { #define MPN_SAME_OR_DECR_P(dst, src, size) \ MPN_SAME_OR_DECR2_P(dst, size, src, size) -/* If sqr_n is not exported, used mpn_mul instead */ +/* If sqr_n or mul_basecase are not exported, used mpn_mul instead */ #ifndef mpn_sqr_n # define mpn_sqr_n(dst,src,n) mpn_mul((dst),(src),(n),(src),(n)) #endif +#ifndef mpn_mul_basecase +# define mpn_mul_basecase(dst,s1,n1,s2,n2) mpn_mul((dst),(s1),(n1),(s2),(n2)) +#endif /* ASSERT */ __MPFR_DECLSPEC void mpfr_assert_fail _MPFR_PROTO((const char *, int, diff --git a/mpfr-impl.h b/mpfr-impl.h index 79fbcdba2..636a9f09e 100644 --- a/mpfr-impl.h +++ b/mpfr-impl.h @@ -1286,7 +1286,8 @@ __MPFR_DECLSPEC int mpfr_const_pi_internal _MPFR_PROTO ((mpfr_ptr,mp_rnd_t)); __MPFR_DECLSPEC int mpfr_const_log2_internal _MPFR_PROTO((mpfr_ptr,mp_rnd_t)); __MPFR_DECLSPEC int mpfr_const_euler_internal _MPFR_PROTO((mpfr_ptr, mp_rnd_t)); - +__MPFR_DECLSPEC void mpfr_mulhigh_n _MPFR_PROTO ((mp_ptr, mp_srcptr, + mp_srcptr, mp_size_t)); #if defined (__cplusplus) } #endif @@ -1,25 +1,34 @@ -#include <stdio.h> -#include <stdlib.h> +/* Mulder's MulHigh function + +Copyright 2005 Free Software Foundation. -#if 0 +This file is part of the MPFR Library. -#define MPFR_NEED_LONGLONG_H -#include "mpfr-impl.h" +The MPFR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. -#define MPFR_MUL_BASECASE_THREEHOLD 5 -#define MPFR_MULHIGH_TAB_SIZE (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])) -static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB}; +The MPFR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -#else +You should have received a copy of the GNU Lesser General Public License +along with the MPFR Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ -#include "gmp.h" -#include "gmp-impl.h" -#include "longlong.h" -#define MPFR_MUL_BASECASE_THREEHOLD 5 -#define MPFR_MULHIGH_TAB_SIZE 1024 -static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE]; +#define MPFR_NEED_LONGLONG_H +#include "mpfr-impl.h" +/* Don't MPFR_MULHIGH_SIZE since it is handled by tuneup */ +#ifdef MPFR_MULHIGH_TAB_SIZE +static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE]; +#else +static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB}; +#define MPFR_MULHIGH_TAB_SIZE (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])) #endif @@ -42,8 +51,7 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) { mp_size_t k; - k = __builtin_expect (n < MPFR_MULHIGH_TAB_SIZE, 1) - ? mulhigh_ktab[n] : 2*n/3; + k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 2*n/3; if (k < 0) mpn_mul_basecase (rp, np, n, mp, n); else if (k == 0) @@ -62,140 +70,11 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) } } - -#if 1 /* Tune program */ - -#include "timming.h" - -#define MAX_BASECASE_THREEHOLD 500 -#define TOLERANCE 102/100 - -/* Tune: to improve */ -mp_size_t -find_best_k (mp_size_t n) -{ - mp_limb_t a[n], b[n], c[2*n]; - mp_size_t k, kbest; - unsigned long long t, tbest; - - if (n == 0) - return -1; - - /* Amelioration: - Si n > 32 - Recherche min, max dans [n-33,n-1] - max = MAX(BITS_PER_MP_LIMB+max,n) - Recherche entre min et max - Marche pas pour P4 (Trop chaotique!) */ - - CALCUL_OVERHEAD; - mpn_random (a, n); - mpn_random (b, n); - - /* Check k == -1, mpn_mul_base_basecase */ - mulhigh_ktab[n] = -1; - kbest = -1; - tbest = MEASURE (mpfr_mulhigh_n (c, a, b, n) ); - - /* Check k == 0, mpn_mulhigh_basecase */ - mulhigh_ktab[n] = 0; - t = MEASURE (mpfr_mulhigh_n (c, a, b, n) ); - if (t * TOLERANCE < tbest) - kbest = 0, tbest = t; - - /* Check Mulder */ - for (k = (n+1)/2 ; k < n ; k++) { - mulhigh_ktab[n] = k; - t = MEASURE (mpfr_mulhigh_n (c, a, b, n)); - if (t *TOLERANCE < tbest) - kbest = k, tbest = t; - } - return kbest; -} - -void -tune (mp_size_t n) -{ - /* Find ThreeHold */ - mp_size_t k; - for (k = 0 ; k <= n ; k++) { - mulhigh_ktab[k] = find_best_k (k); - printf ("%d, ", mulhigh_ktab[k]); - fflush (stdout); - } - putchar ('\n'); -} - - -int -main (int argc, const char *argv[]) -{ - mp_limb_t *a, *b, *c, *r; - mp_size_t an, bn, cn, size, size_end, size_step; - int i; - unsigned long long t1, t2, t3; - - printf ("%s [SIZE_START [SIZE_END [SIZE_STEP]]]\n", argv[0]); - - size = 15; - if (argc >= 2) - size = atol (argv[1]); - size_end = size; - if (argc >= 3) - size_end = atol (argv[2]); - size_step = 1; - if (argc >= 4) - size_step = atol (argv[3]); - - printf ("Tune in progress...\n"); - tune (size_end); - - for ( ; size <= size_end ; size += size_step) { - printf ("Size= %4u ", size); - - a = malloc (sizeof(mp_limb_t)*size); - b = malloc (sizeof(mp_limb_t)*size); - c = malloc (sizeof(mp_limb_t)*size*3); - r = malloc (sizeof(mp_limb_t)*size*3); - - CALCUL_OVERHEAD; - mpn_random (a, size); - mpn_random (b, size); - - t1 = MEASURE (mpfr_mulhigh_n (c, a, b, size)); - printf ("mulhigh_n: %7Lu ", t1); - t2 = MEASURE (mpn_mul_n (r, a, b, size)); - printf ("mpn_mul_n: %7Lu ", t2); - t3 = MEASURE (mpfr_mulhigh_n_basecase (c, a, b, size)); - printf ("mulhigh_bc: %7Lu ", t3); - - printf ("Ratio: %f %c %c\n", (double) t2 / t1, - t1 < t2 ? '*' : ' ', - t1 < t3 ? '$' : ' ' ); - - if (size < 50 && size == size_end) - { - printf ("High: "); - for (i = 2*size-1 ; i>=size-1 ; i--) - printf ("%08X ", c[i]); - printf("\nmuln: "); - for (i = 2*size-1 ; i>=size-1 ; i--) - printf ("%08X ", r[i]); - printf("\n"); - } - free (a); - free (b); - free (c); - free (r); - } - - return 0; -} -#endif - #if 0 int mpfr_mul () { +#define MPFR_MUL_BASECASE_THREEHOLD 5 + /* multiplies two mantissa in temporary allocated space */ b1 = MPFR_LIKELY (bn >= cn) ? mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn) @@ -79,7 +79,7 @@ int verbose; We can't use MPFR library since the THRESHOLD can't vary */ /* Setup mpfr_exp_2 */ -mp_prec_t mpfr_exp_2_threshold = MPFR_EXP_2_THRESHOLD; +mp_prec_t mpfr_exp_2_threshold; #undef MPFR_EXP_2_THRESHOLD #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold #include "exp_2.c" @@ -88,7 +88,7 @@ static double speed_mpfr_exp_2 (struct speed_params *s) { } /* Setup mpfr_exp */ -mp_prec_t mpfr_exp_threshold = MPFR_EXP_THRESHOLD; +mp_prec_t mpfr_exp_threshold; #undef MPFR_EXP_THRESHOLD #define MPFR_EXP_THRESHOLD mpfr_exp_threshold #include "exp.c" @@ -134,6 +134,11 @@ static double domeasure (mp_prec_t *threshold, s.size = p; size = (p - 1)/BITS_PER_MP_LIMB+1; s.xp = malloc (size*sizeof (mp_limb_t)); + if (s.xp == NULL) + { + fprintf (stderr, "Can't allocate memory.\n"); + abort (); + } mpn_random (s.xp, size); *threshold = MPFR_PREC_MAX; t1 = speed_measure (func, &s); @@ -238,10 +243,99 @@ tune_simple_func (mp_prec_t *threshold, i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1); *threshold = pmin + i; if (verbose) - printf ("Choosing %lu\n", *threshold); + printf ("Threshold = %lu\n", *threshold); return; } + + +/************************************ + * Tune Mulder's mulhigh function * + ************************************/ +#define TOLERANCE 1.015 +#ifndef MPFR_MULHIGH_SIZE +# define MPFR_MULHIGH_SIZE 512 +#endif +#define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE +#include "mulders.c" + +static double speed_mpfr_mulhigh (struct speed_params *s) { + SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n); +} + +/* Tune size N: to speed up! */ +static mp_size_t +tune_mulder_upto (mp_size_t n) +{ + struct speed_params s; + mp_size_t k, kbest; + double t, tbest; + TMP_DECL (marker); + + if (n == 0) + return -1; + + TMP_MARK (marker); + s.align_xp = s.align_yp = s.align_wp = 64; + s.size = n; + s.xp = TMP_ALLOC (n*sizeof (mp_limb_t)); + s.yp = TMP_ALLOC (n*sizeof (mp_limb_t)); + mpn_random (s.xp, n); + mpn_random (s.yp, n); + + /* Check k == -1, mpn_mul_base_basecase */ + mulhigh_ktab[n] = -1; + kbest = -1; + tbest = speed_measure (speed_mpfr_mulhigh, &s); + + /* Check k == 0, mpn_mulhigh_basecase */ + mulhigh_ktab[n] = 0; + t = speed_measure (speed_mpfr_mulhigh, &s); + if (t * TOLERANCE < tbest) + kbest = 0, tbest = t; + + /* Check Mulder */ + for (k = (n+1)/2 ; k < n ; k++) { + mulhigh_ktab[n] = k; + t = speed_measure (speed_mpfr_mulhigh, &s); + if (t * TOLERANCE < tbest) + kbest = k, tbest = t; + } + + mulhigh_ktab[n] = kbest; + + TMP_FREE (marker); + return kbest; +} + +static void +tune_mulder (FILE *f) +{ + mp_size_t k; + + if (verbose) + printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE); + fprintf (f, "#define MPFR_MULHIGH_TAB \\\n "); + for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++) + { + fprintf (f, "%d", (int) tune_mulder_upto (k)); + if (k != MPFR_MULHIGH_TAB_SIZE-1) + fputc (',', f); + if ((k+1) % 16 == 0) + fprintf (f, " \\\n "); + if (verbose) + putchar ('.'); + } + fprintf (f, " \n"); + if (verbose) + putchar ('\n'); +} + + + +/************************************************** + * Tune all the threshold of MPFR * + **************************************************/ static void all (const char *filename) { FILE *f; @@ -291,6 +385,9 @@ static void all (const char *filename) #endif fprintf (f, "\n"); + /* Tune mulhigh */ + tune_mulder (f); + /* Tune mpfr_exp_2 */ if (verbose) printf ("Tuning mpfr_exp_2...\n"); @@ -329,7 +426,7 @@ int main (int argc, char *argv[]) verbose = argc > 1; if (verbose) - printf ("Tuning MPFR. Warning: it may take hours!\n"); + printf ("Tuning MPFR.\n"); all ("mparam.h"); |