summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2005-03-07 11:40:39 +0000
committerpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2005-03-07 11:40:39 +0000
commit521e97255c79f994149ca11670025c620b903705 (patch)
tree683c362f13e3593ba4143091a34205f92b82b958
parentca8aadf5c9ea5351151e8c7425d27388a4d7e5dc (diff)
downloadmpfr-521e97255c79f994149ca11670025c620b903705.tar.gz
Add tune for mpfr_mulhigh
Add --with-mulhigh-size option at configure time. git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3363 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r--Makefile.am2
-rw-r--r--configure.in4
-rw-r--r--mpfr-gmp.h5
-rw-r--r--mpfr-impl.h3
-rw-r--r--mulders.c175
-rw-r--r--tuneup.c105
6 files changed, 139 insertions, 155 deletions
diff --git a/Makefile.am b/Makefile.am
index aa990149d..e6e3e5671 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,7 +7,7 @@ include_HEADERS = mpfr.h mpf2mpfr.h
lib_LTLIBRARIES = libmpfr.la
-libmpfr_la_SOURCES = mpfr.h mpf2mpfr.h mpfr-gmp.h mpfr-impl.h mpfr-longlong.h mpfr-test.h log_b2.h exceptions.c extract.c uceil_exp2.c uceil_log2.c ufloor_log2.c add.c add1.c add_one_ulp.c add_ui.c agm.c clear.c cmp.c cmp_abs.c cmp_si.c cmp_ui.c comparisons.c div_2exp.c div_2si.c div_2ui.c div.c div_ui.c dump.c eq.c exp10.c exp2.c exp3.c exp.c frac.c get_d.c get_exp.c get_str.c init.c inp_str.c isinteger.c isinf.c isnan.c isnum.c const_log2.c log.c mul_2exp.c mul_2si.c mul_2ui.c mul.c mul_ui.c neg.c next.c out_str.c const_pi.c pow.c pow_si.c pow_ui.c print_raw.c print_rnd_mode.c random2.c random.c reldiff.c round_prec.c set.c setmax.c setmin.c set_d.c set_dfl_prec.c set_exp.c set_rnd.c set_f.c set_prc_raw.c set_prec.c set_q.c set_si.c set_str.c set_str_raw.c set_ui.c set_z.c sqrt.c sqrt_ui.c sub.c sub1.c sub_one_ulp.c sub_ui.c rint.c ui_div.c ui_sub.c urandomb.c get_z_exp.c swap.c factorial.c cosh.c sinh.c tanh.c acosh.c asinh.c atanh.c atan.c cmp2.c exp_2.c asin.c const_euler.c cos.c sin.c tan.c fma.c hypot.c log1p.c expm1.c log2.c log10.c ui_pow.c ui_pow_ui.c minmax.c dim.c copysign.c gmp_op.c init2.c acos.c sin_cos.c set_nan.c set_inf.c powerof2.c gamma.c set_ld.c get_ld.c cbrt.c volatile.c fits_s.h fits_sshort.c fits_sint.c fits_slong.c fits_u.h fits_ushort.c fits_uint.c fits_ulong.c fits_uintmax.c fits_intmax.c get_si.c get_ui.c zeta.c cmp_d.c erf.c inits.c inits2.c clears.c sgn.c check.c sub1sp.c version.c mpn_exp.c mpfr-gmp.c mp_clz_tab.c sum.c add1sp.c free_cache.c si_op.c cmp_ld.c set_ui_2exp.c set_si_2exp.c set_uj.c set_sj.c get_sj.c get_uj.c get_z.c iszero.c cache.c sqr.c int_ceil_log2.c isqrt.c strtofr.c pow_z.c logging.c
+libmpfr_la_SOURCES = mpfr.h mpf2mpfr.h mpfr-gmp.h mpfr-impl.h mpfr-longlong.h mpfr-test.h log_b2.h exceptions.c extract.c uceil_exp2.c uceil_log2.c ufloor_log2.c add.c add1.c add_one_ulp.c add_ui.c agm.c clear.c cmp.c cmp_abs.c cmp_si.c cmp_ui.c comparisons.c div_2exp.c div_2si.c div_2ui.c div.c div_ui.c dump.c eq.c exp10.c exp2.c exp3.c exp.c frac.c get_d.c get_exp.c get_str.c init.c inp_str.c isinteger.c isinf.c isnan.c isnum.c const_log2.c log.c mul_2exp.c mul_2si.c mul_2ui.c mul.c mul_ui.c neg.c next.c out_str.c const_pi.c pow.c pow_si.c pow_ui.c print_raw.c print_rnd_mode.c random2.c random.c reldiff.c round_prec.c set.c setmax.c setmin.c set_d.c set_dfl_prec.c set_exp.c set_rnd.c set_f.c set_prc_raw.c set_prec.c set_q.c set_si.c set_str.c set_str_raw.c set_ui.c set_z.c sqrt.c sqrt_ui.c sub.c sub1.c sub_one_ulp.c sub_ui.c rint.c ui_div.c ui_sub.c urandomb.c get_z_exp.c swap.c factorial.c cosh.c sinh.c tanh.c acosh.c asinh.c atanh.c atan.c cmp2.c exp_2.c asin.c const_euler.c cos.c sin.c tan.c fma.c hypot.c log1p.c expm1.c log2.c log10.c ui_pow.c ui_pow_ui.c minmax.c dim.c copysign.c gmp_op.c init2.c acos.c sin_cos.c set_nan.c set_inf.c powerof2.c gamma.c set_ld.c get_ld.c cbrt.c volatile.c fits_s.h fits_sshort.c fits_sint.c fits_slong.c fits_u.h fits_ushort.c fits_uint.c fits_ulong.c fits_uintmax.c fits_intmax.c get_si.c get_ui.c zeta.c cmp_d.c erf.c inits.c inits2.c clears.c sgn.c check.c sub1sp.c version.c mpn_exp.c mpfr-gmp.c mp_clz_tab.c sum.c add1sp.c free_cache.c si_op.c cmp_ld.c set_ui_2exp.c set_si_2exp.c set_uj.c set_sj.c get_sj.c get_uj.c get_z.c iszero.c cache.c sqr.c int_ceil_log2.c isqrt.c strtofr.c pow_z.c logging.c mulders.c
libmpfr_la_LIBADD = @LIBOBJS@
diff --git a/configure.in b/configure.in
index d1c1bef3e..07d6106fb 100644
--- a/configure.in
+++ b/configure.in
@@ -55,6 +55,10 @@ AC_ARG_WITH(irix64,
[ --with-irix64=on/off Irix 32/64 bits support ],
with_irix64=$withval, with_irix64=off)
+AC_ARG_WITH(mulhigh_size,
+ [ --with-mulhigh-size=NUM Internal threshold table for mulhigh],
+ AC_DEFINE_UNQUOTED([MPFR_MULHIGH_SIZE],$withval, [Mulhigh size]))
+
AC_ARG_ENABLE(assert,
[ --enable-assert enable ASSERT checking [[default=no]]],
[ case $enableval in
diff --git a/mpfr-gmp.h b/mpfr-gmp.h
index 6ded1fb85..d01e62d13 100644
--- a/mpfr-gmp.h
+++ b/mpfr-gmp.h
@@ -127,10 +127,13 @@ extern "C" {
#define MPN_SAME_OR_DECR_P(dst, src, size) \
MPN_SAME_OR_DECR2_P(dst, size, src, size)
-/* If sqr_n is not exported, used mpn_mul instead */
+/* If sqr_n or mul_basecase are not exported, used mpn_mul instead */
#ifndef mpn_sqr_n
# define mpn_sqr_n(dst,src,n) mpn_mul((dst),(src),(n),(src),(n))
#endif
+#ifndef mpn_mul_basecase
+# define mpn_mul_basecase(dst,s1,n1,s2,n2) mpn_mul((dst),(s1),(n1),(s2),(n2))
+#endif
/* ASSERT */
__MPFR_DECLSPEC void mpfr_assert_fail _MPFR_PROTO((const char *, int,
diff --git a/mpfr-impl.h b/mpfr-impl.h
index 79fbcdba2..636a9f09e 100644
--- a/mpfr-impl.h
+++ b/mpfr-impl.h
@@ -1286,7 +1286,8 @@ __MPFR_DECLSPEC int mpfr_const_pi_internal _MPFR_PROTO ((mpfr_ptr,mp_rnd_t));
__MPFR_DECLSPEC int mpfr_const_log2_internal _MPFR_PROTO((mpfr_ptr,mp_rnd_t));
__MPFR_DECLSPEC int mpfr_const_euler_internal _MPFR_PROTO((mpfr_ptr,
mp_rnd_t));
-
+__MPFR_DECLSPEC void mpfr_mulhigh_n _MPFR_PROTO ((mp_ptr, mp_srcptr,
+ mp_srcptr, mp_size_t));
#if defined (__cplusplus)
}
#endif
diff --git a/mulders.c b/mulders.c
index 2b960eed6..c0323d677 100644
--- a/mulders.c
+++ b/mulders.c
@@ -1,25 +1,34 @@
-#include <stdio.h>
-#include <stdlib.h>
+/* Mulder's MulHigh function
+
+Copyright 2005 Free Software Foundation.
-#if 0
+This file is part of the MPFR Library.
-#define MPFR_NEED_LONGLONG_H
-#include "mpfr-impl.h"
+The MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
-#define MPFR_MUL_BASECASE_THREEHOLD 5
-#define MPFR_MULHIGH_TAB_SIZE (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0]))
-static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
+The MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-#else
+You should have received a copy of the GNU Lesser General Public License
+along with the MPFR Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-#define MPFR_MUL_BASECASE_THREEHOLD 5
-#define MPFR_MULHIGH_TAB_SIZE 1024
-static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
+#define MPFR_NEED_LONGLONG_H
+#include "mpfr-impl.h"
+/* Don't MPFR_MULHIGH_SIZE since it is handled by tuneup */
+#ifdef MPFR_MULHIGH_TAB_SIZE
+static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
+#else
+static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
+#define MPFR_MULHIGH_TAB_SIZE (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0]))
#endif
@@ -42,8 +51,7 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
{
mp_size_t k;
- k = __builtin_expect (n < MPFR_MULHIGH_TAB_SIZE, 1)
- ? mulhigh_ktab[n] : 2*n/3;
+ k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 2*n/3;
if (k < 0)
mpn_mul_basecase (rp, np, n, mp, n);
else if (k == 0)
@@ -62,140 +70,11 @@ mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
}
}
-
-#if 1 /* Tune program */
-
-#include "timming.h"
-
-#define MAX_BASECASE_THREEHOLD 500
-#define TOLERANCE 102/100
-
-/* Tune: to improve */
-mp_size_t
-find_best_k (mp_size_t n)
-{
- mp_limb_t a[n], b[n], c[2*n];
- mp_size_t k, kbest;
- unsigned long long t, tbest;
-
- if (n == 0)
- return -1;
-
- /* Amelioration:
- Si n > 32
- Recherche min, max dans [n-33,n-1]
- max = MAX(BITS_PER_MP_LIMB+max,n)
- Recherche entre min et max
- Marche pas pour P4 (Trop chaotique!) */
-
- CALCUL_OVERHEAD;
- mpn_random (a, n);
- mpn_random (b, n);
-
- /* Check k == -1, mpn_mul_base_basecase */
- mulhigh_ktab[n] = -1;
- kbest = -1;
- tbest = MEASURE (mpfr_mulhigh_n (c, a, b, n) );
-
- /* Check k == 0, mpn_mulhigh_basecase */
- mulhigh_ktab[n] = 0;
- t = MEASURE (mpfr_mulhigh_n (c, a, b, n) );
- if (t * TOLERANCE < tbest)
- kbest = 0, tbest = t;
-
- /* Check Mulder */
- for (k = (n+1)/2 ; k < n ; k++) {
- mulhigh_ktab[n] = k;
- t = MEASURE (mpfr_mulhigh_n (c, a, b, n));
- if (t *TOLERANCE < tbest)
- kbest = k, tbest = t;
- }
- return kbest;
-}
-
-void
-tune (mp_size_t n)
-{
- /* Find ThreeHold */
- mp_size_t k;
- for (k = 0 ; k <= n ; k++) {
- mulhigh_ktab[k] = find_best_k (k);
- printf ("%d, ", mulhigh_ktab[k]);
- fflush (stdout);
- }
- putchar ('\n');
-}
-
-
-int
-main (int argc, const char *argv[])
-{
- mp_limb_t *a, *b, *c, *r;
- mp_size_t an, bn, cn, size, size_end, size_step;
- int i;
- unsigned long long t1, t2, t3;
-
- printf ("%s [SIZE_START [SIZE_END [SIZE_STEP]]]\n", argv[0]);
-
- size = 15;
- if (argc >= 2)
- size = atol (argv[1]);
- size_end = size;
- if (argc >= 3)
- size_end = atol (argv[2]);
- size_step = 1;
- if (argc >= 4)
- size_step = atol (argv[3]);
-
- printf ("Tune in progress...\n");
- tune (size_end);
-
- for ( ; size <= size_end ; size += size_step) {
- printf ("Size= %4u ", size);
-
- a = malloc (sizeof(mp_limb_t)*size);
- b = malloc (sizeof(mp_limb_t)*size);
- c = malloc (sizeof(mp_limb_t)*size*3);
- r = malloc (sizeof(mp_limb_t)*size*3);
-
- CALCUL_OVERHEAD;
- mpn_random (a, size);
- mpn_random (b, size);
-
- t1 = MEASURE (mpfr_mulhigh_n (c, a, b, size));
- printf ("mulhigh_n: %7Lu ", t1);
- t2 = MEASURE (mpn_mul_n (r, a, b, size));
- printf ("mpn_mul_n: %7Lu ", t2);
- t3 = MEASURE (mpfr_mulhigh_n_basecase (c, a, b, size));
- printf ("mulhigh_bc: %7Lu ", t3);
-
- printf ("Ratio: %f %c %c\n", (double) t2 / t1,
- t1 < t2 ? '*' : ' ',
- t1 < t3 ? '$' : ' ' );
-
- if (size < 50 && size == size_end)
- {
- printf ("High: ");
- for (i = 2*size-1 ; i>=size-1 ; i--)
- printf ("%08X ", c[i]);
- printf("\nmuln: ");
- for (i = 2*size-1 ; i>=size-1 ; i--)
- printf ("%08X ", r[i]);
- printf("\n");
- }
- free (a);
- free (b);
- free (c);
- free (r);
- }
-
- return 0;
-}
-#endif
-
#if 0
int mpfr_mul ()
{
+#define MPFR_MUL_BASECASE_THREEHOLD 5
+
/* multiplies two mantissa in temporary allocated space */
b1 = MPFR_LIKELY (bn >= cn)
? mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn)
diff --git a/tuneup.c b/tuneup.c
index 3879ee9b0..776934081 100644
--- a/tuneup.c
+++ b/tuneup.c
@@ -79,7 +79,7 @@ int verbose;
We can't use MPFR library since the THRESHOLD can't vary */
/* Setup mpfr_exp_2 */
-mp_prec_t mpfr_exp_2_threshold = MPFR_EXP_2_THRESHOLD;
+mp_prec_t mpfr_exp_2_threshold;
#undef MPFR_EXP_2_THRESHOLD
#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
#include "exp_2.c"
@@ -88,7 +88,7 @@ static double speed_mpfr_exp_2 (struct speed_params *s) {
}
/* Setup mpfr_exp */
-mp_prec_t mpfr_exp_threshold = MPFR_EXP_THRESHOLD;
+mp_prec_t mpfr_exp_threshold;
#undef MPFR_EXP_THRESHOLD
#define MPFR_EXP_THRESHOLD mpfr_exp_threshold
#include "exp.c"
@@ -134,6 +134,11 @@ static double domeasure (mp_prec_t *threshold,
s.size = p;
size = (p - 1)/BITS_PER_MP_LIMB+1;
s.xp = malloc (size*sizeof (mp_limb_t));
+ if (s.xp == NULL)
+ {
+ fprintf (stderr, "Can't allocate memory.\n");
+ abort ();
+ }
mpn_random (s.xp, size);
*threshold = MPFR_PREC_MAX;
t1 = speed_measure (func, &s);
@@ -238,10 +243,99 @@ tune_simple_func (mp_prec_t *threshold,
i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
*threshold = pmin + i;
if (verbose)
- printf ("Choosing %lu\n", *threshold);
+ printf ("Threshold = %lu\n", *threshold);
return;
}
+
+
+/************************************
+ * Tune Mulder's mulhigh function *
+ ************************************/
+#define TOLERANCE 1.015
+#ifndef MPFR_MULHIGH_SIZE
+# define MPFR_MULHIGH_SIZE 512
+#endif
+#define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
+#include "mulders.c"
+
+static double speed_mpfr_mulhigh (struct speed_params *s) {
+ SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
+}
+
+/* Tune size N: to speed up! */
+static mp_size_t
+tune_mulder_upto (mp_size_t n)
+{
+ struct speed_params s;
+ mp_size_t k, kbest;
+ double t, tbest;
+ TMP_DECL (marker);
+
+ if (n == 0)
+ return -1;
+
+ TMP_MARK (marker);
+ s.align_xp = s.align_yp = s.align_wp = 64;
+ s.size = n;
+ s.xp = TMP_ALLOC (n*sizeof (mp_limb_t));
+ s.yp = TMP_ALLOC (n*sizeof (mp_limb_t));
+ mpn_random (s.xp, n);
+ mpn_random (s.yp, n);
+
+ /* Check k == -1, mpn_mul_base_basecase */
+ mulhigh_ktab[n] = -1;
+ kbest = -1;
+ tbest = speed_measure (speed_mpfr_mulhigh, &s);
+
+ /* Check k == 0, mpn_mulhigh_basecase */
+ mulhigh_ktab[n] = 0;
+ t = speed_measure (speed_mpfr_mulhigh, &s);
+ if (t * TOLERANCE < tbest)
+ kbest = 0, tbest = t;
+
+ /* Check Mulder */
+ for (k = (n+1)/2 ; k < n ; k++) {
+ mulhigh_ktab[n] = k;
+ t = speed_measure (speed_mpfr_mulhigh, &s);
+ if (t * TOLERANCE < tbest)
+ kbest = k, tbest = t;
+ }
+
+ mulhigh_ktab[n] = kbest;
+
+ TMP_FREE (marker);
+ return kbest;
+}
+
+static void
+tune_mulder (FILE *f)
+{
+ mp_size_t k;
+
+ if (verbose)
+ printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
+ fprintf (f, "#define MPFR_MULHIGH_TAB \\\n ");
+ for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
+ {
+ fprintf (f, "%d", (int) tune_mulder_upto (k));
+ if (k != MPFR_MULHIGH_TAB_SIZE-1)
+ fputc (',', f);
+ if ((k+1) % 16 == 0)
+ fprintf (f, " \\\n ");
+ if (verbose)
+ putchar ('.');
+ }
+ fprintf (f, " \n");
+ if (verbose)
+ putchar ('\n');
+}
+
+
+
+/**************************************************
+ * Tune all the threshold of MPFR *
+ **************************************************/
static void all (const char *filename)
{
FILE *f;
@@ -291,6 +385,9 @@ static void all (const char *filename)
#endif
fprintf (f, "\n");
+ /* Tune mulhigh */
+ tune_mulder (f);
+
/* Tune mpfr_exp_2 */
if (verbose)
printf ("Tuning mpfr_exp_2...\n");
@@ -329,7 +426,7 @@ int main (int argc, char *argv[])
verbose = argc > 1;
if (verbose)
- printf ("Tuning MPFR. Warning: it may take hours!\n");
+ printf ("Tuning MPFR.\n");
all ("mparam.h");