summaryrefslogtreecommitdiff
path: root/tune
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2011-11-11 14:59:14 +0100
committerNiels Möller <nisse@lysator.liu.se>2011-11-11 14:59:14 +0100
commit5b0e8651a493b9128594851eff5387bde8081526 (patch)
treea362b2b040a395b18a1841f3e3e2e877ddc53aaa /tune
parenteb453fbaa0a498d2b1bfd05c9a51310da203fd33 (diff)
downloadgmp-5b0e8651a493b9128594851eff5387bde8081526.tar.gz
Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.
Diffstat (limited to 'tune')
-rw-r--r--tune/Makefile.am7
-rw-r--r--tune/common.c16
-rw-r--r--tune/hgcd_reduce_1.c30
-rw-r--r--tune/hgcd_reduce_2.c29
-rw-r--r--tune/speed.c4
-rw-r--r--tune/speed.h64
-rw-r--r--tune/tuneup.c25
7 files changed, 173 insertions, 2 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
index e54c020d4..117e5ca2c 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -43,7 +43,8 @@ libspeed_la_SOURCES = \
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
@@ -129,7 +130,9 @@ TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
- get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c \
mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
mulmid.c mulmid_n.c toom42_mulmid.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
diff --git a/tune/common.c b/tune/common.c
index eb2d4ba1a..cc333a470 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1539,6 +1539,22 @@ speed_mpn_hgcd_appr (struct speed_params *s)
}
double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+}
+
+double
speed_mpn_gcd (struct speed_params *s)
{
SPEED_ROUTINE_MPN_GCD (mpn_gcd);
diff --git a/tune/hgcd_reduce_1.c b/tune/hgcd_reduce_1.c
new file mode 100644
index 000000000..996362414
--- /dev/null
+++ b/tune/hgcd_reduce_1.c
@@ -0,0 +1,30 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/tune/hgcd_reduce_2.c b/tune/hgcd_reduce_2.c
new file mode 100644
index 000000000..1eed4ba11
--- /dev/null
+++ b/tune/hgcd_reduce_2.c
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 0
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/tune/speed.c b/tune/speed.c
index 061517e28..08c13e776 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -279,6 +279,10 @@ const struct routine_t {
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
{ "mpn_hgcd_appr", speed_mpn_hgcd_appr },
+ { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
+ { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
+ { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 },
+
{ "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
{ "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
diff --git a/tune/speed.h b/tune/speed.h
index 70484d391..5add58720 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -198,6 +198,9 @@ double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
@@ -488,6 +491,16 @@ mp_size_t mpn_hgcd_lehmer
__GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
#define MPN_HGCD_LEHMER_ITCH(n) (n)
+mp_size_t mpn_hgcd_reduce_1
+ __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_1_itch
+ __GMP_PROTO ((mp_size_t, mp_size_t));
+
+mp_size_t mpn_hgcd_reduce_2
+ __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_2_itch
+ __GMP_PROTO ((mp_size_t, mp_size_t));
+
mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
@@ -2706,6 +2719,57 @@ int speed_routine_count_zeros_setup
return t; \
}
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \
+ { \
+ mp_size_t hgcd_init_itch, hgcd_step_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ mp_size_t p = s->size/2; \
+ int res; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ \
+ s->xp[s->size - 1] |= 1; \
+ s->yp[s->size - 1] |= 1; \
+ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_step_itch = itchfunc (s->size, p); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_step_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (&hgcd, ap, bp, s->size, p, wp); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s->size**2, since GCD is a quadratic algorithm.
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 4f53c979c..c62a25362 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -195,6 +195,8 @@ mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX;
mp_size_t powm_threshold = MP_SIZE_T_MAX;
mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX;
mp_size_t hgcd_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX;
mp_size_t gcd_accel_threshold = MP_SIZE_T_MAX;
mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX;
mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX;
@@ -1755,6 +1757,27 @@ tune_hgcd (void)
}
void
+tune_hgcd_appr (void)
+{
+ static struct param_t param;
+ param.name = "HGCD_APPR_THRESHOLD";
+ param.function = speed_mpn_hgcd_appr;
+ /* We seem to get strange results for small sizes */
+ param.min_size = 30;
+ one (&hgcd_appr_threshold, &param);
+}
+
+void
+tune_hgcd_reduce (void)
+{
+ static struct param_t param;
+ param.name = "HGCD_REDUCE_THRESHOLD";
+ param.function = speed_mpn_hgcd_reduce;
+ param.min_size = 30;
+ one (&hgcd_reduce_threshold, &param);
+}
+
+void
tune_gcd_dc (void)
{
static struct param_t param;
@@ -2579,6 +2602,8 @@ all (void)
tune_matrix22_mul ();
tune_hgcd ();
+ tune_hgcd_appr ();
+ tune_hgcd_reduce();
tune_gcd_dc ();
tune_gcdext_dc ();
tune_jacobi_base ();