diff options
author | Niels Möller <nisse@lysator.liu.se> | 2011-11-11 14:59:14 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2011-11-11 14:59:14 +0100 |
commit | 5b0e8651a493b9128594851eff5387bde8081526 (patch) | |
tree | a362b2b040a395b18a1841f3e3e2e877ddc53aaa /tune | |
parent | eb453fbaa0a498d2b1bfd05c9a51310da203fd33 (diff) | |
download | gmp-5b0e8651a493b9128594851eff5387bde8081526.tar.gz |
Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.
Diffstat (limited to 'tune')
-rw-r--r-- | tune/Makefile.am | 7 | ||||
-rw-r--r-- | tune/common.c | 16 | ||||
-rw-r--r-- | tune/hgcd_reduce_1.c | 30 | ||||
-rw-r--r-- | tune/hgcd_reduce_2.c | 29 | ||||
-rw-r--r-- | tune/speed.c | 4 | ||||
-rw-r--r-- | tune/speed.h | 64 | ||||
-rw-r--r-- | tune/tuneup.c | 25 |
7 files changed, 173 insertions, 2 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am index e54c020d4..117e5ca2c 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -43,7 +43,8 @@ libspeed_la_SOURCES = \ common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \ freq.c \ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ - hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ + hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \ + jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ set_strb.c set_strs.c set_strp.c time.c @@ -129,7 +130,9 @@ TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \ dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \ invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \ - get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \ + get_str.c set_str.c matrix22_mul.c \ + hgcd.c hgcd_appr.c hgcd_reduce.c \ + mul_n.c sqr.c \ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \ mulmid.c mulmid_n.c toom42_mulmid.c \ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ diff --git a/tune/common.c b/tune/common.c index eb2d4ba1a..cc333a470 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1539,6 +1539,22 @@ speed_mpn_hgcd_appr (struct speed_params *s) } double +speed_mpn_hgcd_reduce (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch); +} +double +speed_mpn_hgcd_reduce_1 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch); +} +double +speed_mpn_hgcd_reduce_2 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch); +} + +double speed_mpn_gcd (struct speed_params *s) { SPEED_ROUTINE_MPN_GCD (mpn_gcd); diff --git a/tune/hgcd_reduce_1.c b/tune/hgcd_reduce_1.c new file mode 100644 index 000000000..996362414 
--- /dev/null +++ b/tune/hgcd_reduce_1.c @@ -0,0 +1,30 @@ +/* mpn/generic/hgcd_reduce.c forced to use hgcd. */ + +/* +Copyright 2010 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#undef HGCD_REDUCE_THRESHOLD +#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX +#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1 +#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch + + +#include "../mpn/generic/hgcd_reduce.c" diff --git a/tune/hgcd_reduce_2.c b/tune/hgcd_reduce_2.c new file mode 100644 index 000000000..1eed4ba11 --- /dev/null +++ b/tune/hgcd_reduce_2.c @@ -0,0 +1,29 @@ +/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */ + +/* +Copyright 2010 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#undef HGCD_REDUCE_THRESHOLD +#define HGCD_REDUCE_THRESHOLD 0 +#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2 +#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch + +#include "../mpn/generic/hgcd_reduce.c" diff --git a/tune/speed.c b/tune/speed.c index 061517e28..08c13e776 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -279,6 +279,10 @@ const struct routine_t { { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, + { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce }, + { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 }, + { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 }, + { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, diff --git a/tune/speed.h b/tune/speed.h index 70484d391..5add58720 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -198,6 +198,9 @@ double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s)); double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s)); double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s)); double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s)); double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s)); double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s)); double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s)); @@ -488,6 +491,16 @@ mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr)); #define MPN_HGCD_LEHMER_ITCH(n) (n) +mp_size_t mpn_hgcd_reduce_1 + __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr)); +mp_size_t 
mpn_hgcd_reduce_1_itch + __GMP_PROTO ((mp_size_t, mp_size_t)); + +mp_size_t mpn_hgcd_reduce_2 + __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr)); +mp_size_t mpn_hgcd_reduce_2_itch + __GMP_PROTO ((mp_size_t, mp_size_t)); + mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); @@ -2706,6 +2719,57 @@ int speed_routine_count_zeros_setup return t; \ } +#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \ + { \ + mp_size_t hgcd_init_itch, hgcd_step_itch; \ + mp_ptr ap, bp, wp, tmp1; \ + struct hgcd_matrix hgcd; \ + mp_size_t p = s->size/2; \ + int res; \ + unsigned i; \ + double t; \ + TMP_DECL; \ + \ + if (s->size < 2) \ + return -1; \ + \ + TMP_MARK; \ + \ + SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \ + SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \ + \ + s->xp[s->size - 1] |= 1; \ + s->yp[s->size - 1] |= 1; \ + \ + hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \ + hgcd_step_itch = itchfunc (s->size, p); \ + \ + SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \ + SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \ + \ + speed_operand_src (s, s->xp, s->size); \ + speed_operand_src (s, s->yp, s->size); \ + speed_operand_dst (s, ap, s->size + 1); \ + speed_operand_dst (s, bp, s->size + 1); \ + speed_operand_dst (s, wp, hgcd_step_itch); \ + speed_operand_dst (s, tmp1, hgcd_init_itch); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + { \ + MPN_COPY (ap, s->xp, s->size); \ + MPN_COPY (bp, s->yp, s->size); \ + mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \ + res = func (&hgcd, ap, bp, s->size, p, wp); \ + } \ + while (--i != 0); \ + t = speed_endtime (); \ + TMP_FREE; \ + return t; \ + } + /* Run some GCDs of s->size limbs each. 
The number of different data values is decreased as s->size**2, since GCD is a quadratic algorithm. SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT diff --git a/tune/tuneup.c b/tune/tuneup.c index 4f53c979c..c62a25362 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -195,6 +195,8 @@ mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX; mp_size_t powm_threshold = MP_SIZE_T_MAX; mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX; mp_size_t hgcd_threshold = MP_SIZE_T_MAX; +mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX; +mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX; mp_size_t gcd_accel_threshold = MP_SIZE_T_MAX; mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX; mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX; @@ -1755,6 +1757,27 @@ tune_hgcd (void) } void +tune_hgcd_appr (void) +{ + static struct param_t param; + param.name = "HGCD_APPR_THRESHOLD"; + param.function = speed_mpn_hgcd_appr; + /* We seem to get strange results for small sizes */ + param.min_size = 30; + one (&hgcd_appr_threshold, &param); +} + +void +tune_hgcd_reduce (void) +{ + static struct param_t param; + param.name = "HGCD_REDUCE_THRESHOLD"; + param.function = speed_mpn_hgcd_reduce; + param.min_size = 30; + one (&hgcd_reduce_threshold, &param); +} + +void tune_gcd_dc (void) { static struct param_t param; @@ -2579,6 +2602,8 @@ all (void) tune_matrix22_mul (); tune_hgcd (); + tune_hgcd_appr (); + tune_hgcd_reduce(); tune_gcd_dc (); tune_gcdext_dc (); tune_jacobi_base (); |