diff options
Diffstat (limited to 'tune')
-rw-r--r-- | tune/Makefile.am | 2 | ||||
-rw-r--r-- | tune/common.c | 5 | ||||
-rw-r--r-- | tune/hgcd2-3.c | 39 | ||||
-rw-r--r-- | tune/speed.c | 1 | ||||
-rw-r--r-- | tune/speed.h | 3 | ||||
-rw-r--r-- | tune/tuneup.c | 46 |
6 files changed, 87 insertions, 9 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am index 4fa28ed12..bb107833c 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -58,7 +58,7 @@ libspeed_la_SOURCES = \ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ - hgcd2-1.c hgcd2-2.c \ + hgcd2-1.c hgcd2-2.c hgcd2-3.c \ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ set_strb.c set_strs.c set_strp.c time.c diff --git a/tune/common.c b/tune/common.c index da4fb1145..ec7168175 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1648,6 +1648,11 @@ speed_mpn_hgcd2_2 (struct speed_params *s) { SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2); } +double +speed_mpn_hgcd2_3 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3); +} double speed_mpn_hgcd (struct speed_params *s) diff --git a/tune/hgcd2-3.c b/tune/hgcd2-3.c new file mode 100644 index 000000000..8027347b6 --- /dev/null +++ b/tune/hgcd2-3.c @@ -0,0 +1,39 @@ +/* mpn/generic/hgcd2.c method 3. + +Copyright 2019 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" + +#undef HGCD2_METHOD +#define HGCD2_METHOD 3 +#define __gmpn_hgcd2 mpn_hgcd2_3 +/* Not used, but renamed to not get duplicate definitions */ +#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3 + +#include "mpn/generic/hgcd2.c" diff --git a/tune/speed.c b/tune/speed.c index b46d94476..4f5a348ae 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -288,6 +288,7 @@ const struct routine_t { { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA }, { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA }, + { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA }, { "mpn_hgcd", speed_mpn_hgcd }, { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, diff --git a/tune/speed.h b/tune/speed.h index 968bccac7..5df155841 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -217,6 +217,7 @@ double speed_mpn_matrix22_mul (struct speed_params *); double speed_mpn_hgcd2 (struct speed_params *); double speed_mpn_hgcd2_1 (struct speed_params *); double speed_mpn_hgcd2_2 (struct speed_params *); +double speed_mpn_hgcd2_3 (struct speed_params *); double speed_mpn_hgcd (struct speed_params *); double speed_mpn_hgcd_lehmer (struct speed_params *); double speed_mpn_hgcd_appr (struct speed_params *); @@ -487,6 +488,8 @@ int mpn_hgcd2_1 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, struct hgcd_matrix1 *M); int mpn_hgcd2_2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, struct hgcd_matrix1 *M); +int mpn_hgcd2_3 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, + struct hgcd_matrix1 *M); mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t); mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t); diff --git a/tune/tuneup.c b/tune/tuneup.c index 5642bf789..c353940a4 100644 --- 
a/tune/tuneup.c +++ b/tune/tuneup.c @@ -518,6 +518,15 @@ print_define_remark (const char *name, mp_size_t value, const char *remark) print_define_end_remark (name, value, remark); } +void +print_define_with_margin (const char *name, mp_size_t value, + mp_size_t runner_up, double speedup) +{ + char buf[100]; + snprintf (buf, sizeof(buf), "%.2f%% faster than %ld", + 100.0 * (speedup - 1), runner_up); + print_define_remark (name, value, buf); +} void one (mp_size_t *threshold, struct param_t *param) @@ -1902,26 +1911,47 @@ void tune_hgcd2 (void) { static struct param_t param; - double t1, t2; + double t[3+1]; int method; + int runner_up_method; + double runner_up_ratio; s.size = 1; - t1 = tuneup_measure (speed_mpn_hgcd2_1, &param, &s); + t[1] = tuneup_measure (speed_mpn_hgcd2_1, &param, &s); if (option_trace >= 1) - printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t1); + printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t[1]); - t2 = tuneup_measure (speed_mpn_hgcd2_2, &param, &s); + t[2] = tuneup_measure (speed_mpn_hgcd2_2, &param, &s); if (option_trace >= 1) - printf ("size=%ld, mpn_hgcd2_2 %.9f\n", (long) s.size, t2); + printf ("size=%ld, mpn_hgcd2_2 %.9f\n", (long) s.size, t[2]); - if (t1 == -1.0 || t2 == -1.0) + t[3] = tuneup_measure (speed_mpn_hgcd2_3, &param, &s); + if (option_trace >= 1) + printf ("size=%ld, mpn_hgcd2_3 %.9f\n", (long) s.size, t[3]); + + if (t[1] == -1.0 || t[2] == -1.0 || t[3] == -1.0) { printf ("Oops, can't measure all mpn_hgcd2 methods\n"); abort (); } - method = (t1 < t2) ? 1 : 2; - print_define ("HGCD2_METHOD", method); + if (t[1] < t[2] && t[1] < t[3]) + { + method = 1; + runner_up_method = (t[2] < t[3]) ? 2 : 3; + } + else if (t[2] < t[3]) + { + method = 2; + runner_up_method = (t[1] < t[3]) ? 1 : 3; + } + else + { + method = 3; + runner_up_method = (t[1] < t[2]) ? 1 : 2; + } + print_define_with_margin ("HGCD2_METHOD", method, runner_up_method, + t[runner_up_method] / t[method]); } void |