diff options
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | mpn/generic/hgcd2.c | 44 | ||||
-rw-r--r-- | tune/Makefile.am | 1 | ||||
-rw-r--r-- | tune/common.c | 10 | ||||
-rw-r--r-- | tune/hgcd2-1.c | 39 | ||||
-rw-r--r-- | tune/hgcd2-2.c | 39 | ||||
-rw-r--r-- | tune/speed.c | 2 | ||||
-rw-r--r-- | tune/speed.h | 7 | ||||
-rw-r--r-- | tune/tuneup.c | 27 |
9 files changed, 162 insertions, 16 deletions
@@ -1,5 +1,14 @@ 2019-09-04 Niels Möller <nisse@lysator.liu.se> + * mpn/generic/hgcd2.c (HGCD2_METHOD): New parameter. + (DIV1): New macro, using either the div1 function or plain + division, depending on the value of HGCD2_METHOD. + (mpn_hgcd2): Use DIV1. + * tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add + measuring of mpn_hgcd2 methods. + * tune/hgcd2-1.c, tune/hgcd2-2.c: New files. + * tune/tuneup.c: Tune HGCD2_METHOD. + * tune/speed.h (SPEED_ROUTINE_MPN_HGCD2): New macro. * tune/common.c (speed_mpn_hgcd2): New function. * tune/speed.c (routine): Add mpn_hgcd2. diff --git a/mpn/generic/hgcd2.c b/mpn/generic/hgcd2.c index f15b6099c..45b437cda 100644 --- a/mpn/generic/hgcd2.c +++ b/mpn/generic/hgcd2.c @@ -35,7 +35,22 @@ see https://www.gnu.org/licenses/. */ #include "gmp-impl.h" #include "longlong.h" -#if GMP_NAIL_BITS == 0 +#ifndef HGCD2_METHOD +#define HGCD2_METHOD 2 +#endif + +#if GMP_NAIL_BITS != 0 +#error Nails not implemented +#endif + +#if HGCD2_METHOD == 1 + +#define DIV1(q, r, a, b) do { \ + (q) = (a)/(b); \ + (r) = (a) - (q)*(b); \ + } while (0) + +#elif HGCD2_METHOD == 2 /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return the remainder. */ @@ -93,6 +108,14 @@ div1 (mp_ptr rp, *rp = n0; return q; } +#define DIV1(q, r, a, b) do { \ + mp_limb_t __div1_r; \ + (q) = div1 (&__div1_r, a, b); \ + (r) = __div1_r; \ + } while (0) +#else +#error Unknown HGCD2_METHOD +#endif /* Two-limb division optimized for small quotients. */ static inline mp_limb_t @@ -197,15 +220,6 @@ div2 (mp_ptr rp, } #endif -#else /* GMP_NAIL_BITS != 0 */ -/* Check all functions for nail support. */ -/* hgcd2 should be defined to take inputs including nail bits, and - produce a matrix with elements also including nail bits. This is - necessary, for the matrix elements to be useful with mpn_mul_1, - mpn_addmul_1 and friends. */ -#error Not implemented -#endif /* GMP_NAIL_BITS != 0 */ - /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs matrix M. Returns 1 if we make progress, i.e. can perform at least one subtraction. Otherwise returns zero. */ @@ -360,9 +374,8 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, } else { - mp_limb_t r; - mp_limb_t q = div1 (&r, ah, bh); - ah = r; + mp_limb_t q; + DIV1 (q, ah, ah, bh); if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) { /* A is too small, but q is correct. */ @@ -389,9 +402,8 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, } else { - mp_limb_t r; - mp_limb_t q = div1 (&r, bh, ah); - bh = r; + mp_limb_t q; + DIV1 (q, bh, bh, ah); if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) { /* B is too small, but q is correct. */ diff --git a/tune/Makefile.am b/tune/Makefile.am index 5fb3b50d3..4fa28ed12 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -58,6 +58,7 @@ libspeed_la_SOURCES = \ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ + hgcd2-1.c hgcd2-2.c \ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ set_strb.c set_strs.c set_strp.c time.c diff --git a/tune/common.c b/tune/common.c index a918a67a4..da4fb1145 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1638,6 +1638,16 @@ speed_mpn_hgcd2 (struct speed_params *s) { SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2); } +double +speed_mpn_hgcd2_1 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1); +} +double +speed_mpn_hgcd2_2 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2); +} double speed_mpn_hgcd (struct speed_params *s) diff --git a/tune/hgcd2-1.c b/tune/hgcd2-1.c new file mode 100644 index 000000000..ecc033b50 --- /dev/null +++ b/tune/hgcd2-1.c @@ -0,0 +1,39 @@ +/* mpn/generic/hgcd2.c method 1. + +Copyright 2019 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" + +#undef HGCD2_METHOD +#define HGCD2_METHOD 1 +#define __gmpn_hgcd2 mpn_hgcd2_1 +/* Not used, but renamed to not get duplicate definitions */ +#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_1 + +#include "mpn/generic/hgcd2.c" diff --git a/tune/hgcd2-2.c b/tune/hgcd2-2.c new file mode 100644 index 000000000..aa045dd7b --- /dev/null +++ b/tune/hgcd2-2.c @@ -0,0 +1,39 @@ +/* mpn/generic/hgcd2.c method 2. + +Copyright 2019 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" + +#undef HGCD2_METHOD +#define HGCD2_METHOD 2 +#define __gmpn_hgcd2 mpn_hgcd2_2 +/* Not used, but renamed to not get duplicate definitions */ +#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_2 + +#include "mpn/generic/hgcd2.c" diff --git a/tune/speed.c b/tune/speed.c index 4220d6be5..b46d94476 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -286,6 +286,8 @@ const struct routine_t { { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, + { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA }, + { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA }, { "mpn_hgcd", speed_mpn_hgcd }, { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, diff --git a/tune/speed.h b/tune/speed.h index 044bc9c2b..968bccac7 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -215,6 +215,8 @@ double speed_mpn_div_qr_2u (struct speed_params *); double speed_mpn_fib2_ui (struct speed_params *); double speed_mpn_matrix22_mul (struct speed_params *); double speed_mpn_hgcd2 (struct speed_params *); +double speed_mpn_hgcd2_1 (struct speed_params *); +double speed_mpn_hgcd2_2 (struct speed_params *); double speed_mpn_hgcd (struct speed_params *); double speed_mpn_hgcd_lehmer (struct speed_params *); double speed_mpn_hgcd_appr (struct speed_params *); @@ -481,6 +483,11 @@ int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int); int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int); int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int); +int mpn_hgcd2_1 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, + struct hgcd_matrix1 *M); +int mpn_hgcd2_2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, + struct hgcd_matrix1 *M); + mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t); mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t); diff --git a/tune/tuneup.c b/tune/tuneup.c index 600a4a03b..5642bf789 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -1899,6 +1899,32 @@ tune_matrix22_mul (void) } void +tune_hgcd2 (void) +{ + static struct param_t param; + double t1, t2; + int method; + + s.size = 1; + t1 = tuneup_measure (speed_mpn_hgcd2_1, ¶m, &s); + if (option_trace >= 1) + printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t1); + + t2 = tuneup_measure (speed_mpn_hgcd2_2, ¶m, &s); + if (option_trace >= 1) + printf ("size=%ld, mpn_hgcd2_2 %.9f\n", (long) s.size, t2); + + if (t1 == -1.0 || t2 == -1.0) + { + printf ("Oops, can't measure all mpn_hgcd2 methods\n"); + abort (); + } + + method = (t1 < t2) ? 1 : 2; + print_define ("HGCD2_METHOD", method); +} + +void tune_hgcd (void) { static struct param_t param; @@ -2973,6 +2999,7 @@ all (void) printf("\n"); tune_matrix22_mul (); + tune_hgcd2 (); tune_hgcd (); tune_hgcd_appr (); tune_hgcd_reduce(); |