diff options
author | Kevin Ryde <user42@zip.com.au> | 2000-10-22 02:44:53 +0200 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2000-10-22 02:44:53 +0200 |
commit | 7674e58ddc466663b38393de92aa31d64d8f2971 (patch) | |
tree | 3c88b56103e04a9af205e8000c3176dfa110f7d5 | |
parent | 0d49c6e32c361ed502aa313f16f8876a17a6e466 (diff) | |
download | gmp-7674e58ddc466663b38393de92aa31d64d8f2971.tar.gz |
* tune/gcd_bin.c: New file.
* tune/gcd_finda_gen.c: New file.
* tune/Makefile.am (libspeed_la_SOURCES): Add them.
* tune/speed.[ch],common.c (mpn_gcd_binary, find_a): Add measuring.
-rw-r--r-- | tune/Makefile.am | 3 | ||||
-rw-r--r-- | tune/common.c | 5 | ||||
-rw-r--r-- | tune/gcd_bin.c | 26 | ||||
-rw-r--r-- | tune/gcd_finda_gen.c | 89 | ||||
-rw-r--r-- | tune/speed.c | 6 | ||||
-rw-r--r-- | tune/speed.h | 53 |
6 files changed, 178 insertions, 4 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am index 8270f76d6..eb58cb4e7 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -45,7 +45,8 @@ LDADD = $(DEPENDENCIES) EXTRA_LTLIBRARIES = libspeed.la libdummy.la libspeed_la_SOURCES = \ - common.c freq.c modlinv.c mul_n_mpn.c mul_n_open.c noop.c time.c + common.c freq.c gcd_bin.c gcd_finda_gen.c modlinv.c \ + mul_n_mpn.c mul_n_open.c noop.c time.c libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(top_builddir)/libgmp.la -lm diff --git a/tune/common.c b/tune/common.c index 07ff66756..fd3cf18c2 100644 --- a/tune/common.c +++ b/tune/common.c @@ -810,6 +810,11 @@ speed_mpn_gcd (struct speed_params *s) SPEED_ROUTINE_MPN_GCD (mpn_gcd); } double +speed_mpn_gcd_binary (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary); +} +double speed_mpn_gcdext (struct speed_params *s) { SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext); diff --git a/tune/gcd_bin.c b/tune/gcd_bin.c new file mode 100644 index 000000000..487df0d0d --- /dev/null +++ b/tune/gcd_bin.c @@ -0,0 +1,26 @@ +/* mpn/generic/gcd.c forced to use the binary algorithm. */ + +/* +Copyright 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#define GCD_ACCEL_THRESHOLD MP_SIZE_T_MAX +#define __gmpn_gcd mpn_gcd_binary +#include "../mpn/generic/gcd.c" diff --git a/tune/gcd_finda_gen.c b/tune/gcd_finda_gen.c new file mode 100644 index 000000000..8d9aba274 --- /dev/null +++ b/tune/gcd_finda_gen.c @@ -0,0 +1,89 @@ +/* mpn/generic/gcd.c accel GCD find_a() made available for measuring. */ + +/* +Copyright 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#include "speed.h" + + +/* This code is copied from mpn/generic/gcd.c, there's no global entrypoint + for it, being inlined. */ + + +static +#if ! defined (__i386__) +__gmp_inline /* don't inline this for the x86 */ +#endif +mp_limb_t +#if __STDC__ +find_a (mp_srcptr cp) +#else +find_a (cp) + mp_srcptr cp; +#endif +{ + unsigned long int leading_zero_bits = 0; + + mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */ + mp_limb_t n1_h = cp[1]; + + mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */ + mp_limb_t n2_h = ~n1_h; + + /* Main loop. */ + while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ + { + /* N1 <-- N1 % N2. */ + if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0) + { + unsigned long int i; + count_leading_zeros (i, n2_h); + i -= leading_zero_bits, leading_zero_bits += i; + n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i; + do + { + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1; + i -= 1; + } + while (i); + } + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + + MP_LIMB_T_SWAP (n1_h, n2_h); + MP_LIMB_T_SWAP (n1_l, n2_l); + } + + return n2_l; +} + + +double +speed_find_a (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_GCD_FINDA (find_a) +} diff --git a/tune/speed.c b/tune/speed.c index 2f3d54e1c..c68fac890 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -166,10 +166,12 @@ const struct routine_t { { "mpn_popcount", speed_mpn_popcount }, { "mpn_hamdist", speed_mpn_hamdist }, - { "mpn_gcdext", speed_mpn_gcdext }, { "mpn_gcd", speed_mpn_gcd }, - { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, + { "mpn_gcd_binary", speed_mpn_gcd_binary }, + { "find_a", speed_find_a, FLAG_NODATA }, + { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, + { "mpn_gcdext", speed_mpn_gcdext }, { "mpn_jacobi_base", speed_mpn_jacobi_base }, { "mpn_mul_basecase", speed_mpn_mul_basecase, FLAG_R_OPTIONAL }, diff --git a/tune/speed.h b/tune/speed.h index a513debf6..d31f55779 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -126,6 +126,7 @@ double speed_modlimb_invert_mul1 _PROTO ((struct speed_params *s)); double speed_modlimb_invert_loop _PROTO ((struct speed_params *s)); double speed_modlimb_invert_cond _PROTO ((struct speed_params *s)); double speed_modlimb_invert_arith _PROTO ((struct speed_params *s)); +double speed_find_a _PROTO ((struct speed_params *s)); double speed_gmp_allocate_free _PROTO ((struct speed_params *s)); double speed_gmp_allocate_reallocate_free _PROTO ((struct speed_params *s)); @@ -152,6 +153,7 @@ double speed_mpn_divrem_1cf _PROTO ((struct speed_params *s)); double speed_mpn_divrem_2 _PROTO ((struct speed_params *s)); double speed_mpn_gcd _PROTO ((struct speed_params *s)); double speed_mpn_gcd_1 _PROTO ((struct speed_params *s)); +double speed_mpn_gcd_binary _PROTO ((struct speed_params *s)); double speed_mpn_gcdext _PROTO ((struct speed_params *s)); double speed_mpn_get_str _PROTO ((struct speed_params *s)); double speed_mpn_hamdist _PROTO ((struct speed_params *s)); @@ -243,6 +245,9 @@ extern int speed_option_addrs; extern int speed_option_verbose; void speed_option_set _PROTO((const char *s)); +mp_size_t mpn_gcd _PROTO ((mp_ptr gp, + mp_ptr up, mp_size_t usize, + mp_ptr vp, mp_size_t vsize)); void mpn_toom3_mul_n_open _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom3_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); @@ -919,7 +924,8 @@ void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize); \ MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize); \ \ - /* y must be odd, x must have at least as many bits as y */ \ + /* y must be odd, x must have at least as many bits as y, \ + high limbs must be non-zero */ \ for (j = 0; j < pieces; j++) \ { \ mp_ptr x = px+j*s->size; \ @@ -1169,4 +1175,49 @@ void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); return t; \ } + +/* Run an accel gcd find_a() function over various data values. A set of + values is used in case some run particularly fast or slow. The size + parameter is ignored, the amount of data tested is fixed. */ + +#define SPEED_ROUTINE_MPN_GCD_FINDA(function) \ + { \ + unsigned i, j; \ + mp_limb_t cp[SPEED_BLOCK_SIZE][2]; \ + double t; \ + TMP_DECL (marker); \ + \ + TMP_MARK (marker); \ + \ + /* low must be odd, high must be non-zero */ \ + for (i = 0; i < SPEED_BLOCK_SIZE; i++) \ + { \ + cp[i][0] = s->xp_block[i] | 1; \ + cp[i][1] = s->yp_block[i] + (s->yp_block[i] == 0); \ + } \ + \ + speed_operand_src (s, &cp[0][0], 2*SPEED_BLOCK_SIZE); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + { \ + j = SPEED_BLOCK_SIZE; \ + do \ + { \ + function (cp[j-1]); \ + } \ + while (--j != 0); \ + } \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + TMP_FREE (marker); \ + \ + s->time_divisor = SPEED_BLOCK_SIZE; \ + return t; \ + } + + #endif |