summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Ryde <user42@zip.com.au>2000-10-22 02:44:53 +0200
committerKevin Ryde <user42@zip.com.au>2000-10-22 02:44:53 +0200
commit7674e58ddc466663b38393de92aa31d64d8f2971 (patch)
tree3c88b56103e04a9af205e8000c3176dfa110f7d5
parent0d49c6e32c361ed502aa313f16f8876a17a6e466 (diff)
downloadgmp-7674e58ddc466663b38393de92aa31d64d8f2971.tar.gz
* tune/gcd_bin.c: New file.
* tune/gcd_finda_gen.c: New file. * tune/Makefile.am (libspeed_la_SOURCES): Add them. * tune/speed.[ch],common.c (mpn_gcd_binary, find_a): Add measuring.
-rw-r--r--tune/Makefile.am3
-rw-r--r--tune/common.c5
-rw-r--r--tune/gcd_bin.c26
-rw-r--r--tune/gcd_finda_gen.c89
-rw-r--r--tune/speed.c6
-rw-r--r--tune/speed.h53
6 files changed, 178 insertions, 4 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
index 8270f76d6..eb58cb4e7 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -45,7 +45,8 @@ LDADD = $(DEPENDENCIES)
EXTRA_LTLIBRARIES = libspeed.la libdummy.la
libspeed_la_SOURCES = \
- common.c freq.c modlinv.c mul_n_mpn.c mul_n_open.c noop.c time.c
+ common.c freq.c gcd_bin.c gcd_finda_gen.c modlinv.c \
+ mul_n_mpn.c mul_n_open.c noop.c time.c
libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ)
libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(top_builddir)/libgmp.la -lm
diff --git a/tune/common.c b/tune/common.c
index 07ff66756..fd3cf18c2 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -810,6 +810,11 @@ speed_mpn_gcd (struct speed_params *s)
SPEED_ROUTINE_MPN_GCD (mpn_gcd);
}
double
+speed_mpn_gcd_binary (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary);
+}
+double
speed_mpn_gcdext (struct speed_params *s)
{
SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);
diff --git a/tune/gcd_bin.c b/tune/gcd_bin.c
new file mode 100644
index 000000000..487df0d0d
--- /dev/null
+++ b/tune/gcd_bin.c
@@ -0,0 +1,26 @@
+/* mpn/generic/gcd.c forced to use the binary algorithm. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#define GCD_ACCEL_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_gcd mpn_gcd_binary
+#include "../mpn/generic/gcd.c"
diff --git a/tune/gcd_finda_gen.c b/tune/gcd_finda_gen.c
new file mode 100644
index 000000000..8d9aba274
--- /dev/null
+++ b/tune/gcd_finda_gen.c
@@ -0,0 +1,89 @@
+/* mpn/generic/gcd.c accel GCD find_a() made available for measuring. */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "speed.h"
+
+
+/* This code is copied from mpn/generic/gcd.c, there's no global entrypoint
+ for it, being inlined. */
+
+
+static
+#if ! defined (__i386__)
+__gmp_inline /* don't inline this for the x86 */
+#endif
+mp_limb_t
+#if __STDC__
+find_a (mp_srcptr cp)
+#else
+find_a (cp)
+ mp_srcptr cp;
+#endif
+{
+ unsigned long int leading_zero_bits = 0;
+
+ mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */
+ mp_limb_t n1_h = cp[1];
+
+ mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */
+ mp_limb_t n2_h = ~n1_h;
+
+ /* Main loop. */
+ while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */
+ {
+ /* N1 <-- N1 % N2. */
+ if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
+ {
+ unsigned long int i;
+ count_leading_zeros (i, n2_h);
+ i -= leading_zero_bits, leading_zero_bits += i;
+ n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
+ do
+ {
+ if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+ n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+ n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
+ i -= 1;
+ }
+ while (i);
+ }
+ if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+ n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+
+ MP_LIMB_T_SWAP (n1_h, n2_h);
+ MP_LIMB_T_SWAP (n1_l, n2_l);
+ }
+
+ return n2_l;
+}
+
+
+double
+speed_find_a (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD_FINDA (find_a)
+}
diff --git a/tune/speed.c b/tune/speed.c
index 2f3d54e1c..c68fac890 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -166,10 +166,12 @@ const struct routine_t {
{ "mpn_popcount", speed_mpn_popcount },
{ "mpn_hamdist", speed_mpn_hamdist },
- { "mpn_gcdext", speed_mpn_gcdext },
{ "mpn_gcd", speed_mpn_gcd },
- { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
+ { "mpn_gcd_binary", speed_mpn_gcd_binary },
+ { "find_a", speed_find_a, FLAG_NODATA },
+ { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
+ { "mpn_gcdext", speed_mpn_gcdext },
{ "mpn_jacobi_base", speed_mpn_jacobi_base },
{ "mpn_mul_basecase", speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
diff --git a/tune/speed.h b/tune/speed.h
index a513debf6..d31f55779 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -126,6 +126,7 @@ double speed_modlimb_invert_mul1 _PROTO ((struct speed_params *s));
double speed_modlimb_invert_loop _PROTO ((struct speed_params *s));
double speed_modlimb_invert_cond _PROTO ((struct speed_params *s));
double speed_modlimb_invert_arith _PROTO ((struct speed_params *s));
+double speed_find_a _PROTO ((struct speed_params *s));
double speed_gmp_allocate_free _PROTO ((struct speed_params *s));
double speed_gmp_allocate_reallocate_free _PROTO ((struct speed_params *s));
@@ -152,6 +153,7 @@ double speed_mpn_divrem_1cf _PROTO ((struct speed_params *s));
double speed_mpn_divrem_2 _PROTO ((struct speed_params *s));
double speed_mpn_gcd _PROTO ((struct speed_params *s));
double speed_mpn_gcd_1 _PROTO ((struct speed_params *s));
+double speed_mpn_gcd_binary _PROTO ((struct speed_params *s));
double speed_mpn_gcdext _PROTO ((struct speed_params *s));
double speed_mpn_get_str _PROTO ((struct speed_params *s));
double speed_mpn_hamdist _PROTO ((struct speed_params *s));
@@ -243,6 +245,9 @@ extern int speed_option_addrs;
extern int speed_option_verbose;
void speed_option_set _PROTO((const char *s));
+mp_size_t mpn_gcd _PROTO ((mp_ptr gp,
+ mp_ptr up, mp_size_t usize,
+ mp_ptr vp, mp_size_t vsize));
void mpn_toom3_mul_n_open _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,
mp_ptr));
void mpn_toom3_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
@@ -919,7 +924,8 @@ void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize); \
MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize); \
\
- /* y must be odd, x must have at least as many bits as y */ \
+ /* y must be odd, x must have at least as many bits as y, \
+ high limbs must be non-zero */ \
for (j = 0; j < pieces; j++) \
{ \
mp_ptr x = px+j*s->size; \
@@ -1169,4 +1175,49 @@ void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
return t; \
}
+
+/* Run an accel gcd find_a() function over various data values. A set of
+ values is used in case some run particularly fast or slow. The size
+ parameter is ignored, the amount of data tested is fixed. */
+
+#define SPEED_ROUTINE_MPN_GCD_FINDA(function) \
+ { \
+ unsigned i, j; \
+ mp_limb_t cp[SPEED_BLOCK_SIZE][2]; \
+ double t; \
+ TMP_DECL (marker); \
+ \
+ TMP_MARK (marker); \
+ \
+ /* low must be odd, high must be non-zero */ \
+ for (i = 0; i < SPEED_BLOCK_SIZE; i++) \
+ { \
+ cp[i][0] = s->xp_block[i] | 1; \
+ cp[i][1] = s->yp_block[i] + (s->yp_block[i] == 0); \
+ } \
+ \
+ speed_operand_src (s, &cp[0][0], 2*SPEED_BLOCK_SIZE); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = SPEED_BLOCK_SIZE; \
+ do \
+ { \
+ function (cp[j-1]); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE (marker); \
+ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ return t; \
+ }
+
+
#endif