* tune/gcd_bin.c: New file.

* tune/gcd_finda_gen.c: New file. * tune/Makefile.am (libspeed_la_SOURCES): Add them. * tune/speed.[ch],common.c (mpn_gcd_binary, find_a): Add measuring.
author: Kevin Ryde <user42@zip.com.au> 2000-10-22 02:44:53 +0200
committer: Kevin Ryde <user42@zip.com.au> 2000-10-22 02:44:53 +0200
commit: 7674e58ddc466663b38393de92aa31d64d8f2971 (patch)
tree: 3c88b56103e04a9af205e8000c3176dfa110f7d5
parent: 0d49c6e32c361ed502aa313f16f8876a17a6e466 (diff)
download: gmp-7674e58ddc466663b38393de92aa31d64d8f2971.tar.gz
6 files changed, 178 insertions, 4 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
index 8270f76d6..eb58cb4e7 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -45,7 +45,8 @@ LDADD = $(DEPENDENCIES)
 EXTRA_LTLIBRARIES = libspeed.la libdummy.la
 
 libspeed_la_SOURCES = \
-  common.c freq.c modlinv.c mul_n_mpn.c mul_n_open.c noop.c time.c
+  common.c freq.c gcd_bin.c gcd_finda_gen.c modlinv.c \
+  mul_n_mpn.c mul_n_open.c noop.c time.c
 libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ)
 libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(top_builddir)/libgmp.la -lm
 
diff --git a/tune/common.c b/tune/common.c
index 07ff66756..fd3cf18c2 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -810,6 +810,11 @@ speed_mpn_gcd (struct speed_params *s)
   SPEED_ROUTINE_MPN_GCD (mpn_gcd);
 }
 double
+speed_mpn_gcd_binary (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary);
+}
+double
 speed_mpn_gcdext (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);
diff --git a/tune/gcd_bin.c b/tune/gcd_bin.c
new file mode 100644
index 000000000..487df0d0d
--- /dev/null
+++ b/tune/gcd_bin.c
@@ -0,0 +1,26 @@
+/* mpn/generic/gcd.c forced to use the binary algorithm. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#define GCD_ACCEL_THRESHOLD  MP_SIZE_T_MAX
+#define __gmpn_gcd  mpn_gcd_binary
+#include "../mpn/generic/gcd.c"
diff --git a/tune/gcd_finda_gen.c b/tune/gcd_finda_gen.c
new file mode 100644
index 000000000..8d9aba274
--- /dev/null
+++ b/tune/gcd_finda_gen.c
@@ -0,0 +1,89 @@
+/* mpn/generic/gcd.c accel GCD find_a() made available for measuring.  */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "speed.h"
+
+
+/* This code is copied from mpn/generic/gcd.c, there's no global entrypoint
+   for it, being inlined. */
+
+
+static
+#if ! defined (__i386__)
+__gmp_inline			/* don't inline this for the x86 */
+#endif
+mp_limb_t
+#if __STDC__
+find_a (mp_srcptr cp)
+#else
+find_a (cp)
+     mp_srcptr cp;
+#endif
+{
+  unsigned long int leading_zero_bits = 0;
+
+  mp_limb_t n1_l = cp[0];	/* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
+  mp_limb_t n1_h = cp[1];
+
+  mp_limb_t n2_l = -n1_l;	/* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
+  mp_limb_t n2_h = ~n1_h;
+
+  /* Main loop.  */
+  while (n2_h)			/* While N2 >= 2^BITS_PER_MP_LIMB.  */
+    {
+      /* N1 <-- N1 % N2.  */
+      if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
+	{
+	  unsigned long int i;
+	  count_leading_zeros (i, n2_h);
+	  i -= leading_zero_bits, leading_zero_bits += i;
+	  n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
+	  do
+	    {
+	      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+		n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+	      n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
+	      i -= 1;
+	    }
+	  while (i);
+	}
+      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+	n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+
+      MP_LIMB_T_SWAP (n1_h, n2_h);
+      MP_LIMB_T_SWAP (n1_l, n2_l);
+    }
+
+  return n2_l;
+}
+
+
+double
+speed_find_a (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD_FINDA (find_a)
+}
diff --git a/tune/speed.c b/tune/speed.c
index 2f3d54e1c..c68fac890 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -166,10 +166,12 @@ const struct routine_t {
   { "mpn_popcount",      speed_mpn_popcount         },
   { "mpn_hamdist",       speed_mpn_hamdist          },
 
-  { "mpn_gcdext",        speed_mpn_gcdext           },
   { "mpn_gcd",           speed_mpn_gcd              },
-  { "mpn_gcd_1",         speed_mpn_gcd_1, FLAG_R_OPTIONAL },
+  { "mpn_gcd_binary",    speed_mpn_gcd_binary       },
+  { "find_a",            speed_find_a, FLAG_NODATA  },
 
+  { "mpn_gcd_1",         speed_mpn_gcd_1, FLAG_R_OPTIONAL },
+  { "mpn_gcdext",        speed_mpn_gcdext           },
   { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
 
   { "mpn_mul_basecase",  speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
diff --git a/tune/speed.h b/tune/speed.h
index a513debf6..d31f55779 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -126,6 +126,7 @@ double speed_modlimb_invert_mul1 _PROTO ((struct speed_params *s));
 double speed_modlimb_invert_loop _PROTO ((struct speed_params *s));
 double speed_modlimb_invert_cond _PROTO ((struct speed_params *s));
 double speed_modlimb_invert_arith _PROTO ((struct speed_params *s));
+double speed_find_a _PROTO ((struct speed_params *s));
 double speed_gmp_allocate_free _PROTO ((struct speed_params *s));
 double speed_gmp_allocate_reallocate_free _PROTO ((struct speed_params *s));
 
@@ -152,6 +153,7 @@ double speed_mpn_divrem_1cf _PROTO ((struct speed_params *s));
 double speed_mpn_divrem_2 _PROTO ((struct speed_params *s));
 double speed_mpn_gcd _PROTO ((struct speed_params *s));
 double speed_mpn_gcd_1 _PROTO ((struct speed_params *s));
+double speed_mpn_gcd_binary _PROTO ((struct speed_params *s));
 double speed_mpn_gcdext _PROTO ((struct speed_params *s));
 double speed_mpn_get_str _PROTO ((struct speed_params *s));
 double speed_mpn_hamdist _PROTO ((struct speed_params *s));
@@ -243,6 +245,9 @@ extern int  speed_option_addrs;
 extern int  speed_option_verbose;
 void speed_option_set _PROTO((const char *s));
 
+mp_size_t mpn_gcd _PROTO ((mp_ptr gp,
+                           mp_ptr up, mp_size_t usize,
+                           mp_ptr vp, mp_size_t vsize));
 void mpn_toom3_mul_n_open _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,
                                    mp_ptr));
 void mpn_toom3_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
@@ -919,7 +924,8 @@ void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
     MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);      \
     MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);      \
                                                                 \
-    /* y must be odd, x must have at least as many bits as y */ \
+    /* y must be odd, x must have at least as many bits as y,   \
+       high limbs must be non-zero */                           \
     for (j = 0; j < pieces; j++)                                \
       {                                                         \
         mp_ptr  x = px+j*s->size;                               \
@@ -1169,4 +1175,49 @@ void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
     return t;                                                                \
   }  
 
+
+/* Run an accel gcd find_a() function over various data values.  A set of
+   values is used in case some run particularly fast or slow.  The size
+   parameter is ignored, the amount of data tested is fixed.  */
+
+#define SPEED_ROUTINE_MPN_GCD_FINDA(function)                   \
+  {                                                             \
+    unsigned  i, j;                                             \
+    mp_limb_t cp[SPEED_BLOCK_SIZE][2];                          \
+    double    t;                                                \
+    TMP_DECL (marker);                                          \
+                                                                \
+    TMP_MARK (marker);                                          \
+                                                                \
+    /* low must be odd, high must be non-zero */                \
+    for (i = 0; i < SPEED_BLOCK_SIZE; i++)                      \
+      {                                                         \
+        cp[i][0] = s->xp_block[i] | 1;                          \
+        cp[i][1] = s->yp_block[i] + (s->yp_block[i] == 0);      \
+      }                                                         \
+                                                                \
+    speed_operand_src (s, &cp[0][0], 2*SPEED_BLOCK_SIZE);       \
+    speed_cache_fill (s);                                       \
+                                                                \
+    speed_starttime ();                                         \
+    i = s->reps;                                                \
+    do                                                          \
+      {                                                         \
+        j = SPEED_BLOCK_SIZE;                                   \
+        do                                                      \
+          {                                                     \
+            function (cp[j-1]);                                 \
+          }                                                     \
+        while (--j != 0);                                       \
+      }                                                         \
+    while (--i != 0);                                           \
+    t = speed_endtime ();                                       \
+                                                                \
+    TMP_FREE (marker);                                          \
+                                                                \
+    s->time_divisor = SPEED_BLOCK_SIZE;                         \
+    return t;                                                   \
+  }  
+
+
 #endif
author	Kevin Ryde <user42@zip.com.au>	2000-10-22 02:44:53 +0200
committer	Kevin Ryde <user42@zip.com.au>	2000-10-22 02:44:53 +0200
commit	7674e58ddc466663b38393de92aa31d64d8f2971 (patch)
tree	3c88b56103e04a9af205e8000c3176dfa110f7d5
parent	0d49c6e32c361ed502aa313f16f8876a17a6e466 (diff)
download	gmp-7674e58ddc466663b38393de92aa31d64d8f2971.tar.gz