Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.

author: Niels Möller <nisse@lysator.liu.se> 2011-11-11 14:59:14 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2011-11-11 14:59:14 +0100
commit: 5b0e8651a493b9128594851eff5387bde8081526 (patch)
tree: a362b2b040a395b18a1841f3e3e2e877ddc53aaa /tune
parent: eb453fbaa0a498d2b1bfd05c9a51310da203fd33 (diff)
download: gmp-5b0e8651a493b9128594851eff5387bde8081526.tar.gz
7 files changed, 173 insertions, 2 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
index e54c020d4..117e5ca2c 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -43,7 +43,8 @@ libspeed_la_SOURCES =							\
   common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c		\
   freq.c								\
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
-  hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c		\
+  hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c				\
+  jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
   mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
   set_strb.c set_strs.c set_strp.c time.c
@@ -129,7 +130,9 @@ TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
 TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c			\
   dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
   invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c		\
-  get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c		\
+  get_str.c set_str.c matrix22_mul.c					\
+  hgcd.c hgcd_appr.c hgcd_reduce.c					\
+  mul_n.c sqr.c								\
   mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c	\
   mulmid.c mulmid_n.c toom42_mulmid.c					\
   nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c	\
diff --git a/tune/common.c b/tune/common.c
index eb2d4ba1a..cc333a470 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1539,6 +1539,22 @@ speed_mpn_hgcd_appr (struct speed_params *s)
 }
 
 double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+}
+
+double
 speed_mpn_gcd (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_GCD (mpn_gcd);
diff --git a/tune/hgcd_reduce_1.c b/tune/hgcd_reduce_1.c
new file mode 100644
index 000000000..996362414
--- /dev/null
+++ b/tune/hgcd_reduce_1.c
@@ -0,0 +1,30 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_reduce  mpn_hgcd_reduce_1
+#define __gmpn_hgcd_reduce_itch  mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/tune/hgcd_reduce_2.c b/tune/hgcd_reduce_2.c
new file mode 100644
index 000000000..1eed4ba11
--- /dev/null
+++ b/tune/hgcd_reduce_2.c
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 0
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/tune/speed.c b/tune/speed.c
index 061517e28..08c13e776 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -279,6 +279,10 @@ const struct routine_t {
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
 
+  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
+  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
+  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
+  
   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
 
diff --git a/tune/speed.h b/tune/speed.h
index 70484d391..5add58720 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -198,6 +198,9 @@ double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
@@ -488,6 +491,16 @@ mp_size_t mpn_hgcd_lehmer
   __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
 #define MPN_HGCD_LEHMER_ITCH(n) (n)
 
+mp_size_t mpn_hgcd_reduce_1
+  __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_1_itch
+  __GMP_PROTO ((mp_size_t, mp_size_t));
+
+mp_size_t mpn_hgcd_reduce_2
+  __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_2_itch
+  __GMP_PROTO ((mp_size_t, mp_size_t));
+
 mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
 mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
 
@@ -2706,6 +2719,57 @@ int speed_routine_count_zeros_setup
     return t;								\
   }
 
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc)		\
+  {									\
+    mp_size_t hgcd_init_itch, hgcd_step_itch;				\
+    mp_ptr ap, bp, wp, tmp1;						\
+    struct hgcd_matrix hgcd;						\
+    mp_size_t p = s->size/2;						\
+    int res;								\
+    unsigned i;								\
+    double t;								\
+    TMP_DECL;								\
+    									\
+    if (s->size < 2)							\
+      return -1;							\
+    									\
+    TMP_MARK;								\
+    									\
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
+    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
+    									\
+    s->xp[s->size - 1] |= 1;						\
+    s->yp[s->size - 1] |= 1;						\
+    									\
+    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
+    hgcd_step_itch = itchfunc (s->size, p);				\
+    									\
+    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp);			\
+    									\
+    speed_operand_src (s, s->xp, s->size);				\
+    speed_operand_src (s, s->yp, s->size);				\
+    speed_operand_dst (s, ap, s->size + 1);				\
+    speed_operand_dst (s, bp, s->size + 1);				\
+    speed_operand_dst (s, wp, hgcd_step_itch);				\
+    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
+    speed_cache_fill (s);						\
+    									\
+    speed_starttime ();							\
+    i = s->reps;							\
+    do									\
+      {									\
+	MPN_COPY (ap, s->xp, s->size);					\
+	MPN_COPY (bp, s->yp, s->size);					\
+	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
+	res = func (&hgcd, ap, bp, s->size, p, wp);			\
+      }									\
+    while (--i != 0);							\
+    t = speed_endtime ();						\
+    TMP_FREE;								\
+    return t;								\
+  }
+
 /* Run some GCDs of s->size limbs each.  The number of different data values
    is decreased as s->size**2, since GCD is a quadratic algorithm.
    SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 4f53c979c..c62a25362 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -195,6 +195,8 @@ mp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;
 mp_size_t  powm_threshold               = MP_SIZE_T_MAX;
 mp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;
 mp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;
+mp_size_t  hgcd_appr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  hgcd_reduce_threshold        = MP_SIZE_T_MAX;
 mp_size_t  gcd_accel_threshold          = MP_SIZE_T_MAX;
 mp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;
 mp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;
@@ -1755,6 +1757,27 @@ tune_hgcd (void)
 }
 
 void
+tune_hgcd_appr (void)
+{
+  static struct param_t  param;
+  param.name = "HGCD_APPR_THRESHOLD";
+  param.function = speed_mpn_hgcd_appr;
+  /* We seem to get strange results for small sizes */
+  param.min_size = 30;
+  one (&hgcd_appr_threshold, &param);
+}
+
+void
+tune_hgcd_reduce (void)
+{
+  static struct param_t  param;
+  param.name = "HGCD_REDUCE_THRESHOLD";
+  param.function = speed_mpn_hgcd_reduce;
+  param.min_size = 30;
+  one (&hgcd_reduce_threshold, &param);
+}
+
+void
 tune_gcd_dc (void)
 {
   static struct param_t  param;
@@ -2579,6 +2602,8 @@ all (void)
 
   tune_matrix22_mul ();
   tune_hgcd ();
+  tune_hgcd_appr ();
+  tune_hgcd_reduce();
   tune_gcd_dc ();
   tune_gcdext_dc ();
   tune_jacobi_base ();
author	Niels Möller <nisse@lysator.liu.se>	2011-11-11 14:59:14 +0100
committer	Niels Möller <nisse@lysator.liu.se>	2011-11-11 14:59:14 +0100
commit	5b0e8651a493b9128594851eff5387bde8081526 (patch)
tree	a362b2b040a395b18a1841f3e3e2e877ddc53aaa /tune
parent	eb453fbaa0a498d2b1bfd05c9a51310da203fd33 (diff)
download	gmp-5b0e8651a493b9128594851eff5387bde8081526.tar.gz