summaryrefslogtreecommitdiff
path: root/tune
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2019-09-05 21:10:05 +0200
committer Niels Möller <nisse@lysator.liu.se> 2019-09-05 21:10:05 +0200
commit 9c489cead96f7d38dd428d274805fc9cd48514fa (patch)
tree 82af0b57717216c477c939c01f7f461ab043777a /tune
parent 95d545683e958f411d896cc9ae55507c144a063a (diff)
download gmp-9c489cead96f7d38dd428d274805fc9cd48514fa.tar.gz
For hgcd2, add a div1 function handling q <= 7 specially.
* mpn/generic/hgcd2.c (div1): Return both r and q as a mp_double_limb_t, replacing the DIV1 macro. (div1) [HGCD2_METHOD == 3]: New implementation handling q <= 7 specially and without branches. Based on Torbjörn's mail to the gmp-devel list. * tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add corresponding speed support. * tune/hgcd2-3.c: New file. * tune/tuneup.c (print_define_with_margin): New function, to output a comment with speedup compared to next-best method. (tune_hgcd2): Update tuning.
Diffstat (limited to 'tune')
-rw-r--r-- tune/Makefile.am 2
-rw-r--r-- tune/common.c 5
-rw-r--r-- tune/hgcd2-3.c 39
-rw-r--r-- tune/speed.c 1
-rw-r--r-- tune/speed.h 3
-rw-r--r-- tune/tuneup.c 46
6 files changed, 87 insertions, 9 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
index 4fa28ed12..bb107833c 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -58,7 +58,7 @@ libspeed_la_SOURCES = \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
- hgcd2-1.c hgcd2-2.c \
+ hgcd2-1.c hgcd2-2.c hgcd2-3.c \
mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
diff --git a/tune/common.c b/tune/common.c
index da4fb1145..ec7168175 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1648,6 +1648,11 @@ speed_mpn_hgcd2_2 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2);
}
+double
+speed_mpn_hgcd2_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3);
+}
double
speed_mpn_hgcd (struct speed_params *s)
diff --git a/tune/hgcd2-3.c b/tune/hgcd2-3.c
new file mode 100644
index 000000000..8027347b6
--- /dev/null
+++ b/tune/hgcd2-3.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 3.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_METHOD
+#define HGCD2_METHOD 3
+#define __gmpn_hgcd2 mpn_hgcd2_3
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3
+
+#include "mpn/generic/hgcd2.c"
diff --git a/tune/speed.c b/tune/speed.c
index b46d94476..4f5a348ae 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -288,6 +288,7 @@ const struct routine_t {
{ "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA },
{ "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA },
{ "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA },
+ { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA },
{ "mpn_hgcd", speed_mpn_hgcd },
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
{ "mpn_hgcd_appr", speed_mpn_hgcd_appr },
diff --git a/tune/speed.h b/tune/speed.h
index 968bccac7..5df155841 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -217,6 +217,7 @@ double speed_mpn_matrix22_mul (struct speed_params *);
double speed_mpn_hgcd2 (struct speed_params *);
double speed_mpn_hgcd2_1 (struct speed_params *);
double speed_mpn_hgcd2_2 (struct speed_params *);
+double speed_mpn_hgcd2_3 (struct speed_params *);
double speed_mpn_hgcd (struct speed_params *);
double speed_mpn_hgcd_lehmer (struct speed_params *);
double speed_mpn_hgcd_appr (struct speed_params *);
@@ -487,6 +488,8 @@ int mpn_hgcd2_1 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
struct hgcd_matrix1 *M);
int mpn_hgcd2_2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
struct hgcd_matrix1 *M);
+int mpn_hgcd2_3 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+ struct hgcd_matrix1 *M);
mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 5642bf789..c353940a4 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -518,6 +518,15 @@ print_define_remark (const char *name, mp_size_t value, const char *remark)
print_define_end_remark (name, value, remark);
}
+void
+print_define_with_margin (const char *name, mp_size_t value,
+ mp_size_t runner_up, double speedup)
+{
+ char buf[100];
+ snprintf (buf, sizeof(buf), "%.2f%% faster than %ld",
+ 100.0 * (speedup - 1), runner_up);
+ print_define_remark (name, value, buf);
+}
void
one (mp_size_t *threshold, struct param_t *param)
@@ -1902,26 +1911,47 @@ void
tune_hgcd2 (void)
{
static struct param_t param;
- double t1, t2;
+ double t[3+1];
int method;
+ int runner_up_method;
+ double runner_up_ratio;
s.size = 1;
- t1 = tuneup_measure (speed_mpn_hgcd2_1, &param, &s);
+ t[1] = tuneup_measure (speed_mpn_hgcd2_1, &param, &s);
if (option_trace >= 1)
- printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t1);
+ printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t[1]);
- t2 = tuneup_measure (speed_mpn_hgcd2_2, &param, &s);
+ t[2] = tuneup_measure (speed_mpn_hgcd2_2, &param, &s);
if (option_trace >= 1)
- printf ("size=%ld, mpn_hgcd2_2 %.9f\n", (long) s.size, t2);
+ printf ("size=%ld, mpn_hgcd2_2 %.9f\n", (long) s.size, t[2]);
- if (t1 == -1.0 || t2 == -1.0)
+ t[3] = tuneup_measure (speed_mpn_hgcd2_3, &param, &s);
+ if (option_trace >= 1)
+ printf ("size=%ld, mpn_hgcd2_3 %.9f\n", (long) s.size, t[3]);
+
+ if (t[1] == -1.0 || t[2] == -1.0 || t[3] == -1.0)
{
printf ("Oops, can't measure all mpn_hgcd2 methods\n");
abort ();
}
- method = (t1 < t2) ? 1 : 2;
- print_define ("HGCD2_METHOD", method);
+ if (t[1] < t[2] && t[1] < t[3])
+ {
+ method = 1;
+ runner_up_method = (t[2] < t[3]) ? 2 : 3;
+ }
+ else if (t[2] < t[3])
+ {
+ method = 2;
+ runner_up_method = (t[1] < t[3]) ? 1 : 3;
+ }
+ else
+ {
+ method = 3;
+ runner_up_method = (t[1] < t[2]) ? 1 : 2;
+ }
+ print_define_with_margin ("HGCD2_METHOD", method, runner_up_method,
+ t[runner_up_method] / t[method]);
}
void