summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- tune/common.c 5
-rw-r--r-- tune/speed.c 1
-rw-r--r-- tune/speed.h 7
4 files changed, 19 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 1ac104a10..970da8419 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2019-08-16 Niels Möller <nisse@lysator.liu.se>
+ Speed support for gcd_22. Calls mpn_gcd_22(al, al, bl, bl), so
+ that B+1 is a common factor.
+ * tune/speed.h (SPEED_ROUTINE_MPN_GCD_22): New macro.
+ * tune/speed.c (routine): Add mpn_gcd_22.
+ * tune/common.c (speed_mpn_gcd_22): New function.
+
* mpn/generic/gcd.c (gcd_2): Moved to gcd_22.c below.
(mpn_gcd): Adapt for calling gcd_22.
* mpn/generic/gcd_22.c (mpn_gcd_22): New file and function.
diff --git a/tune/common.c b/tune/common.c
index 1291f5dc9..0b3f491b7 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1726,6 +1726,11 @@ speed_mpn_gcd_1N (struct speed_params *s)
{
SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
}
+double
+speed_mpn_gcd_22 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD_22 (mpn_gcd_22);
+}
double
diff --git a/tune/speed.c b/tune/speed.c
index f9261227b..a013a24b7 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -297,6 +297,7 @@ const struct routine_t {
{ "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
{ "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL },
{ "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
+ { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL },
{ "mpn_gcd", speed_mpn_gcd },
diff --git a/tune/speed.h b/tune/speed.h
index 41df4c675..25b154203 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -225,6 +225,7 @@ double speed_mpn_gcd (struct speed_params *);
double speed_mpn_gcd_1 (struct speed_params *);
double speed_mpn_gcd_11 (struct speed_params *);
double speed_mpn_gcd_1N (struct speed_params *);
+double speed_mpn_gcd_22 (struct speed_params *);
double speed_mpn_gcdext (struct speed_params *);
double speed_mpn_gcdext_double (struct speed_params *);
double speed_mpn_gcdext_one_double (struct speed_params *);
@@ -2825,6 +2826,12 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \
function (px[j-1], py[j-1]))
+/* Pass each limb as both high and low word, i.e. multiply it by (B+1). Then
+ the gcd exceeds one limb, so we measure only the gcd_22 loop, not gcd_11. */
+#define SPEED_ROUTINE_MPN_GCD_22(function) \
+ SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \
+ function (px[j-1], px[j-1], py[j-1], py[j-1]))
+
#define SPEED_ROUTINE_MPN_JACBASE(function) \
SPEED_ROUTINE_MPN_GCD_1_CALL \
({ \