From 5638f7afc35f4d04fa748d261213be67f3cf28d3 Mon Sep 17 00:00:00 2001 From: Niels Möller Date: Fri, 16 Aug 2019 08:21:42 +0200 Subject: Speed support for gcd_22. Calls mpn_gcd_22(al, al, bl, bl), so that B+1 is a common factor. * tune/speed.h (SPEED_ROUTINE_MPN_GCD_22): New macro. * tune/speed.c (routine): Add mpn_gcd_22. * tune/common.c (speed_mpn_gcd_22): New function. --- tune/common.c | 5 +++++ tune/speed.c | 1 + tune/speed.h | 7 +++++++ 3 files changed, 13 insertions(+) (limited to 'tune') diff --git a/tune/common.c b/tune/common.c index 1291f5dc9..0b3f491b7 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1726,6 +1726,11 @@ speed_mpn_gcd_1N (struct speed_params *s) { SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1); } +double +speed_mpn_gcd_22 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_GCD_22 (mpn_gcd_22); +} double diff --git a/tune/speed.c b/tune/speed.c index f9261227b..a013a24b7 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -297,6 +297,7 @@ const struct routine_t { { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL }, { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, + { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL }, { "mpn_gcd", speed_mpn_gcd }, diff --git a/tune/speed.h b/tune/speed.h index 41df4c675..25b154203 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -225,6 +225,7 @@ double speed_mpn_gcd (struct speed_params *); double speed_mpn_gcd_1 (struct speed_params *); double speed_mpn_gcd_11 (struct speed_params *); double speed_mpn_gcd_1N (struct speed_params *); +double speed_mpn_gcd_22 (struct speed_params *); double speed_mpn_gcdext (struct speed_params *); double speed_mpn_gcdext_double (struct speed_params *); double speed_mpn_gcdext_one_double (struct speed_params *); @@ -2825,6 +2826,12 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \ function (px[j-1], py[j-1])) +/* Multiply limbs by (B+1). 
Then we get a gcd exceeding one limb, so + we can measure gcd_22 loop only, without gcd_11. */ +#define SPEED_ROUTINE_MPN_GCD_22(function) \ + SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \ + function (px[j-1], px[j-1], py[j-1], py[j-1])) + #define SPEED_ROUTINE_MPN_JACBASE(function) \ SPEED_ROUTINE_MPN_GCD_1_CALL \ ({ \ -- cgit v1.2.1