diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | tune/common.c | 5 | ||||
-rw-r--r-- | tune/speed.c | 1 | ||||
-rw-r--r-- | tune/speed.h | 7 |
4 files changed, 19 insertions, 0 deletions
@@ -1,5 +1,11 @@
 2019-08-16  Niels Möller  <nisse@lysator.liu.se>
 
+	Speed support for gcd_22. Calls mpn_gcd_22(al, al, bl, bl), so
+	that B+1 is a common factor.
+	* tune/speed.h (SPEED_ROUTINE_MPN_GCD_22): New macro.
+	* tune/speed.c (routine): Add mpn_gcd_22.
+	* tune/common.c (speed_mpn_gcd_22): New function.
+
 	* mpn/generic/gcd.c (gcd_2): Moved to gcd_22.c below.
 	(mpn_gcd): Adapt for calling gcd_22.
 	* mpn/generic/gcd_22.c (mpn_gcd_22): New file and function.
diff --git a/tune/common.c b/tune/common.c
index 1291f5dc9..0b3f491b7 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1726,6 +1726,11 @@ speed_mpn_gcd_1N (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
 }
+double
+speed_mpn_gcd_22 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD_22 (mpn_gcd_22);
+}
 
 
 double
diff --git a/tune/speed.c b/tune/speed.c
index f9261227b..a013a24b7 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -297,6 +297,7 @@ const struct routine_t {
   { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
   { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL },
   { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
+  { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL },
 
   { "mpn_gcd", speed_mpn_gcd },
 
diff --git a/tune/speed.h b/tune/speed.h
index 41df4c675..25b154203 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -225,6 +225,7 @@ double speed_mpn_gcd (struct speed_params *);
 double speed_mpn_gcd_1 (struct speed_params *);
 double speed_mpn_gcd_11 (struct speed_params *);
 double speed_mpn_gcd_1N (struct speed_params *);
+double speed_mpn_gcd_22 (struct speed_params *);
 double speed_mpn_gcdext (struct speed_params *);
 double speed_mpn_gcdext_double (struct speed_params *);
 double speed_mpn_gcdext_one_double (struct speed_params *);
@@ -2825,6 +2826,12 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
   SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \
 			       function (px[j-1], py[j-1]))
 
+/* Multiply limbs by (B+1). Then we get a gcd exceeding one limb, so
+   we can measure gcd_22 loop only, without gcd_11. */
+#define SPEED_ROUTINE_MPN_GCD_22(function) \
+  SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \
+			       function (px[j-1], px[j-1], py[j-1], py[j-1]))
+
 #define SPEED_ROUTINE_MPN_JACBASE(function) \
   SPEED_ROUTINE_MPN_GCD_1_CALL \
     ({ \