diff options
-rw-r--r-- | gmp-impl.h | 34 | ||||
-rw-r--r-- | mpn/generic/sqrlo.c | 13 | ||||
-rw-r--r-- | mpn/generic/sqrlo_basecase.c | 7 | ||||
-rw-r--r-- | tune/Makefile.am | 2 | ||||
-rw-r--r-- | tune/tuneup.c | 50 |
5 files changed, 90 insertions, 16 deletions
diff --git a/gmp-impl.h b/gmp-impl.h index 689c91886..24214a604 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -2090,6 +2090,12 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t); #ifndef MULLO_BASECASE_THRESHOLD_LIMIT #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD #endif +#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT +#define SQRLO_BASECASE_THRESHOLD_LIMIT SQRLO_BASECASE_THRESHOLD +#endif +#ifndef SQRLO_DC_THRESHOLD_LIMIT +#define SQRLO_DC_THRESHOLD_LIMIT SQRLO_DC_THRESHOLD +#endif /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we @@ -2138,6 +2144,18 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t); #define MULLO_MUL_N_THRESHOLD (2*MUL_FFT_THRESHOLD) #endif +#ifndef SQRLO_BASECASE_THRESHOLD +#define SQRLO_BASECASE_THRESHOLD 0 /* never use mpn_sqr_basecase */ +#endif + +#ifndef SQRLO_DC_THRESHOLD +#define SQRLO_DC_THRESHOLD (MULLO_DC_THRESHOLD) +#endif + +#ifndef SQRLO_SQR_THRESHOLD +#define SQRLO_SQR_THRESHOLD (MULLO_MUL_N_THRESHOLD) +#endif + #ifndef DC_DIV_QR_THRESHOLD #define DC_DIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD) #endif @@ -4789,6 +4807,18 @@ extern mp_size_t mullo_dc_threshold; #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold extern mp_size_t mullo_mul_n_threshold; +#undef SQRLO_BASECASE_THRESHOLD +#define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold +extern mp_size_t sqrlo_basecase_threshold; + +#undef SQRLO_DC_THRESHOLD +#define SQRLO_DC_THRESHOLD sqrlo_dc_threshold +extern mp_size_t sqrlo_dc_threshold; + +#undef SQRLO_SQR_THRESHOLD +#define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold +extern mp_size_t sqrlo_sqr_threshold; + #undef MULMID_TOOM42_THRESHOLD #define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold extern mp_size_t mulmid_toom42_threshold; @@ -4985,6 +5015,8 @@ extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE]; #undef MUL_TOOM22_THRESHOLD_LIMIT #undef MUL_TOOM33_THRESHOLD_LIMIT #undef 
MULLO_BASECASE_THRESHOLD_LIMIT +#undef SQRLO_BASECASE_THRESHOLD_LIMIT +#undef SQRLO_DC_THRESHOLD_LIMIT #undef SQR_TOOM3_THRESHOLD_LIMIT #define SQR_TOOM2_MAX_GENERIC 200 #define MUL_TOOM22_THRESHOLD_LIMIT 700 @@ -4997,6 +5029,8 @@ extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE]; #define MUL_TOOM8H_THRESHOLD_LIMIT 1200 #define SQR_TOOM8_THRESHOLD_LIMIT 1200 #define MULLO_BASECASE_THRESHOLD_LIMIT 200 +#define SQRLO_BASECASE_THRESHOLD_LIMIT 200 +#define SQRLO_DC_THRESHOLD_LIMIT 400 #define GET_STR_THRESHOLD_LIMIT 150 #define FAC_DSC_THRESHOLD_LIMIT 2048 diff --git a/mpn/generic/sqrlo.c b/mpn/generic/sqrlo.c index c0ff44ef6..1b6946ac6 100644 --- a/mpn/generic/sqrlo.c +++ b/mpn/generic/sqrlo.c @@ -38,19 +38,6 @@ see https://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" -#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT -#define SQRLO_BASECASE_THRESHOLD_LIMIT 200 -#endif -#ifndef SQRLO_BASECASE_THRESHOLD -#define SQRLO_BASECASE_THRESHOLD 0 -#endif -#ifndef SQRLO_DC_THRESHOLD -#define SQRLO_DC_THRESHOLD (2*SQR_TOOM2_THRESHOLD) -#endif -#ifndef SQRLO_SQR_THRESHOLD -#define SQRLO_SQR_THRESHOLD (2*SQR_FFT_THRESHOLD) -#endif - #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY #define MAYBE_range_basecase 1 #define MAYBE_range_toom22 1 diff --git a/mpn/generic/sqrlo_basecase.c b/mpn/generic/sqrlo_basecase.c index 9dbdea7cd..867000791 100644 --- a/mpn/generic/sqrlo_basecase.c +++ b/mpn/generic/sqrlo_basecase.c @@ -91,6 +91,9 @@ see https://www.gnu.org/licenses/. */ } while (0) #endif +/* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */ +#define SQRLO_BASECASE_ALLOC \ + (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1) /* Default mpn_sqrlo_basecase using mpn_addmul_1. 
*/ #ifndef SQRLO_SPECIAL_CASES @@ -147,11 +150,11 @@ mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) } else { - mp_limb_t tp[2 * SQR_TOOM2_THRESHOLD - 1]; + mp_limb_t tp[SQRLO_BASECASE_ALLOC]; mp_size_t i; /* must fit n-1 limbs in tp */ - ASSERT (n <= 2 * SQR_TOOM2_THRESHOLD); + ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT); --n; #if SQRLO_SHORTCUT_MULTIPLICATIONS diff --git a/tune/Makefile.am b/tune/Makefile.am index a60427dd6..e12e1d0dd 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -146,7 +146,7 @@ TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \ hgcd.c hgcd_appr.c hgcd_reduce.c \ mul_n.c sqr.c sec_powm.c \ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \ - mulmid.c mulmid_n.c toom42_mulmid.c \ + mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c diff --git a/tune/tuneup.c b/tune/tuneup.c index 32f2b765a..283e919eb 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -181,6 +181,9 @@ mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX; mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX; mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX; mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX; +mp_size_t sqrlo_basecase_threshold = MP_SIZE_T_MAX; +mp_size_t sqrlo_dc_threshold = MP_SIZE_T_MAX; +mp_size_t sqrlo_sqr_threshold = MP_SIZE_T_MAX; mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX; mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX; mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX; @@ -1390,6 +1393,52 @@ tune_mullo (void) } void +tune_sqrlo (void) +{ + static struct param_t param; + + param.function = speed_mpn_sqrlo; + + param.name = "SQRLO_BASECASE_THRESHOLD"; + param.min_size = 1; + param.min_is_always = 1; + param.max_size = SQRLO_BASECASE_THRESHOLD_LIMIT-1; + param.stop_factor = 1.5; + param.noprint = 1; + one (&sqrlo_basecase_threshold, &param); + + param.name = "SQRLO_DC_THRESHOLD"; + 
param.min_size = 8; + param.min_is_always = 0; + param.max_size = SQRLO_DC_THRESHOLD_LIMIT-1; + one (&sqrlo_dc_threshold, &param); + + if (sqrlo_basecase_threshold >= sqrlo_dc_threshold) + { + print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_dc_threshold); + print_define_remark ("SQRLO_DC_THRESHOLD", 0, "never mpn_sqrlo_basecase"); + } + else + { + print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_basecase_threshold); + print_define ("SQRLO_DC_THRESHOLD", sqrlo_dc_threshold); + } + + if (WANT_FFT && sqr_fft_threshold < MP_SIZE_T_MAX / 2) + { + param.name = "SQRLO_SQR_THRESHOLD"; + param.min_size = sqrlo_dc_threshold; + param.max_size = 2 * sqr_fft_threshold; + param.noprint = 0; + param.step_factor = 0.03; + one (&sqrlo_sqr_threshold, &param); + } + else + print_define_remark ("SQRLO_SQR_THRESHOLD", MP_SIZE_T_MAX, + "without FFT use sqrlo forever"); +} + +void tune_mulmid (void) { static struct param_t param; @@ -2836,6 +2885,7 @@ all (void) printf ("\n"); tune_mullo (); + tune_sqrlo (); printf("\n"); tune_dc_div (); |