summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarco Bodrato <bodrato@mail.dm.unipi.it>2015-08-25 19:59:27 +0200
committerMarco Bodrato <bodrato@mail.dm.unipi.it>2015-08-25 19:59:27 +0200
commit7ca6b0302f59e6be1d6814db8793910ffcdccb80 (patch)
tree3e36f77097ce48241f65c8dc596b0664d53dad66
parentfb60f49fcda637f80f549fbcc5085c2383e894ff (diff)
downloadgmp-7ca6b0302f59e6be1d6814db8793910ffcdccb80.tar.gz
Tuning code for SQRLO_*_THRESHOLDs.
-rw-r--r--gmp-impl.h34
-rw-r--r--mpn/generic/sqrlo.c13
-rw-r--r--mpn/generic/sqrlo_basecase.c7
-rw-r--r--tune/Makefile.am2
-rw-r--r--tune/tuneup.c50
5 files changed, 90 insertions, 16 deletions
diff --git a/gmp-impl.h b/gmp-impl.h
index 689c91886..24214a604 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -2090,6 +2090,12 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
#ifndef MULLO_BASECASE_THRESHOLD_LIMIT
#define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
#endif
+#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
+#define SQRLO_BASECASE_THRESHOLD_LIMIT SQRLO_BASECASE_THRESHOLD
+#endif
+#ifndef SQRLO_DC_THRESHOLD_LIMIT
+#define SQRLO_DC_THRESHOLD_LIMIT SQRLO_DC_THRESHOLD
+#endif
/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
@@ -2138,6 +2144,18 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
#define MULLO_MUL_N_THRESHOLD (2*MUL_FFT_THRESHOLD)
#endif
+#ifndef SQRLO_BASECASE_THRESHOLD
+#define SQRLO_BASECASE_THRESHOLD 0 /* never use mpn_sqr_basecase */
+#endif
+
+#ifndef SQRLO_DC_THRESHOLD
+#define SQRLO_DC_THRESHOLD (MULLO_DC_THRESHOLD)
+#endif
+
+#ifndef SQRLO_SQR_THRESHOLD
+#define SQRLO_SQR_THRESHOLD (MULLO_MUL_N_THRESHOLD)
+#endif
+
#ifndef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
#endif
@@ -4789,6 +4807,18 @@ extern mp_size_t mullo_dc_threshold;
#define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
extern mp_size_t mullo_mul_n_threshold;
+#undef SQRLO_BASECASE_THRESHOLD
+#define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold
+extern mp_size_t sqrlo_basecase_threshold;
+
+#undef SQRLO_DC_THRESHOLD
+#define SQRLO_DC_THRESHOLD sqrlo_dc_threshold
+extern mp_size_t sqrlo_dc_threshold;
+
+#undef SQRLO_SQR_THRESHOLD
+#define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold
+extern mp_size_t sqrlo_sqr_threshold;
+
#undef MULMID_TOOM42_THRESHOLD
#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
extern mp_size_t mulmid_toom42_threshold;
@@ -4985,6 +5015,8 @@ extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
#undef MUL_TOOM22_THRESHOLD_LIMIT
#undef MUL_TOOM33_THRESHOLD_LIMIT
#undef MULLO_BASECASE_THRESHOLD_LIMIT
+#undef SQRLO_BASECASE_THRESHOLD_LIMIT
+#undef SQRLO_DC_THRESHOLD_LIMIT
#undef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM2_MAX_GENERIC 200
#define MUL_TOOM22_THRESHOLD_LIMIT 700
@@ -4997,6 +5029,8 @@ extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
#define MUL_TOOM8H_THRESHOLD_LIMIT 1200
#define SQR_TOOM8_THRESHOLD_LIMIT 1200
#define MULLO_BASECASE_THRESHOLD_LIMIT 200
+#define SQRLO_BASECASE_THRESHOLD_LIMIT 200
+#define SQRLO_DC_THRESHOLD_LIMIT 400
#define GET_STR_THRESHOLD_LIMIT 150
#define FAC_DSC_THRESHOLD_LIMIT 2048
diff --git a/mpn/generic/sqrlo.c b/mpn/generic/sqrlo.c
index c0ff44ef6..1b6946ac6 100644
--- a/mpn/generic/sqrlo.c
+++ b/mpn/generic/sqrlo.c
@@ -38,19 +38,6 @@ see https://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
-#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
-#define SQRLO_BASECASE_THRESHOLD_LIMIT 200
-#endif
-#ifndef SQRLO_BASECASE_THRESHOLD
-#define SQRLO_BASECASE_THRESHOLD 0
-#endif
-#ifndef SQRLO_DC_THRESHOLD
-#define SQRLO_DC_THRESHOLD (2*SQR_TOOM2_THRESHOLD)
-#endif
-#ifndef SQRLO_SQR_THRESHOLD
-#define SQRLO_SQR_THRESHOLD (2*SQR_FFT_THRESHOLD)
-#endif
-
#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_range_basecase 1
#define MAYBE_range_toom22 1
diff --git a/mpn/generic/sqrlo_basecase.c b/mpn/generic/sqrlo_basecase.c
index 9dbdea7cd..867000791 100644
--- a/mpn/generic/sqrlo_basecase.c
+++ b/mpn/generic/sqrlo_basecase.c
@@ -91,6 +91,9 @@ see https://www.gnu.org/licenses/. */
} while (0)
#endif
+/* Avoid zero-length allocations when SQRLO_DC_THRESHOLD_LIMIT is below 2 (this code not used). */
+#define SQRLO_BASECASE_ALLOC \
+ (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
/* Default mpn_sqrlo_basecase using mpn_addmul_1. */
#ifndef SQRLO_SPECIAL_CASES
@@ -147,11 +150,11 @@ mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
}
else
{
- mp_limb_t tp[2 * SQR_TOOM2_THRESHOLD - 1];
+ mp_limb_t tp[SQRLO_BASECASE_ALLOC];
mp_size_t i;
/* must fit n-1 limbs in tp */
- ASSERT (n <= 2 * SQR_TOOM2_THRESHOLD);
+ ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
--n;
#if SQRLO_SHORTCUT_MULTIPLICATIONS
diff --git a/tune/Makefile.am b/tune/Makefile.am
index a60427dd6..e12e1d0dd 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -146,7 +146,7 @@ TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
hgcd.c hgcd_appr.c hgcd_reduce.c \
mul_n.c sqr.c sec_powm.c \
mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
- mulmid.c mulmid_n.c toom42_mulmid.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 32f2b765a..283e919eb 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -181,6 +181,9 @@ mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrlo_basecase_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrlo_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrlo_sqr_threshold = MP_SIZE_T_MAX;
mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX;
mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX;
@@ -1390,6 +1393,52 @@ tune_mullo (void)
}
void
+tune_sqrlo (void) /* Determine the SQRLO_BASECASE, SQRLO_DC and SQRLO_SQR thresholds and print their defines. */
+{
+ static struct param_t param; /* one parameter block, passed to every one() call below */
+
+ param.function = speed_mpn_sqrlo; /* set once; the same routine is handed to one() for all thresholds */
+
+ param.name = "SQRLO_BASECASE_THRESHOLD";
+ param.min_size = 1;
+ param.min_is_always = 1;
+ param.max_size = SQRLO_BASECASE_THRESHOLD_LIMIT-1;
+ param.stop_factor = 1.5;
+ param.noprint = 1; /* presumably silences one()'s own output; the first two defines are printed by hand below */
+ one (&sqrlo_basecase_threshold, &param);
+
+ param.name = "SQRLO_DC_THRESHOLD";
+ param.min_size = 8;
+ param.min_is_always = 0;
+ param.max_size = SQRLO_DC_THRESHOLD_LIMIT-1;
+ one (&sqrlo_dc_threshold, &param); /* stop_factor and noprint are still as set above */
+
+ if (sqrlo_basecase_threshold >= sqrlo_dc_threshold) /* basecase value is not below the DC value */
+ {
+ print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_dc_threshold); /* report the DC value as the basecase threshold */
+ print_define_remark ("SQRLO_DC_THRESHOLD", 0, "never mpn_sqrlo_basecase");
+ }
+ else /* normal ordering: print both values as found */
+ {
+ print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_basecase_threshold);
+ print_define ("SQRLO_DC_THRESHOLD", sqrlo_dc_threshold);
+ }
+
+ if (WANT_FFT && sqr_fft_threshold < MP_SIZE_T_MAX / 2) /* the bound keeps 2 * sqr_fft_threshold below MP_SIZE_T_MAX */
+ {
+ param.name = "SQRLO_SQR_THRESHOLD";
+ param.min_size = sqrlo_dc_threshold; /* search starts at the DC threshold just determined */
+ param.max_size = 2 * sqr_fft_threshold;
+ param.noprint = 0; /* undo the noprint = 1 set for the earlier calls */
+ param.step_factor = 0.03;
+ one (&sqrlo_sqr_threshold, &param);
+ }
+ else /* no one() call here: sqrlo_sqr_threshold is left untouched, only the define is printed */
+ print_define_remark ("SQRLO_SQR_THRESHOLD", MP_SIZE_T_MAX,
+ "without FFT use sqrlo forever");
+}
+
+void
tune_mulmid (void)
{
static struct param_t param;
@@ -2836,6 +2885,7 @@ all (void)
printf ("\n");
tune_mullo ();
+ tune_sqrlo ();
printf("\n");
tune_dc_div ();