summary | refs | log | tree | commit | diff
path: root/tune
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2011-10-03 13:30:35 +0200
committer: Niels Möller <nisse@lysator.liu.se> 2011-10-03 13:30:35 +0200
commit: 730b95e57ffcc38a364b0cd1f5352aaf6ea361c4 (patch)
tree: b82237bffd1a08089e40a6ab4fb122f04cc37d21 /tune
parent: ab5d3c7f9553c3a3058813b7076d061794a57b33 (diff)
download: gmp-730b95e57ffcc38a364b0cd1f5352aaf6ea361c4.tar.gz
Tuning of mulmid.
Diffstat (limited to 'tune')
-rw-r--r-- tune/Makefile.am 1
-rw-r--r-- tune/common.c 56
-rw-r--r-- tune/speed.c 12
-rw-r--r-- tune/speed.h 178
-rw-r--r-- tune/tuneup.c 16
5 files changed, 262 insertions, 1 deletions
diff --git a/tune/Makefile.am b/tune/Makefile.am
index c932cd3ab..e54c020d4 100644
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -131,6 +131,7 @@ TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \
mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
diff --git a/tune/common.c b/tune/common.c
index 63d582399..293474014 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -926,6 +926,38 @@ speed_mpn_sub_n (struct speed_params *s)
SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
}
+double
+speed_mpn_add_err1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
+}
+double
+speed_mpn_sub_err1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
+}
+double
+speed_mpn_add_err2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
+}
+double
+speed_mpn_sub_err2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
+}
+double
+speed_mpn_add_err3_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
+}
+double
+speed_mpn_sub_err3_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
+}
+
+
#if HAVE_NATIVE_mpn_add_n_sub_n
double
speed_mpn_add_n_sub_n (struct speed_params *s)
@@ -1369,6 +1401,30 @@ speed_mpn_mullo_basecase (struct speed_params *s)
}
double
+speed_mpn_mulmid_basecase (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
+}
+
+double
+speed_mpn_mulmid (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
+}
+
+double
+speed_mpn_mulmid_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
+}
+
+double
+speed_mpn_toom42_mulmid (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
+}
+
+double
speed_mpn_mulmod_bnm1 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp));
diff --git a/tune/speed.c b/tune/speed.c
index 27894a868..245318bb3 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -153,6 +153,13 @@ const struct routine_t {
{ "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL },
{ "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL },
+ { "mpn_add_err1_n", speed_mpn_add_err1_n },
+ { "mpn_add_err2_n", speed_mpn_add_err2_n },
+ { "mpn_add_err3_n", speed_mpn_add_err3_n },
+ { "mpn_sub_err1_n", speed_mpn_sub_err1_n },
+ { "mpn_sub_err2_n", speed_mpn_sub_err2_n },
+ { "mpn_sub_err3_n", speed_mpn_sub_err3_n },
+
#if HAVE_NATIVE_mpn_add_n_sub_n
{ "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL },
#endif
@@ -332,6 +339,11 @@ const struct routine_t {
{ "mpn_mullo_n", speed_mpn_mullo_n },
{ "mpn_mullo_basecase", speed_mpn_mullo_basecase },
+ { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
+ { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid },
+ { "mpn_mulmid_n", speed_mpn_mulmid_n },
+ { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL },
+
{ "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 },
{ "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 },
{ "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
diff --git a/tune/speed.h b/tune/speed.h
index 48a420668..0074d4f7f 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -117,7 +117,7 @@ struct speed_params {
struct {
mp_ptr ptr;
mp_size_t size;
- } src[3], dst[4];
+ } src[5], dst[4];
};
typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
@@ -145,6 +145,9 @@ double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_add_err1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_add_err2_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_add_err3_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_addlsh_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_addlsh2_n __GMP_PROTO ((struct speed_params *s));
@@ -234,6 +237,8 @@ double speed_mpn_mul_5 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul_6 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mulmid __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mulmid_basecase __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_fft_mul __GMP_PROTO ((struct speed_params *s));
@@ -246,6 +251,7 @@ double speed_mpn_nussbaumer_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_nussbaumer_mul_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mulmid_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mullo_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mullo_basecase __GMP_PROTO ((struct speed_params *s));
double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
@@ -294,6 +300,9 @@ double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sub_err1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sub_err2_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sub_err3_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sublsh_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sublsh2_n __GMP_PROTO ((struct speed_params *s));
@@ -321,6 +330,7 @@ double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom42_mulmid __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
@@ -712,6 +722,72 @@ int speed_routine_count_zeros_setup
return t; \
}
+
+/* For mpn_aors_errK_n, where 1 <= K <= 3. */
+#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K) \
+ { \
+ mp_ptr wp; \
+ mp_ptr xp, yp; \
+ mp_ptr zp[K]; \
+ mp_limb_t ep[2*K]; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ /* (don't have a mechanism to specify zp alignments) */ \
+ for (i = 0; i < K; i++) \
+ SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0); \
+ \
+ xp = s->xp; \
+ yp = s->yp; \
+ \
+ if (s->r == 0) ; \
+ else if (s->r == 1) { xp = wp; } \
+ else if (s->r == 2) { yp = wp; } \
+ else if (s->r == 3) { xp = wp; yp = wp; } \
+ else if (s->r == 4) { yp = xp; } \
+ else { \
+ TMP_FREE; \
+ return -1.0; \
+ } \
+ \
+ /* initialize wp if it overlaps a source operand */ \
+ if (xp == wp || yp == wp) \
+ MPN_COPY (wp, s->xp, s->size); \
+ \
+ speed_operand_src (s, xp, s->size); \
+ speed_operand_src (s, yp, s->size); \
+ for (i = 0; i < K; i++) \
+ speed_operand_src (s, zp[i], s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1)
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2)
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3)
+
+
/* For mpn_add_n, mpn_sub_n, or similar. */
#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call) \
{ \
@@ -1050,6 +1126,106 @@ int speed_routine_count_zeros_setup
return t; \
}
+/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r (or 2*size-1 when r is 0), ysize=s->size. */
+#define SPEED_ROUTINE_MPN_MULMID(function) \
+ { \
+ mp_ptr wp, xp; \
+ mp_size_t size1; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ size1 = (s->r == 0 ? (2 * s->size - 1) : s->r); \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+ \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, size1 - s->size + 3); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, xp, size1, s->yp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_MULMID_N(function) \
+ { \
+ mp_ptr wp, xp; \
+ mp_size_t size1; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ size1 = 2 * s->size - 1; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+ \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, size1 - s->size + 3); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, xp, s->yp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) \
+ { \
+ mp_ptr wp, xp, scratch; \
+ mp_size_t size1, scratch_size; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ size1 = 2 * s->size - 1; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+ scratch_size = mpn_toom42_mulmid_itch (s->size); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0); \
+ \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, size1 - s->size + 3); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, xp, s->yp, s->size, scratch); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \
{ \
mp_ptr wp, tp; \
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 88ee2158b..4f53c979c 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -170,6 +170,7 @@ mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX;
+mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX;
mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX;
mp_size_t div_sb_preinv_threshold = MP_SIZE_T_MAX;
@@ -1345,6 +1346,18 @@ tune_mullo (void)
}
void
+tune_mulmid (void)
+{
+ static struct param_t param;
+
+ param.name = "MULMID_TOOM42_THRESHOLD";
+ param.function = speed_mpn_mulmid_n;
+ param.min_size = 4;
+ param.max_size = 100;
+ one (&mulmid_toom42_threshold, &param);
+}
+
+void
tune_mulmod_bnm1 (void)
{
static struct param_t param;
@@ -2532,6 +2545,9 @@ all (void)
tune_sqr ();
printf("\n");
+ tune_mulmid ();
+ printf("\n");
+
tune_mulmod_bnm1 ();
tune_sqrmod_bnm1 ();
printf("\n");