summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marco Bodrato <bodrato@mail.dm.unipi.it> 2022-02-15 09:04:59 +0100
committer Marco Bodrato <bodrato@mail.dm.unipi.it> 2022-02-15 09:04:59 +0100
commit e9b82efc35aa13d4aa3cfa7b0d2e3f919d17b290 (patch)
tree 785338f940314586a6ab7a7a2b6e8368caa64484
parent 7ec32571f3a3dbeafa591d9e558b62fcd01ee3ff (diff)
download gmp-e9b82efc35aa13d4aa3cfa7b0d2e3f919d17b290.tar.gz
tune/: tune/speed support for mpn_{mul,sqr}mod_bknp1
-rw-r--r-- tune/common.c 52
-rw-r--r-- tune/speed.c 5
-rw-r--r-- tune/speed.h 68
3 files changed, 125 insertions, 0 deletions
diff --git a/tune/common.c b/tune/common.c
index b757bf271..52c6d5eea 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1598,6 +1598,58 @@ speed_mpn_sqrmod_bnm1 (struct speed_params *s)
}
double
+speed_mpn_mulmod_bknp1 (struct speed_params *s) /* Times mpn_mulmod_bknp1 on s->xp and s->yp; returns the measured time. */
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_mulmod_bknp1 (wp, s->xp, s->yp, nk, k, tp),1); /* use_r = 1: s->r may select k; wp, nk, k, tp are locals declared by the macro */
+}
+
+double
+speed_mpn_sqrmod_bknp1 (struct speed_params *s) /* Times mpn_sqrmod_bknp1 on s->xp; returns the measured time. */
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_sqrmod_bknp1 (wp, s->xp, nk, k, tp),1); /* use_r = 1: s->r may select k; only one source operand is passed */
+}
+
+static void /* Comparison routine timed by speed_mpn_mulmod_bnp1; same argument list as mpn_mulmod_bknp1. */
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+ unsigned k, mp_ptr tp)
+{
+ if (k > 2) /* k > 2: forward all arguments unchanged to mpn_mulmod_bknp1 */
+ mpn_mulmod_bknp1 (rp, ap, bp, n, k, tp);
+ else
+ {
+ n *= k; /* from here on n is the full operand length n*k */
+ mpn_mul_n (tp, ap, bp, n); /* plain product of the low n limbs of ap and bp, written to tp */
+ mpn_sub_n (rp, tp, tp + n, n); /* rp = tp[0..n) - tp[n..2n); the borrow returned by mpn_sub_n is discarded */
+ }
+}
+
+static void /* Comparison routine timed by speed_mpn_sqrmod_bnp1; same argument list as mpn_sqrmod_bknp1. */
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t n,
+ unsigned k, mp_ptr tp)
+{
+ if (k > 2) /* k > 2: forward all arguments unchanged to mpn_sqrmod_bknp1 */
+ mpn_sqrmod_bknp1 (rp, ap, n, k, tp);
+ else
+ {
+ n *= k; /* from here on n is the full operand length n*k */
+ mpn_sqr (tp, ap, n); /* plain square of the low n limbs of ap, written to tp */
+ mpn_sub_n (rp, tp, tp + n, n); /* rp = tp[0..n) - tp[n..2n); the borrow returned by mpn_sub_n is discarded */
+ }
+}
+
+double
+speed_mpn_mulmod_bnp1 (struct speed_params *s) /* Times mpn_bc_mulmod_bnp1 on s->xp and s->yp; returns the measured time. */
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_bc_mulmod_bnp1 (wp, s->xp, s->yp, nk, k, tp),0); /* use_r = 0: s->r is ignored and k is derived from s->size (k may be 1) */
+}
+
+double
+speed_mpn_sqrmod_bnp1 (struct speed_params *s) /* Times mpn_bc_sqrmod_bnp1 on s->xp; returns the measured time. */
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_bc_sqrmod_bnp1 (wp, s->xp, nk, k, tp),0); /* use_r = 0: s->r is ignored and k is derived from s->size (k may be 1) */
+}
+
+double
speed_mpn_matrix22_mul (struct speed_params *s)
{
/* Speed params only includes 2 inputs, so we have to invent the
diff --git a/tune/speed.c b/tune/speed.c
index e764204a2..e0de68a56 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -385,6 +385,11 @@ const struct routine_t {
{ "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
{ "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 },
+ { "mpn_mulmod_bknp1", speed_mpn_mulmod_bknp1, FLAG_R_OPTIONAL }, /* r, when supplied, is used as k by the measuring routine */
+ { "mpn_sqrmod_bknp1", speed_mpn_sqrmod_bknp1, FLAG_R_OPTIONAL }, /* r, when supplied, is used as k by the measuring routine */
+ { "mpn_mulmod_bnp1", speed_mpn_mulmod_bnp1 }, /* no r flag: k is always derived from the size */
+ { "mpn_sqrmod_bnp1", speed_mpn_sqrmod_bnp1 }, /* no r flag: k is always derived from the size */
+
{ "mpn_invert", speed_mpn_invert },
{ "mpn_invertappr", speed_mpn_invertappr },
{ "mpn_ni_invertappr", speed_mpn_ni_invertappr },
diff --git a/tune/speed.h b/tune/speed.h
index d82a6051a..3155232c8 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -388,6 +388,10 @@ double speed_mpn_mulmod_bnm1 (struct speed_params *);
double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
double speed_mpn_sqrmod_bnm1 (struct speed_params *);
+double speed_mpn_mulmod_bknp1 (struct speed_params *); /* times mpn_mulmod_bknp1; defined in tune/common.c */
+double speed_mpn_sqrmod_bknp1 (struct speed_params *); /* times mpn_sqrmod_bknp1; defined in tune/common.c */
+double speed_mpn_mulmod_bnp1 (struct speed_params *); /* times the static helper mpn_bc_mulmod_bnp1; defined in tune/common.c */
+double speed_mpn_sqrmod_bnp1 (struct speed_params *); /* times the static helper mpn_bc_sqrmod_bnp1; defined in tune/common.c */
double speed_mpn_udiv_qrnnd (struct speed_params *);
double speed_mpn_udiv_qrnnd_r (struct speed_params *);
double speed_mpn_umul_ppmm (struct speed_params *);
@@ -1402,6 +1406,70 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
+#ifndef MOD_BKNP1_USE11 /* default used only when the macro is not already defined */
+#define MOD_BKNP1_USE11 0 /* 0: the timing macro rejects r == 11 and grants k == 11 no exemption from its divisibility check */
+#endif
+#ifndef MOD_BKNP1_ONLY3 /* default used only when the macro is not already defined */
+#define MOD_BKNP1_ONLY3 0 /* 0: the timing macro does not force k = 3 */
+#endif
+
+#define SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL(call,use_r) /* Function body: runs "call" s->reps times and returns the elapsed time; use_r != 0 lets s->r choose k. */ \
+ { \
+ mp_ptr wp, tp; /* wp: destination area; tp: scratch area; both are visible to "call" */ \
+ unsigned i, k; /* i: repetition counter; k: number of parts the size is split into */ \
+ double t; /* elapsed time, the macro's return value */ \
+ mp_size_t itch, nk; /* itch: scratch size in limbs; nk: s->size / k */ \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); /* NOTE(review): presumably abandons the measurement when the condition is false -- confirm against SPEED_RESTRICT_COND's definition */ \
+ SPEED_RESTRICT_COND (!use_r || (s->r == 0) || /* with use_r, r must be 0 or one of 3, 5, 7, 13, 17 ... */ \
+ (s->r == 3) || (s->r == 5) || (s->r == 7) || \
+ (s->r == 13) || (s->r == 17) || \
+ ((MOD_BKNP1_USE11) && (s->r == 11))); /* ... or 11, but only when MOD_BKNP1_USE11 is nonzero */ \
+ \
+ if (!use_r || (s->r < 2)) /* no usable r: take k as the first of 3, 5, 7, 11, 13, 17 that divides the size */ \
+ { \
+ if (s->size % 3 == 0) {nk = s->size / (k = 3);} \
+ else if (s->size % 5 == 0) {nk = s->size / (k = 5);} \
+ else if (s->size % 7 == 0) {nk = s->size / (k = 7);} \
+ else if (s->size % 11 == 0) {nk = s->size / (k = 11);} \
+ else if (s->size % 13 == 0) {nk = s->size / (k = 13);} \
+ else if (s->size % 17 == 0) {nk = s->size / (k = 17);} \
+ else nk = s->size / (k = 1); /* none divides: k = 1 and nk = s->size */ \
+ } \
+ else nk = s->size / (k = s->r); /* explicit r: k = r; divisibility is verified below */ \
+ \
+ if (MOD_BKNP1_ONLY3) /* nk is not recomputed here, so the next check passes only if k was already 3 */ \
+ k = 3; \
+ SPEED_RESTRICT_COND ((!use_r || (k > 2)) && (s->size == k * nk)); /* with use_r require k > 2; always require that k divides the size exactly */ \
+ SPEED_RESTRICT_COND ((GMP_NUMB_MAX % k == 0) || (nk % 3 != 0) || /* NOTE(review): presumably mirrors a precondition of mpn_mulmod_bknp1 -- confirm */ \
+ ((MOD_BKNP1_USE11) && (k == 11))); \
+ \
+ itch = mpn_mulmod_bknp1_itch (s->size); /* scratch size is queried for the full size, not for nk */ \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size + 2, s->align_wp); /* destination: 2 * size + 2 limbs */ \
+ SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); /* scratch: itch limbs */ \
+ \
+ s->xp [s->size] &= 1; /* modifies the source operands in place: only bit 0 of the limb at index size is kept */ \
+ s->yp [s->size] &= 1; /* also done when "call" does not read s->yp (the squaring variants) */ \
+ speed_operand_src (s, s->xp, s->size + 1); /* sources are registered as size + 1 limbs */ \
+ speed_operand_src (s, s->yp, s->size + 1); \
+ speed_operand_dst (s, wp, 2 * s->size + 2); \
+ speed_operand_dst (s, tp, itch); \
+ speed_cache_fill (s); /* NOTE(review): presumably primes the cache for the registered operands -- confirm */ \
+ \
+ speed_starttime (); /* timed region starts */ \
+ i = s->reps; \
+ do /* runs "call" at least once; assumes s->reps >= 1, since i is unsigned and is decremented before the test */ \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); /* timed region ends */ \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
{ \
mp_ptr wp, tspace; \