summary | refs | log | tree | commit | diff
path: root/tune
diff options
context:
space:
mode:
author: Torbjorn Granlund <tg@gmplib.org> 2017-07-21 18:32:31 +0200
committer: Torbjorn Granlund <tg@gmplib.org> 2017-07-21 18:32:31 +0200
commit: 2ca58a400fbee53ccae13fad241a346e4e43e21a (patch)
tree: 5173ddce0a84608912a5c53e7f4311dcebe28b45 /tune
parent: 9b13801dce562250c91304c3dd2d8574a0a73449 (diff)
download: gmp-2ca58a400fbee53ccae13fad241a346e4e43e21a.tar.gz
Measure mpn_sbpi1_bdiv_r.
Diffstat (limited to 'tune')
-rw-r--r--  tune/common.c  10
-rw-r--r--  tune/speed.c    1
-rw-r--r--  tune/speed.h   41
3 files changed, 50 insertions, 2 deletions
diff --git a/tune/common.c b/tune/common.c
index 60b29648b..3c1f8df44 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -225,8 +225,9 @@ speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
e, (TOLERANCE-1.0)*100.0);
fprintf (stderr, " unsorted sorted\n");
- fprintf (stderr, " %.12f %.12f is about 0.5%%\n",
- t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
+ fprintf (stderr, " %.12f %.12f is about %.1f%%\n",
+ t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0),
+ 100*(TOLERANCE-1.0));
for (i = 0; i < numberof (t); i++)
fprintf (stderr, " %.09f %.09f\n", t_unsorted[i], t[i]);
@@ -895,6 +896,11 @@ speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
}
double
+speed_mpn_sbpi1_bdiv_r (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_R (mpn_sbpi1_bdiv_r);
+}
+double
speed_mpn_mu_bdiv_q (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
diff --git a/tune/speed.c b/tune/speed.c
index d7bec7afc..b1c2cd44e 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -382,6 +382,7 @@ const struct routine_t {
{ "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr },
{ "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q },
{ "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q },
+ { "mpn_sbpi1_bdiv_r", speed_mpn_sbpi1_bdiv_r },
{ "mpn_broot", speed_mpn_broot, FLAG_R },
{ "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R },
diff --git a/tune/speed.h b/tune/speed.h
index c8c01093c..6ea000b4c 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -299,6 +299,7 @@ double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_r (struct speed_params *);
double speed_mpn_mu_bdiv_q (struct speed_params *);
double speed_mpn_mu_bdiv_qr (struct speed_params *);
double speed_mpn_broot (struct speed_params *);
@@ -2033,6 +2034,46 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
TMP_FREE; \
return t; \
}
+#define SPEED_ROUTINE_MPN_PI1_BDIV_R(function) \
+ { \
+ unsigned i; \
+ mp_ptr dp, tp, ap; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
+ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ binvert_limb (inv, dp[0]); \
+ inv = -inv; \
+ \
+ speed_operand_src (s, ap, 2*s->size); \
+ speed_operand_dst (s, tp, 2*s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); \
+ function (tp, 2*s->size, dp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn) \
{ \
unsigned i; \