summary | refs | log | tree | commit | diff
path: root/tune
diff options
context:
space:
mode:
author: Torbjorn Granlund <tg@gmplib.org> 2016-12-31 22:09:11 +0100
committer: Torbjorn Granlund <tg@gmplib.org> 2016-12-31 22:09:11 +0100
commit: 3d92bcc42361db059031dca530e76dc16da48a27 (patch)
tree: c9c6d2334c8a7088ce876e9675055b6154d94c57 /tune
parent: ad7bd8ca59f4d6614e6bc980f931c69a581fee7e (diff)
download: gmp-3d92bcc42361db059031dca530e76dc16da48a27.tar.gz
(relspeed_div_1_vs_mul_1): New function.
Diffstat (limited to 'tune')
-rw-r--r-- tune/tuneup.c 46
1 files changed, 45 insertions, 1 deletions
diff --git a/tune/tuneup.c b/tune/tuneup.c
index f34e1f8a0..b254d4788 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -1,6 +1,6 @@
/* Create tuned thresholds for various algorithms.
-Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999-2003, 2005, 2006, 2008-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -1194,6 +1194,47 @@ fft (struct fft_param_t *p)
}
}
+/* Compare mpn_mul_1 to whatever fast exact single-limb division we have (now
+   mpn_divexact_1, later mpn_bdiv_1_qr_pi2 or somesuch) and report the time
+   ratio as DIV_1_VS_MUL_1_PERCENT.  This is used in get_str and set_str.  */
+void
+relspeed_div_1_vs_mul_1 (void)
+{
+  enum { MAX_OPSIZE = 100 };  /* constant expression: rp, ap are not VLAs */
+  const mp_limb_t fake_big_base = (~CNST_LIMB(0)) / 3;
+  mp_size_t n;
+  long j;
+  mp_limb_t rp[MAX_OPSIZE];
+  mp_limb_t ap[MAX_OPSIZE];
+  double multime, divtime;
+
+  mpn_random (ap, MAX_OPSIZE);
+
+  multime = 0;                /* sum over n of (time for all reps) / n */
+  for (n = MAX_OPSIZE; n > 1; n--)
+    {
+      mpn_mul_1 (rp, ap, n, fake_big_base);      /* one untimed call first */
+      speed_starttime ();
+      for (j = speed_precision; j != 0 ; j--)
+        mpn_mul_1 (rp, ap, n, fake_big_base);
+      multime += speed_endtime () / n;
+    }
+
+  divtime = 0;                /* the same sum, for the division */
+  for (n = MAX_OPSIZE; n > 1; n--)
+    {
+      /* Make {ap,n} divisible for good measure: {ap,n-1} times the divisor.  */
+      ap[n - 1] = mpn_mul_1 (ap, ap, n - 1, fake_big_base);
+
+      mpn_divexact_1 (rp, ap, n, fake_big_base); /* one untimed call first */
+      speed_starttime ();
+      for (j = speed_precision; j != 0 ; j--)
+        mpn_divexact_1 (rp, ap, n, fake_big_base);
+      divtime += speed_endtime () / n;
+    }
+  /* NOTE(review): if multime is 0 the int conversion below is undefined.  */
+  print_define ("DIV_1_VS_MUL_1_PERCENT", (int) (100 * divtime/multime));
+}
/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
@@ -2862,6 +2903,9 @@ all (void)
tune_modexact_1_odd ();
printf("\n");
+ relspeed_div_1_vs_mul_1 ();
+ printf("\n");
+
tune_mul_n ();
printf("\n");