diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2011-11-13 21:31:57 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2011-11-13 21:31:57 +0100 |
commit | e037315eefee1b249bbe052bfd84c1a1c01c6f72 (patch) | |
tree | 2b35e55f2dd56d23f887d8d0b3e551f5e8628b25 | |
parent | 890e8c8008d6518223533612dfe95b07db2c696d (diff) | |
download | gmp-e037315eefee1b249bbe052bfd84c1a1c01c6f72.tar.gz |
Add support for POWM_SEC_TABLE table.
-rw-r--r-- | mpn/generic/powm_sec.c | 14 | ||||
-rw-r--r-- | tune/Makefile.am | 2 | ||||
-rw-r--r-- | tune/tuneup.c | 135 |
3 files changed, 147 insertions, 4 deletions
diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c index 3a6f55403..c6358947b 100644 --- a/mpn/generic/powm_sec.c +++ b/mpn/generic/powm_sec.c @@ -189,15 +189,27 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits) } } +#ifndef POWM_SEC_TABLE +#if GMP_NUMB_BITS < 50 +#define POWM_SEC_TABLE 2,33,96,780,2741 +#else +#define POWM_SEC_TABLE 2,130,524,2578 +#endif +#endif + +#if TUNE_PROGRAM_BUILD +extern int win_size (mp_bitcnt_t); +#else static inline int win_size (mp_bitcnt_t eb) { int k; - static mp_bitcnt_t x[] = {0,4,27,100,325,1026,2905,7848,20457,51670,~(mp_bitcnt_t)0}; + static mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0}; for (k = 1; eb > x[k]; k++) ; return k; } +#endif /* Convert U to REDC form, U_r = B^n * U mod M */ static void diff --git a/tune/Makefile.am b/tune/Makefile.am index 117e5ca2c..38b1fe9d2 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -132,7 +132,7 @@ TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \ invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \ get_str.c set_str.c matrix22_mul.c \ hgcd.c hgcd_appr.c hgcd_reduce.c \ - mul_n.c sqr.c \ + mul_n.c sqr.c powm_sec.c \ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \ mulmid.c mulmid_n.c toom42_mulmid.c \ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ diff --git a/tune/tuneup.c b/tune/tuneup.c index ce1db103d..c30d19d6b 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -192,7 +192,6 @@ mp_size_t binv_newton_threshold = MP_SIZE_T_MAX; mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX; mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX; mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX; -mp_size_t powm_threshold = MP_SIZE_T_MAX; mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX; mp_size_t hgcd_threshold = MP_SIZE_T_MAX; mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX; @@ -1801,6 +1800,134 @@ tune_gcdext_dc (void) one (&gcdext_dc_threshold, ¶m); } +/* In tune_powm_sec we compute the table used by the win_size function. The + cutoff points are in exponent bits, disregarding other operand sizes. It is + not possible to use the one framework since it currently uses a granilarity + of full limbs. +*/ + +/* This win_size replaces the variant in the powm code, allowing us to + control k in the k-ary algorithms. */ +int winsize; +int +win_size (mp_bitcnt_t eb) +{ + return winsize; +} + +void +tune_powm_sec (void) +{ + mp_size_t n; + int k, i; + mp_size_t itch; + mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff; + const int n_max = 3000 / GMP_NUMB_BITS; + const int n_measurements = 5; + mp_ptr rp, bp, ep, mp, tp; + double ttab[n_measurements], tk, tkp1; + TMP_DECL; + TMP_MARK; + + possible_nbits_cutoff = 0; + + k = 1; + + winsize = 10; /* the itch function needs this */ + itch = mpn_powm_sec_itch (n_max, n_max, n_max); + + rp = TMP_ALLOC_LIMBS (n_max); + bp = TMP_ALLOC_LIMBS (n_max); + ep = TMP_ALLOC_LIMBS (n_max); + mp = TMP_ALLOC_LIMBS (n_max); + tp = TMP_ALLOC_LIMBS (itch); + + mpn_random (bp, n_max); + mpn_random (mp, n_max); + mp[0] |= 1; + +/* How about taking the M operand size into account? + + An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming + B = O(M)). + + Using k-ary and no sliding window, the precomputation will need time + O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) + + O(log(E)/k*M(N)), for the squarings, multiplications, respectively. + + An operation R=powm_sec(B,E,N) will take time like powm. + + Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the + main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) + + O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full + table reads, respectively. */ + + printf ("#define POWM_SEC_TABLE "); + + for (nbits = 1; nbits <= n_max * GMP_NUMB_BITS; ) + { + n = (nbits - 1) / GMP_NUMB_BITS + 1; + + /* Generate E such that sliding-window for k and k+1 works equally + well/poorly (but sliding is not used in powm_sec, of course). */ + for (i = 0; i < n; i++) + ep[i] = ~CNST_LIMB(0); + + /* Truncate E to be exactly nbits large. */ + if (nbits % GMP_NUMB_BITS != 0) + mpn_rshift (ep, ep, n, GMP_NUMB_BITS - nbits % GMP_NUMB_BITS); + ep[n - 1] |= CNST_LIMB(1) << (nbits - 1) % GMP_NUMB_BITS; + + winsize = k; + for (i = 0; i < n_measurements; i++) + { + speed_starttime (); + mpn_powm_sec (rp, bp, n, ep, n, mp, n, tp); + ttab[i] = speed_endtime (); + } + tk = median (ttab, n_measurements); + + winsize = k + 1; + speed_starttime (); + for (i = 0; i < n_measurements; i++) + { + speed_starttime (); + mpn_powm_sec (rp, bp, n, ep, n, mp, n, tp); + ttab[i] = speed_endtime (); + } + tkp1 = median (ttab, n_measurements); +/* + printf ("testing: %ld, %d", nbits, k, ep[n-1]); + printf (" %10.5f %10.5f\n", tk, tkp1); +*/ + if (tkp1 < tk) + { + if (possible_nbits_cutoff) + { + /* Two consecutive sizes indicate k increase, obey. */ + if (k > 1) + printf (","); + printf ("%ld", (long) possible_nbits_cutoff); + k++; + possible_nbits_cutoff = 0; + } + else + { + /* One measurement indicate k increase, save nbits for further + consideration. */ + possible_nbits_cutoff = nbits; + } + } + else + possible_nbits_cutoff = 0; + + nbits_next = nbits * 65 / 64; + nbits = nbits_next + (nbits_next == nbits); + } + printf ("\n"); + TMP_FREE; +} + /* size_extra==1 reflects the fact that with high<divisor one division is always skipped. Forcing high<divisor while testing ensures consistency @@ -1896,7 +2023,6 @@ tune_mod_1 (void) { static struct param_t param; double t1, t2; - int method; s.size = 10; s.r = randlimb_half (); @@ -2575,6 +2701,11 @@ all (void) tune_sqrmod_bnm1 (); printf("\n"); +#if 1 + tune_powm_sec (); + printf("\n"); +#endif + tune_fft_mul (); printf("\n"); |