From 7bae50bedb078dcca5716cb268d4a4e44bb59694 Mon Sep 17 00:00:00 2001 From: Torbjorn Granlund Date: Sun, 27 Dec 2009 20:30:53 +0100 Subject: (mpn_sqr): New name for mpn_sqr_n. --- ChangeLog | 2 ++ NEWS | 2 +- configure.in | 2 +- doc/gmp.texi | 4 +-- gmp-h.in | 3 ++ gmp-impl.h | 4 +-- mpf/get_str.c | 2 +- mpf/set_str.c | 2 +- mpn/asm-defs.m4 | 4 --- mpn/generic/fib2_ui.c | 4 +-- mpn/generic/get_str.c | 2 +- mpn/generic/mul.c | 2 +- mpn/generic/mul_fft.c | 2 +- mpn/generic/perfpow.c | 2 +- mpn/generic/pow_1.c | 8 ++--- mpn/generic/powlo.c | 6 ++-- mpn/generic/powm.c | 52 +++++++++++++-------------- mpn/generic/powm_sec.c | 6 ++-- mpn/generic/remove.c | 2 +- mpn/generic/set_str.c | 2 +- mpn/generic/sqr.c | 88 +++++++++++++++++++++++++++++++++++++++++++++ mpn/generic/sqr_n.c | 88 --------------------------------------------- mpn/generic/sqrmod_bnm1.c | 8 ++--- mpn/generic/sqrtrem.c | 2 +- mpn/generic/toom2_sqr.c | 8 ++--- mpn/generic/toom3_sqr.c | 16 ++++----- mpn/generic/toom4_sqr.c | 16 ++++----- mpn/x86/fat/gmp-mparam.h | 2 +- mpn/x86_64/fat/gmp-mparam.h | 2 +- mpz/lucnum_ui.c | 2 +- mpz/n_pow_ui.c | 12 +++---- mpz/powm_ui.c | 2 +- tests/devel/try.c | 2 +- tune/Makefile.am | 51 +------------------------- tune/common.c | 4 +-- tune/speed.c | 2 +- tune/speed.h | 7 +--- tune/tuneup.c | 14 ++++---- 38 files changed, 193 insertions(+), 246 deletions(-) create mode 100644 mpn/generic/sqr.c delete mode 100644 mpn/generic/sqr_n.c diff --git a/ChangeLog b/ChangeLog index 2ccddc484..65f7e38d6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,8 @@ 2009-12-27 Torbjorn Granlund + * (mpn_sqr): New name for mpn_sqr_n. Many files affected. + * tune/tuneup.c (tune_mullo): Up step_factor for MULLO_MUL_N_THRESHOLD. (tune_invertappr, tune_invert, tune_binvert): Let max_size default. diff --git a/NEWS b/NEWS index 7e67f2648..37e7c3cbb 100644 --- a/NEWS +++ b/NEWS @@ -40,7 +40,7 @@ Changes between GMP version 4.3.X and 4.4.0 average, where Q is the quotient. 
Features: - * New mpn functions: mpn_sqr_n, mpn_and_n, mpn_ior_n, mpn_xor_n, + * New mpn functions: mpn_sqr, mpn_and_n, mpn_ior_n, mpn_xor_n, mpn_nand_n, mpn_nior_n, mpn_xnor_n, mpn_andn_n, mpn_iorn_n. * Support for fat binaries for 64-bit x86 processors. * New type, mp_bitcnt_t for bignum bit counts. diff --git a/configure.in b/configure.in index cbe14f16c..6137efcae 100644 --- a/configure.in +++ b/configure.in @@ -2495,7 +2495,7 @@ gmp_mpn_functions="$extra_functions \ submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \ fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump \ mod_1_1 mod_1_2 mod_1_3 mod_1_4 \ - mul mul_fft mul_n sqr_n mul_basecase sqr_basecase nussbaumer_mul \ + mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul \ random random2 pow_1 \ rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp \ perfsqr perfpow \ diff --git a/doc/gmp.texi b/doc/gmp.texi index 234ac9243..e77e2289a 100644 --- a/doc/gmp.texi +++ b/doc/gmp.texi @@ -5175,7 +5175,7 @@ The destination has to have space for 2*@var{n} limbs, even if the product's most significant limb is zero. No overlap is permitted between the destination and either source. -If the two input operands are the same, use @code{mpn_sqr_n}. +If the two input operands are the same, use @code{mpn_sqr}. @end deftypefun @deftypefun mp_limb_t mpn_mul (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n}) @@ -5190,7 +5190,7 @@ destination and either source. This function requires that @var{s1n} is greater than or equal to @var{s2n}. @end deftypefun -@deftypefun void mpn_sqr_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}) +@deftypefun void mpn_sqr (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}) Compute the square of @{@var{s1p}, @var{n}@} and write the 2*@var{n}-limb result to @var{rp}. 
diff --git a/gmp-h.in b/gmp-h.in index 31410c1df..af79f66a9 100644 --- a/gmp-h.in +++ b/gmp-h.in @@ -1568,6 +1568,9 @@ __GMP_DECLSPEC mp_limb_t mpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, m #define mpn_mul_n __MPN(mul_n) __GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_sqr __MPN(sqr) +__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); + #define mpn_neg_n __MPN(neg_n) #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg_n) __GMP_DECLSPEC mp_limb_t mpn_neg_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); diff --git a/gmp-impl.h b/gmp-impl.h index f4bf1174b..eb0c90cf1 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -908,8 +908,8 @@ __GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_s #define mpn_mullo_basecase __MPN(mullo_basecase) __GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -#define mpn_sqr_n __MPN(sqr_n) -__GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#define mpn_sqr __MPN(sqr) +__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); #ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */ #define mpn_sqr_basecase __MPN(sqr_basecase) diff --git a/mpf/get_str.c b/mpf/get_str.c index 30b12e7c2..447bfdbb9 100644 --- a/mpf/get_str.c +++ b/mpf/get_str.c @@ -67,7 +67,7 @@ mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp, count_leading_zeros (cnt, exp); for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--) { - mpn_sqr_n (tp, rp + off, rn); + mpn_sqr (tp, rp + off, rn); rn = 2 * rn; rn -= tp[rn - 1] == 0; ign <<= 1; diff --git a/mpf/set_str.c b/mpf/set_str.c index ce8a6bac3..01a175fa6 100644 --- a/mpf/set_str.c +++ b/mpf/set_str.c @@ -72,7 +72,7 @@ mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp, count_leading_zeros (cnt, exp); for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--) { - mpn_sqr_n (tp, rp + off, rn); + mpn_sqr (tp, rp + off, rn); rn = 2 * rn; rn -= 
tp[rn - 1] == 0; ign <<= 1; diff --git a/mpn/asm-defs.m4 b/mpn/asm-defs.m4 index f66ac66a7..9ad92c182 100644 --- a/mpn/asm-defs.m4 +++ b/mpn/asm-defs.m4 @@ -1347,8 +1347,6 @@ define_mpn(hamdist) define_mpn(invert_limb) define_mpn(ior_n) define_mpn(iorn_n) -define_mpn(kara_mul_n) -define_mpn(kara_sqr_n) define_mpn(lshift) define_mpn(lshiftc) define_mpn(mod_1_1p) @@ -1410,8 +1408,6 @@ define_mpn(sub_n) define_mpn(sub_nc) define_mpn(submul_1) define_mpn(submul_1c) -define_mpn(toom3_mul_n) -define_mpn(toom3_sqr_n) define_mpn(umul_ppmm) define_mpn(umul_ppmm_r) define_mpn(udiv_qrnnd) diff --git a/mpn/generic/fib2_ui.c b/mpn/generic/fib2_ui.c index 437375353..ddf93faed 100644 --- a/mpn/generic/fib2_ui.c +++ b/mpn/generic/fib2_ui.c @@ -105,8 +105,8 @@ mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n) /* f1p[size-1] might be zero, but this occurs rarely, so it's not worth bothering checking for it */ ASSERT (alloc >= 2*size); - mpn_sqr_n (xp, fp, size); - mpn_sqr_n (fp, f1p, size); + mpn_sqr (xp, fp, size); + mpn_sqr (fp, f1p, size); size *= 2; /* Shrink if possible. 
Since fp was normalized there'll be at diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c index a00fb805f..ac4fb52a9 100644 --- a/mpn/generic/get_str.c +++ b/mpn/generic/get_str.c @@ -473,7 +473,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un)); - mpn_sqr_n (t, p, n); + mpn_sqr (t, p, n); digits_in_base *= 2; n *= 2; n -= t[n - 1] == 0; diff --git a/mpn/generic/mul.c b/mpn/generic/mul.c index 02134e573..055f78fde 100644 --- a/mpn/generic/mul.c +++ b/mpn/generic/mul.c @@ -84,7 +84,7 @@ mpn_mul (mp_ptr prodp, if (un == vn) { if (up == vp) - mpn_sqr_n (prodp, up, un); + mpn_sqr (prodp, up, un); else mpn_mul_n (prodp, up, vp, un); } diff --git a/mpn/generic/mul_fft.c b/mpn/generic/mul_fft.c index f10185eea..1315b0cf5 100644 --- a/mpn/generic/mul_fft.c +++ b/mpn/generic/mul_fft.c @@ -529,7 +529,7 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, int K) a = *ap++; b = *bp++; if (sqr) - mpn_sqr_n (tp, a, n); + mpn_sqr (tp, a, n); else mpn_mul_n (tp, b, a, n); if (a[n] != 0) diff --git a/mpn/generic/perfpow.c b/mpn/generic/perfpow.c index 54143a790..ccd2b6891 100644 --- a/mpn/generic/perfpow.c +++ b/mpn/generic/perfpow.c @@ -405,7 +405,7 @@ mpn_perfect_power_p (mp_srcptr np, mp_size_t nn) exp = 1; while (2 * pn - 1 <= ncn) { - mpn_sqr_n (next, prev, pn); + mpn_sqr (next, prev, pn); xn = 2 * pn; xn -= (next[xn - 1] == 0); diff --git a/mpn/generic/pow_1.c b/mpn/generic/pow_1.c index 88d66ea0e..d379836e3 100644 --- a/mpn/generic/pow_1.c +++ b/mpn/generic/pow_1.c @@ -71,7 +71,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp) if ((cnt & 1) != 0) MP_PTR_SWAP (rp, tp); - mpn_sqr_n (rp, bp, bn); + mpn_sqr (rp, bp, bn); rn = 2 * bn; rn -= rp[rn - 1] == 0; for (i = GMP_LIMB_BITS - cnt - 1;;) @@ -86,7 +86,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp) if (--i == 0) break; - mpn_sqr_n (tp, rp, rn); + 
mpn_sqr (tp, rp, rn); rn = 2 * rn; rn -= tp[rn - 1] == 0; MP_PTR_SWAP (rp, tp); } @@ -96,7 +96,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp) if (((par ^ cnt) & 1) == 0) MP_PTR_SWAP (rp, tp); - mpn_sqr_n (rp, bp, bn); + mpn_sqr (rp, bp, bn); rn = 2 * bn; rn -= rp[rn - 1] == 0; for (i = GMP_LIMB_BITS - cnt - 1;;) @@ -111,7 +111,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp) if (--i == 0) break; - mpn_sqr_n (tp, rp, rn); + mpn_sqr (tp, rp, rn); rn = 2 * rn; rn -= tp[rn - 1] == 0; MP_PTR_SWAP (rp, tp); } diff --git a/mpn/generic/powlo.c b/mpn/generic/powlo.c index 23990ada8..7eb64540d 100644 --- a/mpn/generic/powlo.c +++ b/mpn/generic/powlo.c @@ -95,7 +95,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp, b2p = tp + 2*n; /* Store b^2 in b2. */ - mpn_sqr_n (tp, bp, n); /* FIXME: Use "mpn_sqrlo" */ + mpn_sqr (tp, bp, n); /* FIXME: Use "mpn_sqrlo" */ MPN_COPY (b2p, tp, n); /* Precompute odd powers of b and put them in the temporary area at pp. */ @@ -122,7 +122,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp, { while (getbit (ep, ebi) == 0) { - mpn_sqr_n (tp, rp, n); /* FIXME: Use "mpn_sqrlo" */ + mpn_sqr (tp, rp, n); /* FIXME: Use "mpn_sqrlo" */ MPN_COPY (rp, tp, n); ebi--; if (ebi == 0) @@ -149,7 +149,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp, do { - mpn_sqr_n (tp, rp, n); + mpn_sqr (tp, rp, n); MPN_COPY (rp, tp, n); this_windowsize--; } diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c index 712de6c94..7010357f1 100644 --- a/mpn/generic/powm.c +++ b/mpn/generic/powm.c @@ -166,7 +166,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, until the result is greater than the mod argument. */ for (;;) { - mpn_sqr_n (tp, this_pp, tn); + mpn_sqr (tp, this_pp, tn); tn = tn * 2 - 1, tn += tp[tn] != 0; if (getbit (ep, ebi) != 0) mpn_mul (..., tp, tn, bp, bn); @@ -212,7 +212,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, b2p = tp + 2*n; /* Store b^2 in b2. 
*/ - mpn_sqr_n (tp, this_pp, n); + mpn_sqr (tp, this_pp, n); #if WANT_REDC_2 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) mpn_redc_1 (b2p, tp, mp, n, mip[0]); @@ -260,7 +260,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, { \ while (getbit (ep, ebi) == 0) \ { \ - MPN_SQR_N (tp, rp, n); \ + MPN_SQR (tp, rp, n); \ MPN_REDUCE (rp, tp, mp, n, mip); \ ebi--; \ if (ebi == 0) \ @@ -288,7 +288,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, \ do \ { \ - MPN_SQR_N (tp, rp, n); \ + MPN_SQR (tp, rp, n); \ MPN_REDUCE (rp, tp, mp, n, mip); \ this_windowsize--; \ } \ @@ -304,20 +304,20 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_basecase (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_basecase (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) INNERLOOP; } @@ -325,20 +325,20 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_basecase (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) -#define 
MPN_SQR_N(r,a,n) mpn_sqr_n (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } @@ -346,20 +346,20 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_n (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) INNERLOOP; } else { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_n (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) INNERLOOP; } @@ -369,20 +369,20 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_basecase (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_basecase (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) INNERLOOP; } @@ -390,20 +390,20 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_basecase (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 
#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_n (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } @@ -411,10 +411,10 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, else { #undef MPN_MUL_N -#undef MPN_SQR_N +#undef MPN_SQR #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) -#define MPN_SQR_N(r,a,n) mpn_sqr_n (r,a,n) +#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) INNERLOOP; } diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c index e94fd8c57..315ae6e5e 100644 --- a/mpn/generic/powm_sec.c +++ b/mpn/generic/powm_sec.c @@ -98,12 +98,12 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #ifndef SQR_BASECASE_MAX /* If SQR_BASECASE_MAX is now not defined, use mpn_sqr_basecase for any operand size. */ -#define mpn_local_sqr_n(rp,up,n,tp) mpn_sqr_basecase(rp,up,n) +#define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n) #else /* Define our own squaring function, which uses mpn_sqr_basecase for its allowed sizes, but its own code for larger sizes. 
*/ static void -mpn_local_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp) +mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp) { mp_size_t i; @@ -277,7 +277,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn, do { - mpn_local_sqr_n (tp, rp, n, tp + 2 * n); + mpn_local_sqr (tp, rp, n, tp + 2 * n); mpn_redc_1_sec (rp, tp, mp, n, minv); this_windowsize--; } diff --git a/mpn/generic/remove.c b/mpn/generic/remove.c index 1fe3cbc0f..427a46fbd 100644 --- a/mpn/generic/remove.c +++ b/mpn/generic/remove.c @@ -104,7 +104,7 @@ mpn_remove (mp_ptr wp, mp_size_t *wn, if (nn > qn) break; /* next power would be overlarge */ - mpn_sqr_n (np, pp, pn); + mpn_sqr (np, pp, pn); nn += np[nn] != 0; pp = np; pn = nn; diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c index db916e8c5..83f5ac550 100644 --- a/mpn/generic/set_str.c +++ b/mpn/generic/set_str.c @@ -169,7 +169,7 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un)); - mpn_sqr_n (t, p, n); + mpn_sqr (t, p, n); n = 2 * n - 1; n += t[n] != 0; digits_in_base *= 2; #if 1 diff --git a/mpn/generic/sqr.c b/mpn/generic/sqr.c new file mode 100644 index 000000000..504dbfb2d --- /dev/null +++ b/mpn/generic/sqr.c @@ -0,0 +1,88 @@ +/* mpn_sqr -- square natural numbers. + +Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +2005, 2008, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n) +{ + ASSERT (n >= 1); + ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n)); + + if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) + { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ + mpn_mul_basecase (p, a, n, a, n); + } + else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) + { + mpn_sqr_basecase (p, a, n); + } + else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) + { + /* Allocate workspace of fixed size on stack: fast! */ + mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)]; + ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT); + mpn_toom2_sqr (p, a, n, ws); + } + else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)) + { + mp_ptr ws; + TMP_SDECL; + TMP_SMARK; + ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n)); + mpn_toom3_sqr (p, a, n, ws); + TMP_SFREE; + } + else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) + { + mp_ptr ws; + TMP_SDECL; + TMP_SMARK; + ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n)); + mpn_toom4_sqr (p, a, n, ws); + TMP_SFREE; + } + else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) + { + mp_ptr ws; + TMP_SDECL; + TMP_SMARK; + ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n)); + mpn_toom6_sqr (p, a, n, ws); + TMP_SFREE; + } + else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD)) + { + mp_ptr ws; + TMP_DECL; + TMP_MARK; + ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n)); + mpn_toom8_sqr (p, a, n, ws); + TMP_FREE; + } + else + { + /* The current FFT code allocates its own space. That should probably + change. 
*/ + mpn_fft_mul (p, a, n, a, n); + } +} diff --git a/mpn/generic/sqr_n.c b/mpn/generic/sqr_n.c deleted file mode 100644 index e492b5bb2..000000000 --- a/mpn/generic/sqr_n.c +++ /dev/null @@ -1,88 +0,0 @@ -/* mpn_sqr_n -- square natural numbers. - -Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, -2005, 2008, 2009 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" -#include "longlong.h" - -void -mpn_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n) -{ - ASSERT (n >= 1); - ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n)); - - if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) - { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ - mpn_mul_basecase (p, a, n, a, n); - } - else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) - { - mpn_sqr_basecase (p, a, n); - } - else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) - { - /* Allocate workspace of fixed size on stack: fast! 
*/ - mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)]; - ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT); - mpn_toom2_sqr (p, a, n, ws); - } - else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)) - { - mp_ptr ws; - TMP_SDECL; - TMP_SMARK; - ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n)); - mpn_toom3_sqr (p, a, n, ws); - TMP_SFREE; - } - else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) - { - mp_ptr ws; - TMP_SDECL; - TMP_SMARK; - ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n)); - mpn_toom4_sqr (p, a, n, ws); - TMP_SFREE; - } - else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) - { - mp_ptr ws; - TMP_SDECL; - TMP_SMARK; - ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n)); - mpn_toom6_sqr (p, a, n, ws); - TMP_SFREE; - } - else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD)) - { - mp_ptr ws; - TMP_DECL; - TMP_MARK; - ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n)); - mpn_toom8_sqr (p, a, n, ws); - TMP_FREE; - } - else - { - /* The current FFT code allocates its own space. That should probably - change. */ - mpn_fft_mul (p, a, n, a, n); - } -} diff --git a/mpn/generic/sqrmod_bnm1.c b/mpn/generic/sqrmod_bnm1.c index c1b192942..d07933c7d 100644 --- a/mpn/generic/sqrmod_bnm1.c +++ b/mpn/generic/sqrmod_bnm1.c @@ -40,7 +40,7 @@ mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp) ASSERT (0 < rn); - mpn_sqr_n (tp, ap, rn); + mpn_sqr (tp, ap, rn); cy = mpn_add_n (rp, tp, tp + rn, rn); /* If cy == 1, then the value of rp is at most B^rn - 2, so there can * be no overflow when adding in the carry. 
*/ @@ -59,7 +59,7 @@ mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp) ASSERT (0 < rn); - mpn_sqr_n (tp, ap, rn + 1); + mpn_sqr (tp, ap, rn + 1); ASSERT (tp[2*rn+1] == 0); ASSERT (tp[2*rn] < GMP_NUMB_MAX); cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn); @@ -94,13 +94,13 @@ mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp) { if (UNLIKELY (2*an <= rn)) { - mpn_sqr_n (rp, ap, an); + mpn_sqr (rp, ap, an); MPN_ZERO (rp + 2*an, rn - 2*an); } else { mp_limb_t cy; - mpn_sqr_n (tp, ap, an); + mpn_sqr (tp, ap, an); cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn); MPN_INCR_U (rp, rn, cy); } diff --git a/mpn/generic/sqrtrem.c b/mpn/generic/sqrtrem.c index ac878c508..87852f40d 100644 --- a/mpn/generic/sqrtrem.c +++ b/mpn/generic/sqrtrem.c @@ -239,7 +239,7 @@ mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n) q >>= 1; if (c != 0) c = mpn_add_n (np + l, np + l, sp + l, h); - mpn_sqr_n (np + n, sp, l); + mpn_sqr (np + n, sp, l); b = q + mpn_sub_n (np, np, np + n, 2 * l); c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b); q = mpn_add_1 (sp + l, sp + l, h, q); diff --git a/mpn/generic/toom2_sqr.c b/mpn/generic/toom2_sqr.c index 4c27aeb4e..08fe7999a 100644 --- a/mpn/generic/toom2_sqr.c +++ b/mpn/generic/toom2_sqr.c @@ -45,7 +45,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD) #endif -#define TOOM2_SQR_N_REC(p, a, n, ws) \ +#define TOOM2_SQR_REC(p, a, n, ws) \ do { \ if (! 
MAYBE_sqr_toom2 \ || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \ @@ -104,13 +104,13 @@ mpn_toom2_sqr (mp_ptr pp, #define scratch_out scratch + 2 * n /* vm1, 2n limbs */ - TOOM2_SQR_N_REC (vm1, asm1, n, scratch_out); + TOOM2_SQR_REC (vm1, asm1, n, scratch_out); /* vinf, s+s limbs */ - TOOM2_SQR_N_REC (vinf, a1, s, scratch_out); + TOOM2_SQR_REC (vinf, a1, s, scratch_out); /* v0, 2n limbs */ - TOOM2_SQR_N_REC (v0, ap, n, scratch_out); + TOOM2_SQR_REC (v0, ap, n, scratch_out); /* H(v0) + L(vinf) */ cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n); diff --git a/mpn/generic/toom3_sqr.c b/mpn/generic/toom3_sqr.c index a85e546fe..f44fff451 100644 --- a/mpn/generic/toom3_sqr.c +++ b/mpn/generic/toom3_sqr.c @@ -51,7 +51,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD) #endif -#define TOOM3_SQR_N_REC(p, a, n, ws) \ +#define TOOM3_SQR_REC(p, a, n, ws) \ do { \ if (MAYBE_sqr_basecase \ && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \ @@ -154,7 +154,7 @@ mpn_toom3_sqr (mp_ptr pp, /* vm1, 2n+1 limbs */ #ifdef SMALLER_RECURSION - TOOM3_SQR_N_REC (vm1, asm1, n, scratch_out); + TOOM3_SQR_REC (vm1, asm1, n, scratch_out); cy = 0; if (asm1[n] != 0) cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n); @@ -162,18 +162,18 @@ mpn_toom3_sqr (mp_ptr pp, cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n); vm1[2 * n] = cy; #else - TOOM3_SQR_N_REC (vm1, asm1, n + 1, scratch_out); + TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out); #endif - TOOM3_SQR_N_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */ + TOOM3_SQR_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */ - TOOM3_SQR_N_REC (vinf, a2, s, scratch_out); /* vinf, s+s limbs */ + TOOM3_SQR_REC (vinf, a2, s, scratch_out); /* vinf, s+s limbs */ vinf0 = vinf[0]; /* v1 overlaps with this */ #ifdef SMALLER_RECURSION /* v1, 2n+1 limbs */ - TOOM3_SQR_N_REC (v1, as1, n, scratch_out); + TOOM3_SQR_REC (v1, as1, n, scratch_out); if (as1[n] == 1) { cy = as1[n] + mpn_add_n (v1 + 
n, v1 + n, as1, n); @@ -203,11 +203,11 @@ mpn_toom3_sqr (mp_ptr pp, v1[2 * n] = cy; #else cy = vinf[1]; - TOOM3_SQR_N_REC (v1, as1, n + 1, scratch_out); + TOOM3_SQR_REC (v1, as1, n + 1, scratch_out); vinf[1] = cy; #endif - TOOM3_SQR_N_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */ + TOOM3_SQR_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0); } diff --git a/mpn/generic/toom4_sqr.c b/mpn/generic/toom4_sqr.c index e188ee0ae..68aec769c 100644 --- a/mpn/generic/toom4_sqr.c +++ b/mpn/generic/toom4_sqr.c @@ -55,7 +55,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ (SQR_FFT_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD) #endif -#define TOOM4_SQR_N_REC(p, a, n, ws) \ +#define TOOM4_SQR_REC(p, a, n, ws) \ do { \ if (MAYBE_sqr_basecase \ && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \ @@ -111,8 +111,8 @@ mpn_toom4_sqr (mp_ptr pp, /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */ mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp); - TOOM4_SQR_N_REC (v2, apx, n + 1, tp); /* v2, 2n+1 limbs */ - TOOM4_SQR_N_REC (vm2, amx, n + 1, tp); /* vm2, 2n+1 limbs */ + TOOM4_SQR_REC (v2, apx, n + 1, tp); /* v2, 2n+1 limbs */ + TOOM4_SQR_REC (vm2, amx, n + 1, tp); /* vm2, 2n+1 limbs */ /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */ #if HAVE_NATIVE_mpn_addlsh1_n @@ -138,16 +138,16 @@ mpn_toom4_sqr (mp_ptr pp, ASSERT (apx[n] < 15); - TOOM4_SQR_N_REC (vh, apx, n + 1, tp); /* vh, 2n+1 limbs */ + TOOM4_SQR_REC (vh, apx, n + 1, tp); /* vh, 2n+1 limbs */ /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. 
*/ mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp); - TOOM4_SQR_N_REC (v1, apx, n + 1, tp); /* v1, 2n+1 limbs */ - TOOM4_SQR_N_REC (vm1, amx, n + 1, tp); /* vm1, 2n+1 limbs */ + TOOM4_SQR_REC (v1, apx, n + 1, tp); /* v1, 2n+1 limbs */ + TOOM4_SQR_REC (vm1, amx, n + 1, tp); /* vm1, 2n+1 limbs */ - TOOM4_SQR_N_REC (v0, a0, n, tp); - TOOM4_SQR_N_REC (vinf, a3, s, tp); /* vinf, 2s limbs */ + TOOM4_SQR_REC (v0, a0, n, tp); + TOOM4_SQR_REC (vinf, a3, s, tp); /* vinf, 2s limbs */ mpn_toom_interpolate_7pts (pp, n, 0, vm2, vm1, v2, vh, 2*s, tp); } diff --git a/mpn/x86/fat/gmp-mparam.h b/mpn/x86/fat/gmp-mparam.h index 051a87d19..45680ede4 100644 --- a/mpn/x86/fat/gmp-mparam.h +++ b/mpn/x86/fat/gmp-mparam.h @@ -35,7 +35,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define USE_PREINV_DIVREM_1 1 /* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need - for mpn_sqr_n to call the latter. */ + for mpn_sqr to call the latter. */ #define SQR_BASECASE_THRESHOLD 0 /* Sensible fallbacks for these, when not taken from a cpu-specific diff --git a/mpn/x86_64/fat/gmp-mparam.h b/mpn/x86_64/fat/gmp-mparam.h index 7e71f715b..6e744c064 100644 --- a/mpn/x86_64/fat/gmp-mparam.h +++ b/mpn/x86_64/fat/gmp-mparam.h @@ -35,7 +35,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define USE_PREINV_DIVREM_1 1 /* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need - for mpn_sqr_n to call the latter. */ + for mpn_sqr to call the latter. 
*/ #define SQR_BASECASE_THRESHOLD 0 /* Sensible fallbacks for these, when not taken from a cpu-specific diff --git a/mpz/lucnum_ui.c b/mpz/lucnum_ui.c index 1215a04b4..1fb8ec86c 100644 --- a/mpz/lucnum_ui.c +++ b/mpz/lucnum_ui.c @@ -167,7 +167,7 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n) TRACE (printf (" zeros=%d\n", zeros)); ASSERT (xalloc >= 2*lsize); - mpn_sqr_n (xp, lp, lsize); + mpn_sqr (xp, lp, lsize); lsize *= 2; lsize -= (xp[lsize-1] == 0); diff --git a/mpz/n_pow_ui.c b/mpz/n_pow_ui.c index c1d5e902d..6d527c36c 100644 --- a/mpz/n_pow_ui.c +++ b/mpz/n_pow_ui.c @@ -59,7 +59,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ from mpn_mul_1 or mpn_mul_2 in the bignum powering. It's felt that doing so would be more complicated than it's worth, and could well end up being a slowdown for small e. For big e on the other hand the algorithm is - dominated by mpn_sqr_n so there wouldn't much of a saving. The current + dominated by mpn_sqr so there wouldn't much of a saving. The current code can be viewed as simply doing the first few steps of the powering in a single or double limb where possible. @@ -79,10 +79,10 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ /* The following are for convenience, they update the size and check the alloc. 
*/ -#define MPN_SQR_N(dst, alloc, src, size) \ +#define MPN_SQR(dst, alloc, src, size) \ do { \ ASSERT (2*(size) <= (alloc)); \ - mpn_sqr_n (dst, src, size); \ + mpn_sqr (dst, src, size); \ (size) *= 2; \ (size) -= ((dst)[(size)-1] == 0); \ } while (0) @@ -435,7 +435,7 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e) i, e, rsize, ralloc, talloc); mpn_trace ("r", rp, rsize)); - MPN_SQR_N (tp, talloc, rp, rsize); + MPN_SQR (tp, talloc, rp, rsize); SWAP_RP_TP; if ((e & (1L << i)) != 0) MPN_MUL_2 (rp, rsize, ralloc, mult); @@ -467,7 +467,7 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e) i, e, rsize, ralloc, talloc); mpn_trace ("r", rp, rsize)); - MPN_SQR_N (tp, talloc, rp, rsize); + MPN_SQR (tp, talloc, rp, rsize); SWAP_RP_TP; if ((e & (1L << i)) != 0) MPN_MUL_1 (rp, rsize, ralloc, blimb); @@ -496,7 +496,7 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e) i, e, rsize, ralloc, talloc); mpn_trace ("r", rp, rsize)); - MPN_SQR_N (tp, talloc, rp, rsize); + MPN_SQR (tp, talloc, rp, rsize); SWAP_RP_TP; if ((e & (1L << i)) != 0) { diff --git a/mpz/powm_ui.c b/mpz/powm_ui.c index 6200f3333..64615d107 100644 --- a/mpz/powm_ui.c +++ b/mpz/powm_ui.c @@ -126,7 +126,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m) while (c != 0) { - mpn_sqr_n (tp, xp, xn); + mpn_sqr (tp, xp, xn); tn = 2 * xn; tn -= tp[tn - 1] == 0; if (tn < mn) { diff --git a/tests/devel/try.c b/tests/devel/try.c index 516915f53..3a4f1dc28 100644 --- a/tests/devel/try.c +++ b/tests/devel/try.c @@ -1562,7 +1562,7 @@ const struct choice_t choice_array[] = { { TRY(mpn_mul), TYPE_MUL_MN }, { TRY(mpn_mul_n), TYPE_MUL_N }, - { TRY(mpn_sqr_n), TYPE_SQR }, + { TRY(mpn_sqr), TYPE_SQR }, { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 }, #if HAVE_NATIVE_mpn_umul_ppmm diff --git a/tune/Makefile.am b/tune/Makefile.am index 2d5b05420..022aa7c17 100644 --- a/tune/Makefile.am +++ b/tune/Makefile.am @@ -125,7 +125,7 
@@ TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c \ dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \ invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \ - get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr_n.c \ + get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c @@ -152,52 +152,3 @@ sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm include ../mpn/Makeasm.am - - -# "mk" is multiplication in the karatsuba range -# "st" is squaring in the toom-cook range, etc -# "g" forms produce graphs - -mk: - ./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n - -MTS = -s 50-150 -c -mt: - ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n -mtg: - ./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n - -sk: - ./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n - -STS = -s 50-150 -c -st: - ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n -stg: - ./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n - -fib: - ./speed -s 40-60 -c mpz_fib_ui -fibg: - ./speed -s 10-300 -P fibg mpz_fib_ui - - -gcd: - ./speed -s 1-20 -c mpn_gcd - -udiv: - ./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c - -divn: - ./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1 -divun: - ./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345 -modn: - ./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1 -modun: - ./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345 - - -graph: - ./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr - gnuplot graph.gnuplot diff --git a/tune/common.c b/tune/common.c index 63a30b39b..d44e62cb2 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1003,9 +1003,9 @@ speed_mpn_mul_n (struct speed_params *s) SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n); } 
double -speed_mpn_sqr_n (struct speed_params *s) +speed_mpn_sqr (struct speed_params *s) { - SPEED_ROUTINE_MPN_SQR (mpn_sqr_n); + SPEED_ROUTINE_MPN_SQR (mpn_sqr); } double speed_mpn_mul_n_sqr (struct speed_params *s) diff --git a/tune/speed.c b/tune/speed.c index 179bb29b8..a9e3508c5 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -294,7 +294,7 @@ const struct routine_t { #endif { "mpn_mul_n", speed_mpn_mul_n }, - { "mpn_sqr_n", speed_mpn_sqr_n }, + { "mpn_sqr", speed_mpn_sqr }, { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, diff --git a/tune/speed.h b/tune/speed.h index 1ed5aeec1..a807c7c56 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -273,7 +273,7 @@ double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s)); double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s)); double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s)); double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s)); -double speed_mpn_sqr_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s)); double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s)); double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s)); double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s)); @@ -445,11 +445,6 @@ mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcpt mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int)); void mpn_pre_set_str __GMP_PROTO ((mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr)); -void mpn_toom3_mul_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); -void mpn_toom3_sqr_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); -void mpn_toom3_mul_n_mpn __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); -void mpn_toom3_sqr_n_mpn __GMP_PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); - void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, 
mpz_srcptr)); void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr)); diff --git a/tune/tuneup.c b/tune/tuneup.c index 6eeaa3d7f..a9fc89e69 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -992,7 +992,7 @@ tune_sqrmod_bnm1 (void) just for that. Start karatsuba from 4 same as MUL above. */ void -tune_sqr_n (void) +tune_sqr (void) { /* disabled until tuned */ SQR_FFT_THRESHOLD = MP_SIZE_T_MAX; @@ -1006,7 +1006,7 @@ tune_sqr_n (void) { static struct param_t param; param.name = "SQR_BASECASE_THRESHOLD"; - param.function = speed_mpn_sqr_n; + param.function = speed_mpn_sqr; param.min_size = 3; param.min_is_always = 1; param.max_size = TUNE_SQR_TOOM2_MAX; @@ -1017,7 +1017,7 @@ tune_sqr_n (void) { static struct param_t param; param.name = "SQR_TOOM2_THRESHOLD"; - param.function = speed_mpn_sqr_n; + param.function = speed_mpn_sqr; param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE); param.max_size = TUNE_SQR_TOOM2_MAX; param.noprint = 1; @@ -1029,7 +1029,7 @@ tune_sqr_n (void) /* Karatsuba becomes faster than mul_basecase before sqr_basecase does. Arrange for the expression "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD))" which - selects mpn_sqr_basecase in mpn_sqr_n to be false, by setting + selects mpn_sqr_basecase in mpn_sqr to be false, by setting SQR_TOOM2_THRESHOLD to zero, making SQR_BASECASE_THRESHOLD the toom2 threshold. 
*/ @@ -1053,7 +1053,7 @@ tune_sqr_n (void) static struct param_t param; mp_size_t toom3_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold); - param.function = speed_mpn_sqr_n; + param.function = speed_mpn_sqr; param.name = "SQR_TOOM3_THRESHOLD"; param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_MINSIZE); @@ -1927,7 +1927,7 @@ tune_fft_sqr (void) param.first_size = SQR_TOOM3_THRESHOLD / 2; param.max_size = option_fft_max_size; param.function = speed_mpn_mul_fft_sqr; - param.mul_function = speed_mpn_sqr_n; + param.mul_function = speed_mpn_sqr; param.sqr = 0; fft (&param); } @@ -2012,7 +2012,7 @@ all (void) tune_mul (); printf("\n"); - tune_sqr_n (); + tune_sqr (); printf("\n"); tune_mulmod_bnm1 (); -- cgit v1.2.1