diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | configure.in | 1 | ||||
-rw-r--r-- | gen-fac.c | 16 | ||||
-rw-r--r-- | gmp-impl.h | 3 | ||||
-rw-r--r-- | mpz/2fac_ui.c | 10 | ||||
-rw-r--r-- | mpz/bin_uiui.c | 11 | ||||
-rw-r--r-- | mpz/oddfac_1.c | 20 |
7 files changed, 45 insertions, 26 deletions
@@ -1,3 +1,13 @@ +2012-04-30 Marco Bodrato <bodrato@mail.dm.unipi.it> + + * mpn/generic/comb_tables.c: New file. + * configure.in: Add it. + * gen-fac.c: Define table limits. + * gmp-impl.h (__gmp_oddfac_table, __gmp_odd2fac_table): Declare. + * mpz/2fac_ui.c: Use shared tables. + * mpz/bin_uiui.c: Likewise. + * mpz/oddfac_1.c: Likewise. + 2012-04-29 Torbjorn Granlund <tege@gmplib.org> * mpn/arm/aors_n.asm: Tune for more stable performance. diff --git a/configure.in b/configure.in index 2d2898d84..14088c6b6 100644 --- a/configure.in +++ b/configure.in @@ -2680,6 +2680,7 @@ gmp_mpn_functions="$extra_functions \ trialdiv remove \ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \ copyi copyd zero tabselect \ + comb_tables \ $gmp_mpn_functions_optional" define(GMP_MULFUNC_CHOICES, @@ -35,7 +35,7 @@ mpz_remove_twos (mpz_t x) int gen_consts (int numb, int nail, int limb) { - mpz_t x, mask, y; + mpz_t x, mask, y, last; unsigned long a, b; unsigned long ofl, ofe; @@ -54,6 +54,7 @@ gen_consts (int numb, int nail, int limb) printf ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1"); mpz_init_set_ui (x, 1); + mpz_init (last); for (b = 2;; b++) { mpz_mul_ui (x, x, b); /* so b!=a */ @@ -74,6 +75,7 @@ gen_consts (int numb, int nail, int limb) for (b = 3;; b++) { for (a = b; (a & 1) == 0; a >>= 1); + mpz_set (last, x); mpz_mul_ui (x, x, a); if (mpz_sizeinbase (x, 2) > numb) break; @@ -81,6 +83,10 @@ gen_consts (int numb, int nail, int limb) mpz_out_str (stdout, 16, x); } printf (")\n"); + printf + ("#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x"); + mpz_out_str (stdout, 16, last); + printf (")\n"); ofl = b - 1; printf @@ -124,6 +130,7 @@ gen_consts (int numb, int nail, int limb) mpz_set_ui (x, 1); for (b = 3;; b+=2) { + mpz_set (last, x); mpz_mul_ui (x, x, b); if (mpz_sizeinbase (x, 2) > numb) break; @@ -131,6 +138,13 @@ gen_consts (int numb, int nail, int limb) mpz_out_str (stdout, 16, x); } printf (")\n"); + printf + ("#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x"); + mpz_out_str (stdout, 16, last); + printf (")\n"); + + printf + ("#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (%lu)\n", b - 2); printf ("\n/* This table x_1, x_2,... contains values s.t. x_n^n has <= GMP_NUMB_BITS bits */\n"); diff --git a/gmp-impl.h b/gmp-impl.h index 8700eaf2c..5e721e099 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -1915,6 +1915,9 @@ __GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t); __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; #define FIB_TABLE(n) (__gmp_fib_table[(n)+1]) +extern const mp_limb_t __gmp_oddfac_table[]; +extern const mp_limb_t __gmp_odd2fac_table[]; + #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */ typedef struct { diff --git a/mpz/2fac_ui.c b/mpz/2fac_ui.c index b1606bfbd..ba719e1c8 100644 --- a/mpz/2fac_ui.c +++ b/mpz/2fac_ui.c @@ -50,10 +50,8 @@ mpz_2fac_ui (mpz_ptr x, unsigned long n) mpz_oddfac_1 (x, n >> 1, 0); mpz_mul_2exp (x, x, count); } else { /* n is odd */ - static const mp_limb_t tabled[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE }; - - if (n < 2 * numberof (tabled)) { - PTR (x)[0] = tabled[n >> 1]; + if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT) { + PTR (x)[0] = __gmp_odd2fac_table[n >> 1]; SIZ (x) = 1; } else if (BELOW_THRESHOLD (n, FAC_2DSC_THRESHOLD)) { /* odd basecase, */ mp_limb_t *factors, prod, max_prod, j; @@ -63,12 +61,12 @@ mpz_2fac_ui (mpz_ptr x, unsigned long n) TMP_SMARK; factors = TMP_SALLOC_LIMBS (1 + n / (2 * FACTORS_PER_LIMB)); - factors[0] = tabled[numberof (tabled) - 1]; + factors[0] = ODD_DOUBLEFACTORIAL_TABLE_MAX; j = 1; prod = n; max_prod = GMP_NUMB_MAX / FAC_2DSC_THRESHOLD; - while ((n -= 2) >= numberof (tabled) * 2 + 1) + while ((n -= 2) > ODD_DOUBLEFACTORIAL_TABLE_LIMIT) FACTOR_LIST_STORE (n, prod, max_prod, factors, j); factors[j++] = prod; diff --git a/mpz/bin_uiui.c b/mpz/bin_uiui.c index 34ac2e098..243eae377 100644 --- a/mpz/bin_uiui.c +++ b/mpz/bin_uiui.c @@ -182,10 +182,6 @@ static const unsigned long ftab[] = } while (0) #endif -/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis - is an odd integer. */ -static const mp_limb_t fac[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE }; - /* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis is an odd integer. */ static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE }; @@ -233,7 +229,8 @@ mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) numfac = 1; j = ODD_FACTORIAL_TABLE_LIMIT + 1; - jjj = fac[ODD_FACTORIAL_TABLE_LIMIT]; + jjj = ODD_FACTORIAL_TABLE_MAX; + ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX); while (1) { @@ -346,7 +343,7 @@ mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) ASSERT (rn < alloc); - mpn_pi1_bdiv_q_1 (rp, rp, rn, fac[k], facinv[k - 2], + mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], fac2cnt[k / 2 - 1] - i2cnt); /* A two-fold, branch-free normalisation is possible :*/ /* rn -= rp[rn - 1] == 0; */ @@ -368,7 +365,7 @@ mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) static mp_limb_t bc_bin_uiui (unsigned int n, unsigned int k) { - return ((fac[n] * facinv[k - 2] * facinv[n - k - 2]) + return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2]) << (fac2cnt[n / 2 - 1] - fac2cnt[k / 2 - 1] - fac2cnt[(n-k) / 2 - 1])) & GMP_NUMB_MASK; } diff --git a/mpz/oddfac_1.c b/mpz/oddfac_1.c index ef71d5c06..e3e73e542 100644 --- a/mpz/oddfac_1.c +++ b/mpz/oddfac_1.c @@ -282,13 +282,11 @@ log_n_max (mp_limb_t n) void mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) { - static const mp_limb_t tablef[] = { ONE_LIMB_ODD_FACTORIAL_TABLE }; - ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD))); if (n <= ODD_FACTORIAL_TABLE_LIMIT) { - PTR (x)[0] = tablef[n]; + PTR (x)[0] = __gmp_oddfac_table[n]; SIZ (x) = 1; } else @@ -300,7 +298,6 @@ mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) s = 0; { - static const mp_limb_t tabled[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE }; mp_limb_t tn; #if TUNE_PROGRAM_BUILD @@ -311,7 +308,7 @@ mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++) tn >>= 1; - if (tn >= numberof (tabled) * 2 + 1) { + if (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1) { mp_limb_t prod, max_prod, i; mp_size_t j; TMP_SDECL; @@ -330,26 +327,25 @@ mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) #endif do { - i = numberof (tabled) * 2 + 1; - factors[j++] = tabled[numberof (tabled) - 1]; + i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2; + factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX; do { FACTOR_LIST_STORE (i, prod, max_prod, factors, j); i += 2; } while (i <= tn); max_prod <<= 1; tn >>= 1; - } while (tn >= numberof (tabled) * 2 + 1); + } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); factors[j++] = prod; - ASSERT (numberof (tablef) > numberof (tabled)); - factors[j++] = tabled[(tn - 1) >> 1]; - factors[j++] = tablef[tn >> 1]; + factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1]; + factors[j++] = __gmp_oddfac_table[tn >> 1]; mpz_prodlimbs (x, factors, j); TMP_SFREE; } else { MPZ_REALLOC (x, 2); - umul_ppmm (PTR (x)[1], PTR (x)[0], tabled[(tn - 1) >> 1], tablef[tn >> 1]); + umul_ppmm (PTR (x)[1], PTR (x)[0], __gmp_odd2fac_table[(tn - 1) >> 1], __gmp_oddfac_table[tn >> 1]); SIZ (x) = 2; } } |