summaryrefslogtreecommitdiff
path: root/mpz/fac_ui.c
diff options
context:
space:
mode:
authorMarco Bodrato <bodrato@mail.dm.unipi.it>2012-01-05 01:46:28 +0100
committerMarco Bodrato <bodrato@mail.dm.unipi.it>2012-01-05 01:46:28 +0100
commit8e252e348014297749e031d55359725fc7d91c02 (patch)
tree75462ee6f92d026af723ebf98c5481a58efffd13 /mpz/fac_ui.c
parentc9bb6775abee7ce2f8d998f95940a6567c777898 (diff)
downloadgmp-8e252e348014297749e031d55359725fc7d91c02.tar.gz
mpz_fac_ui: faster (odd)basecase.
Diffstat (limited to 'mpz/fac_ui.c')
-rw-r--r--mpz/fac_ui.c194
1 files changed, 101 insertions, 93 deletions
diff --git a/mpz/fac_ui.c b/mpz/fac_ui.c
index 2d1ff9f1f..84ce0de0e 100644
--- a/mpz/fac_ui.c
+++ b/mpz/fac_ui.c
@@ -2,8 +2,8 @@
Contributed to the GNU project by Marco Bodrato.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -293,11 +293,12 @@ bitwise_primesieve (mp_ptr bit_array, mp_limb_t n)
/* FIXME: should be tuned */
#ifndef RECURSIVE_PROD_THRESHOLD
-#define RECURSIVE_PROD_THRESHOLD MUL_TOOM22_THRESHOLD
+#define RECURSIVE_PROD_THRESHOLD (MUL_TOOM22_THRESHOLD|(MUL_TOOM22_THRESHOLD>>1))
#endif
/* Computes the product of the j>1 limbs pointed by factors, puts the
result in x.
+ The list in {factors, j} is overwritten.
*/
static void
@@ -313,21 +314,21 @@ mpz_prodlimbs (mpz_ptr x, mp_limb_t *factors, mp_limb_t j)
if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {
mp_limb_t cy;
- PTR (x)[0] = factors[0];
- SIZ (x) = 1;
- prod = MPZ_REALLOC (x, j);
+ j--;
size = 1;
- i = 1;
- do
+ for (i = 1; i < j; i++)
{
- cy = mpn_mul_1 (prod, prod, size, factors[i]);
- prod[size] = cy;
+ cy = mpn_mul_1 (factors, factors, size, factors[i]);
+ factors[size] = cy;
size += cy != 0;
- i++;
- } while (i < j);
+ };
+
+ prod = MPZ_REALLOC (x, size + 1);
- SIZ (x) = size;
+ cy = mpn_mul_1 (prod, factors, size, factors[i]);
+ prod[size] = cy;
+ SIZ (x) = size + (cy != 0);;
} else {
mpz_t x1, x2;
TMP_DECL;
@@ -361,7 +362,7 @@ mpz_prodlimbs (mpz_ptr x, mp_limb_t *factors, mp_limb_t j)
do { \
mp_limb_t __q, __prime; \
__prime = (P); \
- FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
+ FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
__q = n; \
do { \
__q /= __prime; \
@@ -457,98 +458,108 @@ log_n_max (mp_limb_t n)
return log;
}
-/* mpz_bc_oddfac_1 computes the odd part of the factorial of the parameter n.
- I.e. n! = x 2^a, the result x is an odd positive integer.
- */
+/*********************************************************/
+/* Section factorial: fast factorial implementations */
+/*********************************************************/
+/* mpz_oddfac_1 computes the odd part of the factorial of the
+ parameter n. I.e. n! = x 2^a, where x is the returned value: an
+ odd positive integer.
+ */
static void
-mpz_bc_oddfac_1 (mpz_ptr x, mp_limb_t n)
+mpz_oddfac_1 (mpz_ptr x, mp_limb_t n)
{
- static const mp_limb_t table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE };
-
- mp_limb_t *factors, prod, max_prod, i, j;
- TMP_SDECL;
-
- ASSERT (numberof (table) > 3);
-#if TUNE_PROGRAM_BUILD
- ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
-#endif
+ static const mp_limb_t tablef[] = { ONE_LIMB_ODD_FACTORIAL_TABLE };
+ static const mp_limb_t tabled[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
- j = 0;
- prod = 1;
-
- TMP_SMARK;
- factors = TMP_SALLOC_LIMBS (1 + n / FACTORS_PER_LIMB);
-
- do {
- i = 3;
- max_prod = GMP_NUMB_MAX / n;
- do {
- FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
- i += 2;
- } while (i <= n);
- n >>= 1;
- } while (n >= numberof (table));
-
- factors[j++] = prod;
- factors[j++] = table[n];
- mpz_prodlimbs (x, factors, j);
+ if (n < numberof (tablef))
+ {
+ PTR (x)[0] = tablef[n];
+ SIZ (x) = 1;
+ }
+ else
+ {
+ unsigned s;
+ mp_limb_t *factors;
- TMP_SFREE;
-}
+ ASSERT (n <= GMP_NUMB_MAX);
-/*********************************************************/
-/* Section factorial: fast factorial implementations */
-/*********************************************************/
+ s = 0;
+ {
+ mp_limb_t tn;
+ mp_limb_t prod, max_prod, i, j;
+ TMP_SDECL;
-/* mpz_dsc_oddfac_1 computes the odd part of the factorial of the parameter n.
- I.e. n! = x 2^a, the result x is an odd positive integer.
+ ASSERT (numberof (tablef) > 3);
+#if TUNE_PROGRAM_BUILD
+ ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
+#endif
- Uses the algorithm described by Peter Luschny in "Divide, Swing and
- Conquer the Factorial!".
- */
+ /* Compute the number of recursive steps for the DSC algorithm. */
+ for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
+ tn >>= 1;
-static void
-mpz_dsc_oddfac_1 (mpz_ptr x, mp_limb_t n)
-{
- mpz_t swing;
- mp_limb_t *sieve, *factors;
- mp_size_t size;
- unsigned s;
- TMP_DECL;
+ j = 0;
+ prod = 1;
- ASSERT (n <= GMP_NUMB_MAX);
+ TMP_SMARK;
+ factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
- s = 1;
- size = n;
- for ( ; (size >>= 1) > FAC_DSC_THRESHOLD; s++)
- ;
- mpz_bc_oddfac_1 (x, size);
+ for (; (tn - 1) >> 1 >= numberof (tabled); tn >>= 1) {
+ i = numberof (tabled) * 2 + 1;
+ max_prod = GMP_NUMB_MAX / tn;
+ factors[j++] = tabled[numberof (tabled) - 1];
+ do {
+ FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
+ i += 2;
+ } while (i <= tn);
+ }
- TMP_MARK;
- sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+ factors[j] = prod;
+ j += prod > 1;
+ factors[j++] = tabled[(tn - 1) >> 1];
+ factors[j++] = tablef[tn >> 1];
+ mpz_prodlimbs (x, factors, j);
- size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
+ TMP_SFREE;
+ }
- MPZ_TMP_INIT (swing, size << 1);
- factors = PTR(swing) + size;
- do {
- mpz_t square;
- TMP_DECL;
+ if (s != 0)
+ /* Use the algorithm described by Peter Luschny in "Divide,
+ Swing and Conquer the Factorial!".
+ */
+ {
+ mpz_t swing;
+ mp_limb_t *sieve;
+ mp_size_t size;
+ TMP_DECL;
- s--;
- mpz_oddswing_1 (swing, n>>s, sieve, factors);
+ TMP_MARK;
+ sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
- TMP_MARK;
- size = SIZ (x);
- MPZ_TMP_INIT (square, size << 1);
- mpn_sqr (PTR (square), PTR (x), size);
- SIZ (square) = (size << 1) - (PTR (square)[(size << 1) - 1] == 0);
- mpz_mul (x, square, swing); /* n!= n$ * floor(n/2)!^2 */
+ size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
- TMP_FREE;
- } while (s != 0);
- TMP_FREE;
+ MPZ_TMP_INIT (swing, size << 1);
+ factors = PTR(swing) + size;
+ do {
+ mpz_t square;
+ TMP_DECL;
+
+ s--;
+ mpz_oddswing_1 (swing, n>>s, sieve, factors);
+
+ TMP_MARK;
+ size = SIZ (x);
+ MPZ_TMP_INIT (square, size << 1);
+ mpn_sqr (PTR (square), PTR (x), size);
+ SIZ (square) = (size << 1) - (PTR (square)[(size << 1) - 1] == 0);
+ mpz_mul (x, square, swing); /* n!= n$ * floor(n/2)!^2 */
+
+ TMP_FREE;
+ } while (s != 0);
+ TMP_FREE;
+ }
+ }
}
/* Computes n!, the factorial of n.
@@ -591,10 +602,7 @@ mpz_fac_ui (mpz_ptr x, unsigned long n)
else
{
mp_limb_t count;
- if (BELOW_THRESHOLD (n, FAC_DSC_THRESHOLD))
- mpz_bc_oddfac_1 (x, n);
- else
- mpz_dsc_oddfac_1 (x, n);
+ mpz_oddfac_1 (x, n);
popc_limb (count, n);
mpz_mul_2exp (x, x, n - count);
}