diff options
author | Marco Bodrato <bodrato@mail.dm.unipi.it> | 2012-02-03 13:39:33 +0100 |
---|---|---|
committer | Marco Bodrato <bodrato@mail.dm.unipi.it> | 2012-02-03 13:39:33 +0100 |
commit | 517845eb219f9b87765c8a923701bb163234ab8d (patch) | |
tree | 9ed47772a62749dc869e41d3154c603e482507dc /mpz/oddfac_1.c | |
parent | 93a0e2aa369311c5a62b6b2b15979109c8bcdc29 (diff) | |
download | gmp-517845eb219f9b87765c8a923701bb163234ab8d.tar.gz |
mpz_oddfac_1: removed from mpz/fac_ui.c, now in a new file.
Diffstat (limited to 'mpz/oddfac_1.c')
-rw-r--r-- | mpz/oddfac_1.c | 592 |
1 files changed, 592 insertions, 0 deletions
diff --git a/mpz/oddfac_1.c b/mpz/oddfac_1.c new file mode 100644 index 000000000..ccfee8179 --- /dev/null +++ b/mpz/oddfac_1.c @@ -0,0 +1,592 @@ +/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!. + +Contributed to the GNU project by Marco Bodrato. + +THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. +IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. +IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR +DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 2010, 2011, 2012 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#include "fac_ui.h" + +/* TODO: + - split this file in smaller parts with functions that can be recycled for different computations. + */ + +/**************************************************************/ +/* Section macros: common macros, for mswing/fac/bin (&sieve) */ +/**************************************************************/ + +#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ + if ((PR) > (MAX_PR)) { \ + (VEC)[(I)++] = (PR); \ + (PR) = 1; \ + } + +#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ + do { \ + if ((PR) > (MAX_PR)) { \ + (VEC)[(I)++] = (PR); \ + (PR) = (P); \ + } else \ + (PR) *= (P); \ + } while (0) + +#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ + __max_i = (end); \ + \ + do { \ + ++__i; \ + if (((sieve)[__index] & __mask) == 0) \ + { \ + (prime) = id_to_n(__i) + +#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ + do { \ + mp_limb_t __mask, __index, __max_i, __i; \ + \ + __i = (start)-(off); \ + __index = __i / GMP_LIMB_BITS; \ + __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ + __i += (off); \ + \ + LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) + +#define LOOP_ON_SIEVE_STOP \ + } \ + __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ + __index += __mask & 1; \ + } while (__i <= __max_i) \ + +#define LOOP_ON_SIEVE_END \ + LOOP_ON_SIEVE_STOP; \ + } while (0) + +/*********************************************************/ +/* Section sieve: sieving functions and tools for primes */ +/*********************************************************/ + +static mp_limb_t +bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } + +/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ +static mp_limb_t +id_to_n (mp_limb_t id) { return id*3+1+(id&1); } + +/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ +static mp_limb_t +n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } + +static mp_size_t +primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } + +#if GMP_LIMB_BITS > 61 +#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480) +#define SEED_LIMIT 202 +#else +#if GMP_LIMB_BITS > 30 +#define SIEVE_SEED CNST_LIMB(0x69128480) +#define SEED_LIMIT 114 +#else +#if GMP_LIMB_BITS > 15 +#define SIEVE_SEED CNST_LIMB(0x8480) +#define SEED_LIMIT 54 +#else +#if GMP_LIMB_BITS > 7 +#define SIEVE_SEED CNST_LIMB(0x80) +#define SEED_LIMIT 34 +#else +#define SIEVE_SEED CNST_LIMB(0x0) +#define SEED_LIMIT 24 +#endif /* 7 */ +#endif /* 15 */ +#endif /* 30 */ +#endif /* 61 */ + +static void +first_block_primesieve (mp_ptr bit_array, mp_limb_t n) +{ + mp_size_t bits, limbs; + + ASSERT (n > 4); + + bits = n_to_bit(n); + limbs = bits / GMP_LIMB_BITS + 1; + + /* FIXME: We can skip 5 too, filling with a 5-part pattern. */ + MPN_ZERO (bit_array, limbs); + bit_array[0] = SIEVE_SEED; + + if ((bits + 1) % GMP_LIMB_BITS != 0) + bit_array[limbs-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS); + + if (n > SEED_LIMIT) { + mp_limb_t mask, index, i; + + ASSERT (n > 49); + + mask = 1; + index = 0; + i = 1; + do { + if ((bit_array[index] & mask) == 0) + { + mp_size_t step, lindex; + mp_limb_t lmask; + unsigned maskrot; + + step = id_to_n(i); +/* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */ + lindex = i*(step+1)-1+(-(i&1)&(i+1)); +/* lindex = i*(step+1+(i&1))-1+(i&1); */ + if (lindex > bits) + break; + + step <<= 1; + maskrot = step % GMP_LIMB_BITS; + + lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS); + do { + bit_array[lindex / GMP_LIMB_BITS] |= lmask; + lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot); + lindex += step; + } while (lindex <= bits); + +/* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */ + lindex = i*(i*3+6)+(i&1); + + lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS); + for ( ; lindex <= bits; lindex += step) { + bit_array[lindex / GMP_LIMB_BITS] |= lmask; + lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot); + }; + } + mask = mask << 1 | mask >> (GMP_LIMB_BITS-1); + index += mask & 1; + i++; + } while (1); + } +} + +static void +block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset, + mp_srcptr sieve, mp_limb_t sieve_bits) +{ + mp_size_t bits, step; + + ASSERT (limbs > 0); + + bits = limbs * GMP_LIMB_BITS - 1; + + /* FIXME: We can skip 5 too, filling with a 5-part pattern. */ + MPN_ZERO (bit_array, limbs); + + LOOP_ON_SIEVE_BEGIN(step,0,sieve_bits,0,sieve); + { + mp_size_t lindex; + mp_limb_t lmask; + unsigned maskrot; + +/* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */ + lindex = __i*(step+1)-1+(-(__i&1)&(__i+1)); +/* lindex = __i*(step+1+(__i&1))-1+(__i&1); */ + if (lindex > bits + offset) + break; + + step <<= 1; + maskrot = step % GMP_LIMB_BITS; + + if (lindex < offset) + lindex += step * ((offset - lindex - 1) / step + 1); + + lindex -= offset; + + lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS); + for ( ; lindex <= bits; lindex += step) { + bit_array[lindex / GMP_LIMB_BITS] |= lmask; + lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot); + }; + +/* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */ + lindex = __i*(__i*3+6)+(__i&1); + if (lindex > bits + offset) + continue; + + if (lindex < offset) + lindex += step * ((offset - lindex - 1) / step + 1); + + lindex -= offset; + + lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS); + for ( ; lindex <= bits; lindex += step) { + bit_array[lindex / GMP_LIMB_BITS] |= lmask; + lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot); + }; + } + LOOP_ON_SIEVE_END; +} + +#define BLOCK_SIZE 2048 + +/* Fills bit_array with the characteristic function of composite + numbers up to the parameter n. I.e. a bit set to "1" represent a + composite, a "0" represent a prime. + + The primesieve_size(n) limbs pointed to by bit_array are + overwritten. The returned value counts prime integers in the + interval [4, n]. Note that n > 4. + + Even numbers and multiples of 3 are excluded "a priori", only + numbers equivalent to +/- 1 mod 6 have their bit in the array. + + Once sieved, if the bit b is ZERO it represent a prime, the + represented prime is bit_to_n(b), if the LSbit is bit 0, or + id_to_n(b), if you call "1" the first bit. + */ + +static mp_limb_t +bitwise_primesieve (mp_ptr bit_array, mp_limb_t n) +{ + mp_size_t size; + mp_limb_t bits; + + ASSERT (n > 4); + + bits = n_to_bit(n); + size = bits / GMP_LIMB_BITS + 1; + + if (size > BLOCK_SIZE * 2) { + mp_size_t off; + off = BLOCK_SIZE + (size % BLOCK_SIZE); + first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS)); + for ( ; off < size; off += BLOCK_SIZE) + block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array, off * GMP_LIMB_BITS - 1); + } else { + first_block_primesieve (bit_array, n); + } + + if ((bits + 1) % GMP_LIMB_BITS != 0) + bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS); + + + return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size); +} + +#undef BLOCK_SIZE +#undef SEED_LIMIT +#undef SIEVE_SEED + +/*********************************************************/ +/* Section mswing: 2-multiswing factorial */ +/*********************************************************/ + +/* Returns an approximation of the sqare root of x. * + * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */ +static mp_limb_t +limb_apprsqrt (mp_limb_t x) +{ + int s; + + ASSERT (x > 2); + count_leading_zeros (s, x - 1); + s = GMP_LIMB_BITS - 1 - s; + return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1)); +} + +#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __q, __prime; \ + __prime = (P); \ + FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ + __q = (N); \ + do { \ + __q /= __prime; \ + if ((__q & 1) != 0) (PR) *= __prime; \ + } while (__q >= __prime); \ + } while (0) + +#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __prime; \ + __prime = (P); \ + if ((((N) / __prime) & 1) != 0) \ + FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \ + } while (0) + +/* mpz_2multiswing_1 computes the odd part of the 2-multiswing + factorial of the parameter n. The result x is an odd positive + integer so that multiswing(n,2) = x 2^a. + + Uses the algorithm described by Peter Luschny in "Divide, Swing and + Conquer the Factorial!". + + The pointer sieve points to primesieve_size(n) limbs containing a + bit-array where primes are marked as 0. + Enough (FIXME: explain :-) limbs must be pointed by factors. + */ + +static void +mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors) +{ + mp_limb_t prod, max_prod; + mp_size_t j; + + ASSERT (n >= 15); + + j = 0; + prod = -(n & 1); + n &= ~ CNST_LIMB(1); /* n-1, if n is odd */ + + prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */ + max_prod = GMP_NUMB_MAX / (n-1); + + /* Handle prime = 3 separately. */ + SWING_A_PRIME (3, n, prod, max_prod, factors, j); + + /* Swing primes from 5 to n/3 */ + if (1 || n > 5*3) { + mp_limb_t s; + + if (1 || n >= 5*5) { + mp_limb_t prime; + + s = limb_apprsqrt(n); + ASSERT (s >= 5); + s = n_to_bit (s); + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); + SWING_A_PRIME (prime, n, prod, max_prod, factors, j); + LOOP_ON_SIEVE_END; + s++; + } else + s = n_to_bit (5); + + ASSERT (max_prod <= GMP_NUMB_MAX / 3); + max_prod *= 3; + ASSERT (bit_to_n (s) * bit_to_n (s) > n); + ASSERT (s <= n_to_bit (n / 3)); + { + mp_limb_t prime; + + LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve); + SH_SWING_A_PRIME (prime, n, prod, max_prod, factors, j); + LOOP_ON_SIEVE_END; + } + max_prod /= 3; + } + + /* Store primes from (n+1)/2 to n */ + { + mp_limb_t prime; + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve); + FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); + LOOP_ON_SIEVE_END; + } + + if (j != 0) + { + factors[j++] = prod; + mpz_prodlimbs (x, factors, j); + } + else + { + PTR (x)[0] = prod; + SIZ (x) = 1; + } +} + +#undef SWING_A_PRIME +#undef SH_SWING_A_PRIME +#undef LOOP_ON_SIEVE_END +#undef LOOP_ON_SIEVE_STOP +#undef LOOP_ON_SIEVE_BEGIN +#undef LOOP_ON_SIEVE_CONTINUE +#undef FACTOR_LIST_APPEND + +/*********************************************************/ +/* Section oddfac: odd factorial, needed also by binomial*/ +/*********************************************************/ + +/* This is intended for constant THRESHOLDs only, where the compiler + can completely fold the result. */ +#define LOG2C(n) \ + (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \ + ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \ + ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \ + ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000)) + +#if TUNE_PROGRAM_BUILD +#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1)) +#else +#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1)) +#endif + +/* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */ +static unsigned +log_n_max (mp_limb_t n) +{ + unsigned log; + + if (n < CNST_LIMB(1) << (GMP_NUMB_BITS/8)) + log = 8; + else + { + mp_limb_t max, cur; + log = 1; + max = GMP_NUMB_MAX / n; + for (cur = n; cur <= max; cur *= n) log++; + } + + return log; +} + +/* mpz_oddfac_1 computes the odd part of the factorial of the + parameter n. I.e. n! = x 2^a, where x is the returned value: an + odd positive integer. + */ +void +mpz_oddfac_1 (mpz_ptr x, mp_limb_t n) +{ + static const mp_limb_t tablef[] = { ONE_LIMB_ODD_FACTORIAL_TABLE }; + static const mp_limb_t tabled[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE }; + + if (n < numberof (tablef)) + { + PTR (x)[0] = tablef[n]; + SIZ (x) = 1; + } + else + { + unsigned s; + mp_ptr factors; + + ASSERT (n <= GMP_NUMB_MAX); + + s = 0; + { + mp_limb_t tn; + mp_size_t j; + TMP_SDECL; + +#if TUNE_PROGRAM_BUILD + ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD); +#endif + + /* Compute the number of recursive steps for the DSC algorithm. */ + for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++) + tn >>= 1; + + j = 0; + + TMP_SMARK; + factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB); + ASSERT (tn >= FACTORS_PER_LIMB); + + if (tn >= numberof (tabled) * 2 + 1) { + mp_limb_t prod, max_prod, i; + + prod = 1; +#if TUNE_PROGRAM_BUILD + max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT; +#else + max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD; +#endif + + do { + i = numberof (tabled) * 2 + 1; + factors[j++] = tabled[numberof (tabled) - 1]; + do { + FACTOR_LIST_STORE (i, prod, max_prod, factors, j); + i += 2; + } while (i <= tn); + max_prod <<= 1; + tn >>= 1; + } while (tn >= numberof (tabled) * 2 + 1); + + factors[j++] = prod; + } + + ASSERT (numberof (tablef) > numberof (tabled)); + factors[j++] = tabled[(tn - 1) >> 1]; + factors[j++] = tablef[tn >> 1]; + mpz_prodlimbs (x, factors, j); + + TMP_SFREE; + } + + if (s != 0) + /* Use the algorithm described by Peter Luschny in "Divide, + Swing and Conquer the Factorial!". + */ + { + mpz_t mswing; + mp_ptr sieve; + mp_size_t size; + TMP_DECL; + + TMP_MARK; + + size = n / GMP_NUMB_BITS; + ASSERT (primesieve_size (n - 1) <= (size + 4) - (size / 2 + 3)); + /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS); + one more can be overwritten by mul, another for the sieve */ + MPZ_TMP_INIT (mswing, size + 4); + /* Put the sieve on the second half, it will be overwritten by the last mswing. */ + sieve = PTR(mswing) + size / 2 + 3; + + size = (bitwise_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1; + + factors = TMP_ALLOC_LIMBS (size); + do { + mp_ptr square; + mp_size_t nx, ns; + mp_limb_t cy; + TMP_DECL; + + s--; + mpz_2multiswing_1 (mswing, n >> s, sieve, factors); + + TMP_MARK; + nx = SIZ (x); + size = nx << 1; + square = TMP_ALLOC_LIMBS (size); + mpn_sqr (square, PTR (x), nx); + ns = SIZ(mswing); + size -= (square[size - 1] == 0); + nx = size + ns; + MPZ_REALLOC (x, nx); + ASSERT (ns <= size); + cy = mpn_mul (PTR(x), square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */ + + TMP_FREE; + SIZ(x) = nx - (cy == 0); + } while (s != 0); + TMP_FREE; + } + } +} + +#undef LOG2C +#undef FACTORS_PER_LIMB +#undef FACTOR_LIST_STORE + |