/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K. Copyright 1998-2002, 2012, 2013, 2015, 2017-2018, 2020 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of either: * the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. or * the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. or both in parallel, as here. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with the GNU MP Library. If not, see https://www.gnu.org/licenses/. */ #include "gmp-impl.h" /* How many special cases? Minimum is 2: 0 and 1; * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented. */ #define APARTAJ_KALKULOJ 2 /* Whether to use (1) or not (0) the function mpz_bin_uiui whenever * the operands fit. */ #define UZU_BIN_UIUI 0 /* Whether to use a shortcut to precompute the product of four * elements (1), or precompute only the product of a couple (0). * * In both cases the precomputed product is then updated with some * linear operations to obtain the product of the next four (1) * [or two (0)] operands. */ #define KVAROPE 1 static void posmpz_init (mpz_ptr r) { mp_ptr rp; ASSERT (SIZ (r) > 0); rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2); *rp = 0; *++rp = 0; } /* Equivalent to mpz_add_ui (r, r, in), but faster when 0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */ static void posmpz_inc_ui (mpz_ptr r, unsigned long in) { #if BITS_PER_ULONG > GMP_NUMB_BITS mpz_add_ui (r, r, in); #else ASSERT (SIZ (r) > 0); MPN_INCR_U (PTR (r), SIZ (r) + 1, in); SIZ (r) += PTR (r)[SIZ (r)]; #endif } /* Equivalent to mpz_sub_ui (r, r, in), but faster when 0 < SIZ (r) and we know in advance that the result is positive. */ static void posmpz_dec_ui (mpz_ptr r, unsigned long in) { #if BITS_PER_ULONG > GMP_NUMB_BITS mpz_sub_ui (r, r, in); #else ASSERT (mpz_cmp_ui (r, in) >= 0); MPN_DECR_U (PTR (r), SIZ (r), in); SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0); #endif } /* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when 0 < SIZ (r) and we know in advance that the result is positive. */ static void posmpz_rsh1 (mpz_ptr r) { mp_ptr rp; mp_size_t rn; rn = SIZ (r); rp = PTR (r); ASSERT (rn > 0); mpn_rshift (rp, rp, rn, 1); SIZ (r) -= rp[rn - 1] == 0; } /* Computes r = n(n+(2*k-1))/2 It uses a sqare instead of a product, computing r = ((n+k-1)^2 + n - (k-1)^2)/2 As a side effect, sets t = n+k-1 */ static void mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t) { ASSERT (k > 0 && SIZ(n) > 0); --k; mpz_add_ui (t, n, k); mpz_mul (r, t, t); mpz_add (r, r, n); posmpz_rsh1 (r); if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2)))) posmpz_dec_ui (r, (k + (k & 1))*(k >> 1)); else { mpz_t tmp; mpz_init_set_ui (tmp, (k + (k & 1))); mpz_mul_ui (tmp, tmp, k >> 1); mpz_sub (r, r, tmp); mpz_clear (tmp); } } #if KVAROPE static void rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t) { if (k - lk < 5) { do { posmpz_inc_ui (p, 4*k+2); mpz_addmul_ui (P, p, 4*k); posmpz_dec_ui (P, k); mpz_mul (r, r, P); } while (--k > lk); } else { mpz_t lt; unsigned long int m; ALLOC (lt) = 0; SIZ (lt) = 0; if (t == NULL) t = lt; m = ((k + lk) >> 1) + 1; rek_raising_fac4 (r, p, P, k, m, t); posmpz_inc_ui (p, 4*m+2); mpz_addmul_ui (P, p, 4*m); posmpz_dec_ui (P, m); mpz_set (t, P); rek_raising_fac4 (t, p, P, m - 1, lk, NULL); mpz_mul (r, r, t); mpz_clear (lt); } } /* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function rek_raising_fac4, and exploiting an idea inspired by a piece of code that Fredrik Johansson wrote and by a comment by Niels Möller. Assume k = 4i then compute: p = (n+1)(n+4i)/2 - i (n+1+1)(n+4i)/2 = p + i + (n+4i)/2 (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1 P = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8 n' = n + 2 i' = i - 1 (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P (n'-1)(n'+4i'+2)/2 - i' - 1 = p (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2) (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) = p + 4i' + 1 (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2 p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i' p' - 4i' - 2 = p (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P (p' - 3i' - 1)*(p' - i')/2 = P (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2 (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2 (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2 (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i' P' = P + 4i'p' - i' And compute the product P * P' * P" ... */ static void mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p) { ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0)); posmpz_init (n); posmpz_inc_ui (n, 1); SIZ (r) = 0; if (k & 1) { mpz_set (r, n); posmpz_inc_ui (n, 1); } k >>= 1; if (APARTAJ_KALKULOJ < 2 && k == 0) return; mpz_hmul_nbnpk (p, n, k, t); posmpz_init (p); if (k & 1) { if (SIZ (r)) mpz_mul (r, r, p); else mpz_set (r, p); posmpz_inc_ui (p, k - 1); } k >>= 1; if (APARTAJ_KALKULOJ < 4 && k == 0) return; mpz_hmul_nbnpk (t, p, k, n); if (SIZ (r)) mpz_mul (r, r, t); else mpz_set (r, t); if (APARTAJ_KALKULOJ > 8 || k > 1) { posmpz_dec_ui (p, k); rek_raising_fac4 (r, p, t, k - 1, 0, n); } } #else /* KVAROPE */ static void rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2) { /* Should the threshold depend on SIZ (n) ? */ if (k - lk < 10) { do { posmpz_inc_ui (n, k); mpz_mul (r, r, n); --k; } while (k > lk); } else { mpz_t t3; unsigned long int m; m = ((k + lk) >> 1) + 1; rek_raising_fac (r, n, k, m, t1, t2); posmpz_inc_ui (n, m); if (t1 == NULL) { mpz_init_set (t3, n); t1 = t3; } else { ALLOC (t3) = 0; mpz_set (t1, n); } rek_raising_fac (t1, n, m - 1, lk, t2, NULL); mpz_mul (r, r, t1); mpz_clear (t3); } } /* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function rek_raising_fac, and exploiting an idea inspired by a piece of code that Fredrik Johansson wrote. Force an even k = 2i then compute: p = (n+1)(n+2i)/2 i' = i - 1 p == (n+1)(n+2i'+2)/2 p' = p + i' == (n+2)(n+2i'+1)/2 n' = n + 1 p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2 And compute the product p * p' * p" ... */ static void mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p) { unsigned long int hk; ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1)); mpz_add_ui (n, n, 1); hk = k >> 1; mpz_hmul_nbnpk (p, n, hk, t); if ((k & 1) != 0) { mpz_add_ui (t, t, hk + 1); mpz_mul (r, t, p); } else { mpz_set (r, p); } if ((APARTAJ_KALKULOJ > 3) || (hk > 1)) { posmpz_init (p); rek_raising_fac (r, p, hk - 1, 0, t, n); } } #endif /* KVAROPE */ /* This is a poor implementation. Look at bin_uiui.c for improvement ideas. In fact consider calling mpz_bin_uiui() when the arguments fit, leaving the code here only for big n. The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol 1 section 1.2.6 part G. */ void mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k) { mpz_t ni; mp_size_t negate; if (SIZ (n) < 0) { /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */ mpz_init (ni); mpz_add_ui (ni, n, 1L); mpz_neg (ni, ni); negate = (k & 1); /* (-1)^k */ } else { /* bin(n,k) == 0 if k>n (no test for this under the n<0 case, since -n+k-1 >= k there) */ if (mpz_cmp_ui (n, k) < 0) { SIZ (r) = 0; return; } /* set ni = n-k */ mpz_init (ni); mpz_sub_ui (ni, n, k); negate = 0; } /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0 for positive, 1 for negative). */ /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's whether ni+k-k < k meaning ni 2 else if (k > 1) { mpz_add_ui (ni, ni, 1 + (APARTAJ_KALKULOJ > 2 && k > 2)); mpz_mul (r, ni, ni); /* r = (n + (k>2))^2 */ if (APARTAJ_KALKULOJ == 2 || k == 2) { mpz_add (r, r, ni); /* n^2+n= n(n+1) */ posmpz_rsh1 (r); } #if APARTAJ_KALKULOJ > 3 #if APARTAJ_KALKULOJ != 5 #error Not implemented! 3 < APARTAJ_KALKULOJ != 5 #endif else /* k > 2 */ { /* k = 3, 4 */ mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */ if (k == 3) { mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */ /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */ } else /* k = 4 */ { mpz_add (ni, ni, r); /* (n+1)^2+n */ mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */ /* We should subtract one: ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3). */ /* PTR (r) [0] ^= 1; would suffice, but it is not even needed, */ /* because the next division will shift away this bit anyway. */ /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */ } mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1 | (k>>1)); SIZ(r) -= PTR(r) [SIZ(r) - 1] == 0; } #endif } #endif else { /* k = 1 */ mpz_add_ui (r, ni, 1); } } #if UZU_BIN_UIUI else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0) { mpz_bin_uiui (r, mpz_get_ui (ni) + k, k); } #endif else { mp_limb_t count; mpz_t num, den; mpz_init (num); mpz_init (den); #if KVAROPE mpz_raising_fac4 (num, ni, k, den, r); popc_limb (count, k); ASSERT (k - (k >> 1) - (k >> 2) - count >= 0); mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count); #else mpz_raising_fac (num, ni, k, den, r); popc_limb (count, k); ASSERT (k - (k >> 1) - count >= 0); mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count); #endif mpz_oddfac_1(den, k, 0); mpz_divexact(r, num, den); mpz_clear (num); mpz_clear (den); } mpz_clear (ni); SIZ(r) = (SIZ(r) ^ -negate) + negate; }