/* Reference mpn functions, designed to be simple, portable and independent of the normal gmp code. Speed isn't a consideration. Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc. This file is part of the GNU MP Library test suite. The GNU MP Library test suite is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library test suite is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ /* Most routines have assertions representing what the mpn routines are supposed to accept. Many of these reference routines do sensible things outside these ranges (eg. for size==0), but the assertions are present to pick up bad parameters passed here that are about to be passed the same to a real mpn routine being compared. */ /* always do assertion checking */ #define WANT_ASSERT 1 #include /* for NULL */ #include /* for malloc */ #include "gmp-impl.h" #include "longlong.h" #include "tests.h" /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes in bytes. */ int byte_overlap_p (const void *v_xp, mp_size_t xsize, const void *v_yp, mp_size_t ysize) { const char *xp = (const char *) v_xp; const char *yp = (const char *) v_yp; ASSERT (xsize >= 0); ASSERT (ysize >= 0); /* no wraparounds */ ASSERT (xp+xsize >= xp); ASSERT (yp+ysize >= yp); if (xp + xsize <= yp) return 0; if (yp + ysize <= xp) return 0; return 1; } /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */ int refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) { return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES, yp, ysize * GMP_LIMB_BYTES); } /* Check overlap for a routine defined to work low to high. */ int refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) { return (dst <= src || ! refmpn_overlap_p (dst, size, src, size)); } /* Check overlap for a routine defined to work high to low. */ int refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) { return (dst >= src || ! refmpn_overlap_p (dst, size, src, size)); } /* Check overlap for a standard routine requiring equal or separate. */ int refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) { return (dst == src || ! refmpn_overlap_p (dst, size, src, size)); } int refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) { return (refmpn_overlap_fullonly_p (dst, src1, size) && refmpn_overlap_fullonly_p (dst, src2, size)); } mp_ptr refmpn_malloc_limbs (mp_size_t size) { mp_ptr p; ASSERT (size >= 0); if (size == 0) size = 1; p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES)); ASSERT (p != NULL); return p; } /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free * memory allocated by refmpn_malloc_limbs_aligned. */ void refmpn_free_limbs (mp_ptr p) { free (p); } mp_ptr refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size) { mp_ptr p; p = refmpn_malloc_limbs (size); refmpn_copyi (p, ptr, size); return p; } /* malloc n limbs on a multiple of m bytes boundary */ mp_ptr refmpn_malloc_limbs_aligned (mp_size_t n, size_t m) { return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m); } void refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value) { mp_size_t i; ASSERT (size >= 0); for (i = 0; i < size; i++) ptr[i] = value; } void refmpn_zero (mp_ptr ptr, mp_size_t size) { refmpn_fill (ptr, size, CNST_LIMB(0)); } void refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize) { ASSERT (newsize >= oldsize); refmpn_zero (ptr+oldsize, newsize-oldsize); } int refmpn_zero_p (mp_srcptr ptr, mp_size_t size) { mp_size_t i; for (i = 0; i < size; i++) if (ptr[i] != 0) return 0; return 1; } mp_size_t refmpn_normalize (mp_srcptr ptr, mp_size_t size) { ASSERT (size >= 0); while (size > 0 && ptr[size-1] == 0) size--; return size; } /* the highest one bit in x */ mp_limb_t refmpn_msbone (mp_limb_t x) { mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1); while (n != 0) { if (x & n) break; n >>= 1; } return n; } /* a mask of the highest one bit plus and all bits below */ mp_limb_t refmpn_msbone_mask (mp_limb_t x) { if (x == 0) return 0; return (refmpn_msbone (x) << 1) - 1; } /* How many digits in the given base will fit in a limb. Notice that the product b is allowed to be equal to the limit 2^GMP_NUMB_BITS, this ensures the result for base==2 will be GMP_NUMB_BITS (and similarly other powers of 2). */ int refmpn_chars_per_limb (int base) { mp_limb_t limit[2], b[2]; int chars_per_limb; ASSERT (base >= 2); limit[0] = 0; /* limit = 2^GMP_NUMB_BITS */ limit[1] = 1; b[0] = 1; /* b = 1 */ b[1] = 0; chars_per_limb = 0; for (;;) { if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base)) break; if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0) break; chars_per_limb++; } return chars_per_limb; } /* The biggest value base**n which fits in GMP_NUMB_BITS. */ mp_limb_t refmpn_big_base (int base) { int chars_per_limb = refmpn_chars_per_limb (base); int i; mp_limb_t bb; ASSERT (base >= 2); bb = 1; for (i = 0; i < chars_per_limb; i++) bb *= base; return bb; } void refmpn_setbit (mp_ptr ptr, unsigned long bit) { ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS); } void refmpn_clrbit (mp_ptr ptr, unsigned long bit) { ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS)); } #define REFMPN_TSTBIT(ptr,bit) \ (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0) int refmpn_tstbit (mp_srcptr ptr, unsigned long bit) { return REFMPN_TSTBIT (ptr, bit); } unsigned long refmpn_scan0 (mp_srcptr ptr, unsigned long bit) { while (REFMPN_TSTBIT (ptr, bit) != 0) bit++; return bit; } unsigned long refmpn_scan1 (mp_srcptr ptr, unsigned long bit) { while (REFMPN_TSTBIT (ptr, bit) == 0) bit++; return bit; } void refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size) { ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); refmpn_copyi (rp, sp, size); } void refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size) { mp_size_t i; ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); ASSERT (size >= 0); for (i = 0; i < size; i++) rp[i] = sp[i]; } void refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size) { mp_size_t i; ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); ASSERT (size >= 0); for (i = size-1; i >= 0; i--) rp[i] = sp[i]; } /* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low zeros to wsize. If x is longer, then copy just the high wsize limbs. */ void refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize) { ASSERT (wsize >= 0); ASSERT (xsize >= 0); /* high part of x if x bigger than w */ if (xsize > wsize) { xp += xsize - wsize; xsize = wsize; } refmpn_copy (wp + wsize-xsize, xp, xsize); refmpn_zero (wp, wsize-xsize); } int refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) { mp_size_t i; ASSERT (size >= 1); ASSERT_MPN (xp, size); ASSERT_MPN (yp, size); for (i = size-1; i >= 0; i--) { if (xp[i] > yp[i]) return 1; if (xp[i] < yp[i]) return -1; } return 0; } int refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size) { if (size == 0) return 0; else return refmpn_cmp (xp, yp, size); } int refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) { int opp, cmp; ASSERT_MPN (xp, xsize); ASSERT_MPN (yp, ysize); opp = (xsize < ysize); if (opp) MPN_SRCPTR_SWAP (xp,xsize, yp,ysize); if (! refmpn_zero_p (xp+ysize, xsize-ysize)) cmp = 1; else cmp = refmpn_cmp (xp, yp, ysize); return (opp ? -cmp : cmp); } int refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size) { mp_size_t i; ASSERT (size >= 0); for (i = 0; i < size; i++) if (xp[i] != yp[i]) return 0; return 1; } #define LOGOPS(operation) \ { \ mp_size_t i; \ \ ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ ASSERT (size >= 1); \ ASSERT_MPN (s1p, size); \ ASSERT_MPN (s2p, size); \ \ for (i = 0; i < size; i++) \ rp[i] = operation; \ } void refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS (s1p[i] & s2p[i]); } void refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS (s1p[i] & ~s2p[i]); } void refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK); } void refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS (s1p[i] | s2p[i]); } void refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK)); } void refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK); } void refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS (s1p[i] ^ s2p[i]); } void refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK); } /* set *dh,*dl to mh:ml - sh:sl, in full limbs */ void refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl, mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl) { *dl = ml - sl; *dh = mh - sh - (ml < sl); } /* set *w to x+y, return 0 or 1 carry */ mp_limb_t ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) { mp_limb_t sum, cy; ASSERT_LIMB (x); ASSERT_LIMB (y); sum = x + y; #if GMP_NAIL_BITS == 0 *w = sum; cy = (sum < x); #else *w = sum & GMP_NUMB_MASK; cy = (sum >> GMP_NUMB_BITS); #endif return cy; } /* set *w to x-y, return 0 or 1 borrow */ mp_limb_t ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) { mp_limb_t diff, cy; ASSERT_LIMB (x); ASSERT_LIMB (y); diff = x - y; #if GMP_NAIL_BITS == 0 *w = diff; cy = (diff > x); #else *w = diff & GMP_NUMB_MASK; cy = (diff >> GMP_NUMB_BITS) & 1; #endif return cy; } /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */ mp_limb_t adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) { mp_limb_t r; ASSERT_LIMB (x); ASSERT_LIMB (y); ASSERT (c == 0 || c == 1); r = ref_addc_limb (w, x, y); return r + ref_addc_limb (w, *w, c); } /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */ mp_limb_t sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) { mp_limb_t r; ASSERT_LIMB (x); ASSERT_LIMB (y); ASSERT (c == 0 || c == 1); r = ref_subc_limb (w, x, y); return r + ref_subc_limb (w, *w, c); } #define AORS_1(operation) \ { \ mp_size_t i; \ \ ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ ASSERT (size >= 1); \ ASSERT_MPN (sp, size); \ ASSERT_LIMB (n); \ \ for (i = 0; i < size; i++) \ n = operation (&rp[i], sp[i], n); \ return n; \ } mp_limb_t refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) { AORS_1 (ref_addc_limb); } mp_limb_t refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) { AORS_1 (ref_subc_limb); } #define AORS_NC(operation) \ { \ mp_size_t i; \ \ ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ ASSERT (carry == 0 || carry == 1); \ ASSERT (size >= 1); \ ASSERT_MPN (s1p, size); \ ASSERT_MPN (s2p, size); \ \ for (i = 0; i < size; i++) \ carry = operation (&rp[i], s1p[i], s2p[i], carry); \ return carry; \ } mp_limb_t refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t carry) { AORS_NC (adc); } mp_limb_t refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t carry) { AORS_NC (sbb); } mp_limb_t refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0)); } mp_limb_t refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0)); } mp_limb_t refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { if (cnd != 0) return refmpn_add_n (rp, s1p, s2p, size); else { refmpn_copyi (rp, s1p, size); return 0; } } mp_limb_t refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { if (cnd != 0) return refmpn_sub_n (rp, s1p, s2p, size); else { refmpn_copyi (rp, s1p, size); return 0; } } #define AORS_ERR1_N(operation) \ { \ mp_size_t i; \ mp_limb_t carry2; \ \ ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \ ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \ ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \ ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \ ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \ \ ASSERT (carry == 0 || carry == 1); \ ASSERT (size >= 1); \ ASSERT_MPN (s1p, size); \ ASSERT_MPN (s2p, size); \ ASSERT_MPN (yp, size); \ \ ep[0] = ep[1] = CNST_LIMB(0); \ \ for (i = 0; i < size; i++) \ { \ carry = operation (&rp[i], s1p[i], s2p[i], carry); \ if (carry == 1) \ { \ carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \ carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ ASSERT (carry2 == 0); \ } \ } \ return carry; \ } mp_limb_t refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_ptr ep, mp_srcptr yp, mp_size_t size, mp_limb_t carry) { AORS_ERR1_N (adc); } mp_limb_t refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_ptr ep, mp_srcptr yp, mp_size_t size, mp_limb_t carry) { AORS_ERR1_N (sbb); } #define AORS_ERR2_N(operation) \ { \ mp_size_t i; \ mp_limb_t carry2; \ \ ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \ ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \ ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \ ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \ ASSERT (! refmpn_overlap_p (ep, 4, rp, size)); \ \ ASSERT (carry == 0 || carry == 1); \ ASSERT (size >= 1); \ ASSERT_MPN (s1p, size); \ ASSERT_MPN (s2p, size); \ ASSERT_MPN (y1p, size); \ ASSERT_MPN (y2p, size); \ \ ep[0] = ep[1] = CNST_LIMB(0); \ ep[2] = ep[3] = CNST_LIMB(0); \ \ for (i = 0; i < size; i++) \ { \ carry = operation (&rp[i], s1p[i], s2p[i], carry); \ if (carry == 1) \ { \ carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ ASSERT (carry2 == 0); \ carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ ASSERT (carry2 == 0); \ } \ } \ return carry; \ } mp_limb_t refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_size_t size, mp_limb_t carry) { AORS_ERR2_N (adc); } mp_limb_t refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_size_t size, mp_limb_t carry) { AORS_ERR2_N (sbb); } #define AORS_ERR3_N(operation) \ { \ mp_size_t i; \ mp_limb_t carry2; \ \ ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \ ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \ ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \ ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \ ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \ ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \ ASSERT (! refmpn_overlap_p (ep, 6, rp, size)); \ \ ASSERT (carry == 0 || carry == 1); \ ASSERT (size >= 1); \ ASSERT_MPN (s1p, size); \ ASSERT_MPN (s2p, size); \ ASSERT_MPN (y1p, size); \ ASSERT_MPN (y2p, size); \ ASSERT_MPN (y3p, size); \ \ ep[0] = ep[1] = CNST_LIMB(0); \ ep[2] = ep[3] = CNST_LIMB(0); \ ep[4] = ep[5] = CNST_LIMB(0); \ \ for (i = 0; i < size; i++) \ { \ carry = operation (&rp[i], s1p[i], s2p[i], carry); \ if (carry == 1) \ { \ carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ ASSERT (carry2 == 0); \ carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ ASSERT (carry2 == 0); \ carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \ carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \ ASSERT (carry2 == 0); \ } \ } \ return carry; \ } mp_limb_t refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, mp_size_t size, mp_limb_t carry) { AORS_ERR3_N (adc); } mp_limb_t refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, mp_size_t size, mp_limb_t carry) { AORS_ERR3_N (sbb); } mp_limb_t refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, unsigned int s) { mp_limb_t cy; mp_ptr tp; ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); ASSERT (n >= 1); ASSERT (0 < s && s < GMP_NUMB_BITS); ASSERT_MPN (up, n); ASSERT_MPN (vp, n); tp = refmpn_malloc_limbs (n); cy = refmpn_lshift (tp, vp, n, s); cy += refmpn_add_n (rp, up, tp, n); free (tp); return cy; } mp_limb_t refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { return refmpn_addlsh_n (rp, up, vp, n, 1); } mp_limb_t refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { return refmpn_addlsh_n (rp, up, vp, n, 2); } mp_limb_t refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) { return refmpn_addlsh_n (rp, rp, vp, n, s); } mp_limb_t refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_addlsh_n (rp, rp, vp, n, 1); } mp_limb_t refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_addlsh_n (rp, rp, vp, n, 2); } mp_limb_t refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) { return refmpn_addlsh_n (rp, vp, rp, n, s); } mp_limb_t refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_addlsh_n (rp, vp, rp, n, 1); } mp_limb_t refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_addlsh_n (rp, vp, rp, n, 2); } mp_limb_t refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, unsigned int s, mp_limb_t carry) { mp_limb_t cy; ASSERT (carry <= (CNST_LIMB(1) << s)); cy = refmpn_addlsh_n (rp, up, vp, n, s); cy += refmpn_add_1 (rp, rp, n, carry); return cy; } mp_limb_t refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) { return refmpn_addlsh_nc (rp, up, vp, n, 1, carry); } mp_limb_t refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) { return refmpn_addlsh_nc (rp, up, vp, n, 2, carry); } mp_limb_t refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, unsigned int s) { mp_limb_t cy; mp_ptr tp; ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); ASSERT (n >= 1); ASSERT (0 < s && s < GMP_NUMB_BITS); ASSERT_MPN (up, n); ASSERT_MPN (vp, n); tp = refmpn_malloc_limbs (n); cy = mpn_lshift (tp, vp, n, s); cy += mpn_sub_n (rp, up, tp, n); free (tp); return cy; } mp_limb_t refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { return refmpn_sublsh_n (rp, up, vp, n, 1); } mp_limb_t refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { return refmpn_sublsh_n (rp, up, vp, n, 2); } mp_limb_t refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) { return refmpn_sublsh_n (rp, rp, vp, n, s); } mp_limb_t refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_sublsh_n (rp, rp, vp, n, 1); } mp_limb_t refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_sublsh_n (rp, rp, vp, n, 2); } mp_limb_t refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) { return refmpn_sublsh_n (rp, vp, rp, n, s); } mp_limb_t refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_sublsh_n (rp, vp, rp, n, 1); } mp_limb_t refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) { return refmpn_sublsh_n (rp, vp, rp, n, 2); } mp_limb_t refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, unsigned int s, mp_limb_t carry) { mp_limb_t cy; ASSERT (carry <= (CNST_LIMB(1) << s)); cy = refmpn_sublsh_n (rp, up, vp, n, s); cy += refmpn_sub_1 (rp, rp, n, carry); return cy; } mp_limb_t refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) { return refmpn_sublsh_nc (rp, up, vp, n, 1, carry); } mp_limb_t refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) { return refmpn_sublsh_nc (rp, up, vp, n, 2, carry); } mp_limb_signed_t refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, unsigned int s) { mp_limb_signed_t cy; mp_ptr tp; ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); ASSERT (n >= 1); ASSERT (0 < s && s < GMP_NUMB_BITS); ASSERT_MPN (up, n); ASSERT_MPN (vp, n); tp = refmpn_malloc_limbs (n); cy = mpn_lshift (tp, vp, n, s); cy -= mpn_sub_n (rp, tp, up, n); free (tp); return cy; } mp_limb_signed_t refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { return refmpn_rsblsh_n (rp, up, vp, n, 1); } mp_limb_signed_t refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { return refmpn_rsblsh_n (rp, up, vp, n, 2); } mp_limb_signed_t refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, unsigned int s, mp_limb_signed_t carry) { mp_limb_signed_t cy; ASSERT (carry == -1 || (carry >> s) == 0); cy = refmpn_rsblsh_n (rp, up, vp, n, s); if (carry > 0) cy += refmpn_add_1 (rp, rp, n, carry); else cy -= refmpn_sub_1 (rp, rp, n, -carry); return cy; } mp_limb_signed_t refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) { return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry); } mp_limb_signed_t refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) { return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry); } mp_limb_t refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mp_limb_t cya, cys; ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); ASSERT (n >= 1); ASSERT_MPN (up, n); ASSERT_MPN (vp, n); cya = mpn_add_n (rp, up, vp, n); cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); return cys; } mp_limb_t refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mp_limb_t cya, cys; ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); ASSERT (n >= 1); ASSERT_MPN (up, n); ASSERT_MPN (vp, n); cya = mpn_sub_n (rp, up, vp, n); cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); return cys; } /* Twos complement, return borrow. */ mp_limb_t refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size) { mp_ptr zeros; mp_limb_t ret; ASSERT (size >= 1); zeros = refmpn_malloc_limbs (size); refmpn_fill (zeros, size, CNST_LIMB(0)); ret = refmpn_sub_n (dst, zeros, src, size); free (zeros); return ret; } #define AORS(aors_n, aors_1) \ { \ mp_limb_t c; \ ASSERT (s1size >= s2size); \ ASSERT (s2size >= 1); \ c = aors_n (rp, s1p, s2p, s2size); \ if (s1size-s2size != 0) \ c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \ return c; \ } mp_limb_t refmpn_add (mp_ptr rp, mp_srcptr s1p, mp_size_t s1size, mp_srcptr s2p, mp_size_t s2size) { AORS (refmpn_add_n, refmpn_add_1); } mp_limb_t refmpn_sub (mp_ptr rp, mp_srcptr s1p, mp_size_t s1size, mp_srcptr s2p, mp_size_t s2size) { AORS (refmpn_sub_n, refmpn_sub_1); } #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2) #define SHIFTLOW(x) ((x) >> GMP_LIMB_BITS/2) #define LOWMASK (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1) #define HIGHMASK SHIFTHIGH(LOWMASK) #define LOWPART(x) ((x) & LOWMASK) #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK) /* Set return:*lo to x*y, using full limbs not nails. */ mp_limb_t refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y) { mp_limb_t hi, s; *lo = LOWPART(x) * LOWPART(y); hi = HIGHPART(x) * HIGHPART(y); s = LOWPART(x) * HIGHPART(y); hi += HIGHPART(s); s = SHIFTHIGH(LOWPART(s)); *lo += s; hi += (*lo < s); s = HIGHPART(x) * LOWPART(y); hi += HIGHPART(s); s = SHIFTHIGH(LOWPART(s)); *lo += s; hi += (*lo < s); return hi; } mp_limb_t refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo) { return refmpn_umul_ppmm (lo, x, y); } mp_limb_t refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, mp_limb_t carry) { mp_size_t i; mp_limb_t hi, lo; ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); ASSERT (size >= 1); ASSERT_MPN (sp, size); ASSERT_LIMB (multiplier); ASSERT_LIMB (carry); multiplier <<= GMP_NAIL_BITS; for (i = 0; i < size; i++) { hi = refmpn_umul_ppmm (&lo, sp[i], multiplier); lo >>= GMP_NAIL_BITS; ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry))); rp[i] = lo; carry = hi; } return carry; } mp_limb_t refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) { return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); } mp_limb_t refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult, mp_size_t msize) { mp_ptr src_copy; mp_limb_t ret; mp_size_t i; ASSERT (refmpn_overlap_fullonly_p (dst, src, size)); ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); ASSERT (size >= msize); ASSERT_MPN (mult, msize); /* in case dst==src */ src_copy = refmpn_malloc_limbs (size); refmpn_copyi (src_copy, src, size); src = src_copy; dst[size] = refmpn_mul_1 (dst, src, size, mult[0]); for (i = 1; i < msize-1; i++) dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); free (src_copy); return ret; } mp_limb_t refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2); } mp_limb_t refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3); } mp_limb_t refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4); } mp_limb_t refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5); } mp_limb_t refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6); } #define AORSMUL_1C(operation_n) \ { \ mp_ptr p; \ mp_limb_t ret; \ \ ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ \ p = refmpn_malloc_limbs (size); \ ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \ ret += operation_n (rp, rp, p, size); \ \ free (p); \ return ret; \ } mp_limb_t refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, mp_limb_t carry) { AORSMUL_1C (refmpn_add_n); } mp_limb_t refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, mp_limb_t carry) { AORSMUL_1C (refmpn_sub_n); } mp_limb_t refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) { return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); } mp_limb_t refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) { return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); } mp_limb_t refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult, mp_size_t msize) { mp_ptr src_copy; mp_limb_t ret; mp_size_t i; ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size)); ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); ASSERT (size >= msize); ASSERT_MPN (mult, msize); /* in case dst==src */ src_copy = refmpn_malloc_limbs (size); refmpn_copyi (src_copy, src, size); src = src_copy; for (i = 0; i < msize-1; i++) dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); free (src_copy); return ret; } mp_limb_t refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2); } mp_limb_t refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3); } mp_limb_t refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4); } mp_limb_t refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5); } mp_limb_t refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6); } mp_limb_t refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7); } mp_limb_t refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) { return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8); } mp_limb_t refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t carry) { mp_ptr p; mp_limb_t acy, scy; /* Destinations can't overlap. */ ASSERT (! refmpn_overlap_p (r1p, size, r2p, size)); ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size)); ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size)); ASSERT (size >= 1); /* in case r1p==s1p or r1p==s2p */ p = refmpn_malloc_limbs (size); acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1); scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1); refmpn_copyi (r1p, p, size); free (p); return 2 * acy + scy; } mp_limb_t refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0)); } /* Right shift hi,lo and return the low limb of the result. Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ mp_limb_t rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) { ASSERT (shift < GMP_NUMB_BITS); if (shift == 0) return lo; else return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK; } /* Left shift hi,lo and return the high limb of the result. Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ mp_limb_t lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) { ASSERT (shift < GMP_NUMB_BITS); if (shift == 0) return hi; else return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK; } mp_limb_t refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { mp_limb_t ret; mp_size_t i; ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); ASSERT (size >= 1); ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); ASSERT_MPN (sp, size); ret = rshift_make (sp[0], CNST_LIMB(0), shift); for (i = 0; i < size-1; i++) rp[i] = rshift_make (sp[i+1], sp[i], shift); rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift); return ret; } mp_limb_t refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { mp_limb_t ret; mp_size_t i; ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); ASSERT (size >= 1); ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); ASSERT_MPN (sp, size); ret = lshift_make (CNST_LIMB(0), sp[size-1], shift); for (i = size-2; i >= 0; i--) rp[i+1] = lshift_make (sp[i+1], sp[i], shift); rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift); return ret; } void refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size) { mp_size_t i; /* We work downwards since mpn_lshiftc needs that. */ ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); for (i = size - 1; i >= 0; i--) rp[i] = (~sp[i]) & GMP_NUMB_MASK; } mp_limb_t refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { mp_limb_t res; /* No asserts here, refmpn_lshift will assert what we need. */ res = refmpn_lshift (rp, sp, size, shift); refmpn_com (rp, rp, size); return res; } /* accepting shift==0 and doing a plain copyi or copyd in that case */ mp_limb_t refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { if (shift == 0) { refmpn_copyi (rp, sp, size); return 0; } else { return refmpn_rshift (rp, sp, size, shift); } } mp_limb_t refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { if (shift == 0) { refmpn_copyd (rp, sp, size); return 0; } else { return refmpn_lshift (rp, sp, size, shift); } } /* accepting size==0 too */ mp_limb_t refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift)); } mp_limb_t refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) { return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift)); } /* Divide h,l by d, return quotient, store remainder to *rp. Operates on full limbs, not nails. Must have h < d. __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */ mp_limb_t refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d) { mp_limb_t q, r; int n; ASSERT (d != 0); ASSERT (h < d); #if 0 udiv_qrnnd (q, r, h, l, d); *rp = r; return q; #endif n = refmpn_count_leading_zeros (d); d <<= n; if (n != 0) { h = (h << n) | (l >> (GMP_LIMB_BITS - n)); l <<= n; } __udiv_qrnnd_c (q, r, h, l, d); r >>= n; *rp = r; return q; } mp_limb_t refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp) { return refmpn_udiv_qrnnd (rp, h, l, d); } /* This little subroutine avoids some bad code generation from i386 gcc 3.0 -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */ static mp_limb_t refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry) { mp_size_t i; mp_limb_t rem[1]; for (i = size-1; i >= 0; i--) { rp[i] = refmpn_udiv_qrnnd (rem, carry, sp[i] << GMP_NAIL_BITS, divisor << GMP_NAIL_BITS); carry = *rem >> GMP_NAIL_BITS; } return carry; } mp_limb_t refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry) { mp_ptr sp_orig; mp_ptr prod; mp_limb_t carry_orig; ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); ASSERT (size >= 0); ASSERT (carry < divisor); ASSERT_MPN (sp, size); ASSERT_LIMB (divisor); ASSERT_LIMB (carry); if (size == 0) return carry; sp_orig = refmpn_memdup_limbs (sp, size); prod = refmpn_malloc_limbs (size); carry_orig = carry; carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry); /* check by multiplying back */ #if 0 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n", size, divisor, carry_orig, carry); mpn_trace("s",sp_copy,size); mpn_trace("r",rp,size); printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry)); mpn_trace("p",prod,size); #endif ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig); ASSERT (refmpn_cmp (prod, sp_orig, size) == 0); free (sp_orig); free (prod); return carry; } mp_limb_t refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor) { return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0)); } mp_limb_t refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry) { mp_ptr p = refmpn_malloc_limbs (size); carry = refmpn_divmod_1c (p, sp, size, divisor, carry); free (p); return carry; } mp_limb_t refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor) { return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0)); } mp_limb_t refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse) { ASSERT (divisor & GMP_NUMB_HIGHBIT); ASSERT (inverse == refmpn_invert_limb (divisor)); return refmpn_mod_1 (sp, size, divisor); } /* This implementation will be rather slow, but has the advantage of being in a different style than the libgmp versions. */ mp_limb_t refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) { ASSERT ((GMP_NUMB_BITS % 4) == 0); return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1); } mp_limb_t refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry) { mp_ptr z; z = refmpn_malloc_limbs (xsize); refmpn_fill (z, xsize, CNST_LIMB(0)); carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry); carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry); free (z); return carry; } mp_limb_t refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor) { return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0)); } mp_limb_t refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift) { ASSERT (size >= 0); ASSERT (shift == refmpn_count_leading_zeros (divisor)); ASSERT (inverse == refmpn_invert_limb (divisor << shift)); return refmpn_divrem_1 (rp, xsize, sp, size, divisor); } mp_limb_t refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nn, mp_srcptr dp) { mp_ptr tp; mp_limb_t qh; tp = refmpn_malloc_limbs (nn + qxn); refmpn_zero (tp, qxn); refmpn_copyi (tp + qxn, np, nn); qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2); refmpn_copyi (np, tp, 2); free (tp); return qh; } /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers paper, figure 8.1 m', where b=2^GMP_LIMB_BITS. Note that -d-1 < d since d has the high bit set. */ mp_limb_t refmpn_invert_limb (mp_limb_t d) { mp_limb_t r; ASSERT (d & GMP_LIMB_HIGHBIT); return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d); } void refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) { mp_ptr qp, tp; TMP_DECL; TMP_MARK; tp = TMP_ALLOC_LIMBS (2 * n); qp = TMP_ALLOC_LIMBS (n + 1); MPN_ZERO (tp, 2 * n); mpn_sub_1 (tp, tp, 2 * n, 1); refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n); refmpn_copyi (rp, qp, n); TMP_FREE; } void refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) { mp_ptr tp; mp_limb_t binv; TMP_DECL; TMP_MARK; /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing code. To make up for it, we check that the inverse is correct using a multiply. */ tp = TMP_ALLOC_LIMBS (2 * n); MPN_ZERO (tp, n); tp[0] = 1; binvert_limb (binv, up[0]); mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv); refmpn_mul_n (tp, rp, up, n); ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1)); TMP_FREE; } /* The aim is to produce a dst quotient and return a remainder c, satisfying c*b^n + src-i == 3*dst, where i is the incoming carry. Some value c==0, c==1 or c==2 will satisfy, so just try each. If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero remainder from the first division attempt determines the correct remainder (3-c), but don't bother with that, since we can't guarantee anything about GMP_NUMB_BITS when using nails. If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos complement negative, ie. b^n+a-i, and the calculation produces c1 satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This means it's enough to just add any borrow back at the end. A borrow only occurs when a==0 or a==1, and, by the same reasoning as in mpn/generic/diveby3.c, the c1 that results in those cases will only be 0 or 1 respectively, so with 1 added the final return value is still in the prescribed range 0 to 2. */ mp_limb_t refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry) { mp_ptr spcopy; mp_limb_t c, cs; ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); ASSERT (size >= 1); ASSERT (carry <= 2); ASSERT_MPN (sp, size); spcopy = refmpn_malloc_limbs (size); cs = refmpn_sub_1 (spcopy, sp, size, carry); for (c = 0; c <= 2; c++) if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0) goto done; ASSERT_FAIL (no value of c satisfies); done: c += cs; ASSERT (c <= 2); free (spcopy); return c; } mp_limb_t refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size) { return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0)); } /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */ void refmpn_mul_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize) { mp_size_t i; ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); ASSERT (usize >= vsize); ASSERT (vsize >= 1); ASSERT_MPN (up, usize); ASSERT_MPN (vp, vsize); prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]); for (i = 1; i < vsize; i++) prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]); } /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */ void refmpn_mulmid_basecase (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) { mp_limb_t cy; mp_size_t i; ASSERT (un >= vn); ASSERT (vn >= 1); ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un)); ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn)); ASSERT_MPN (up, un); ASSERT_MPN (vp, vn); rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]); rp[un - vn + 2] = CNST_LIMB (0); for (i = 1; i < vn; i++) { cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]); cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy); cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy); ASSERT (cy == 0); } } void refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_ptr scratch) { refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); } void refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { /* FIXME: this could be made faster by using refmpn_mul and then subtracting off products near the middle product region boundary */ refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); } void refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) { /* FIXME: this could be made faster by using refmpn_mul and then subtracting off products near the middle product region boundary */ refmpn_mulmid_basecase (rp, up, un, vp, vn); } #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD)) #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD)) #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD)) #if WANT_FFT #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD)) #else #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */ #endif void refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) { mp_ptr tp, rp; mp_size_t tn; if (vn < TOOM3_THRESHOLD) { /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */ if (vn != 0) refmpn_mul_basecase (wp, up, un, vp, vn); else MPN_ZERO (wp, un); return; } MPN_ZERO (wp, vn); rp = refmpn_malloc_limbs (2 * vn); if (vn < TOOM4_THRESHOLD) tn = mpn_toom22_mul_itch (vn, vn); else if (vn < TOOM6_THRESHOLD) tn = mpn_toom33_mul_itch (vn, vn); else if (vn < FFT_THRESHOLD) tn = mpn_toom44_mul_itch (vn, vn); else tn = mpn_toom6h_mul_itch (vn, vn); tp = refmpn_malloc_limbs (tn); while (un >= vn) { if (vn < TOOM4_THRESHOLD) /* In the toom3 range, use mpn_toom22_mul. */ mpn_toom22_mul (rp, up, vn, vp, vn, tp); else if (vn < TOOM6_THRESHOLD) /* In the toom4 range, use mpn_toom33_mul. */ mpn_toom33_mul (rp, up, vn, vp, vn, tp); else if (vn < FFT_THRESHOLD) /* In the toom6 range, use mpn_toom44_mul. */ mpn_toom44_mul (rp, up, vn, vp, vn, tp); else /* For the largest operands, use mpn_toom6h_mul. */ mpn_toom6h_mul (rp, up, vn, vp, vn, tp); ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn)); wp += vn; up += vn; un -= vn; } free (tp); if (un != 0) { refmpn_mul (rp, vp, vn, up, un); ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn)); } free (rp); } void refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) { refmpn_mul (prodp, up, size, vp, size); } void refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) { mp_ptr tp = refmpn_malloc_limbs (2*size); refmpn_mul (tp, up, size, vp, size); refmpn_copyi (prodp, tp, size); free (tp); } void refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size) { refmpn_mul (dst, src, size, src, size); } void refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size) { refmpn_mullo_n (dst, src, src, size); } /* Allowing usize= 0); ASSERT (vsize >= 0); ASSERT_MPN (up, usize); ASSERT_MPN (vp, vsize); if (usize == 0) { refmpn_fill (prodp, vsize, CNST_LIMB(0)); return; } if (vsize == 0) { refmpn_fill (prodp, usize, CNST_LIMB(0)); return; } if (usize >= vsize) refmpn_mul (prodp, up, usize, vp, vsize); else refmpn_mul (prodp, vp, vsize, up, usize); } mp_limb_t refmpn_gcd_11 (mp_limb_t x, mp_limb_t y) { /* The non-ref function also requires input operands to be odd, but below refmpn_gcd_1 doesn't guarantee that. */ ASSERT (x > 0); ASSERT (y > 0); do { while ((x & 1) == 0) x >>= 1; while ((y & 1) == 0) y >>= 1; if (x < y) MP_LIMB_T_SWAP (x, y); x -= y; } while (x != 0); return y; } mp_double_limb_t refmpn_gcd_22 (mp_limb_t x1, mp_limb_t x0, mp_limb_t y1, mp_limb_t y0) { ASSERT ((x0 & 1) != 0); ASSERT ((y0 & 1) != 0); mp_double_limb_t g; mp_limb_t cy; do { while ((x0 & 1) == 0) { x0 = (x1 << (GMP_NUMB_BITS - 1)) | (x0 >> 1); x1 >>= 1; } while ((y0 & 1) == 0) { y0 = (y1 << (GMP_NUMB_BITS - 1)) | (y0 >> 1); y1 >>= 1; } if (x1 < y1 || (x1 == y1 && x0 < y0)) { mp_limb_t t; t = x1; x1 = y1; y1 = t; t = x0; x0 = y0; y0 = t; } cy = (x0 < y0); x0 -= y0; x1 -= y1 + cy; } while ((x1 | x0) != 0); g.d1 = y1; g.d0 = y0; return g; } mp_limb_t refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y) { mp_limb_t x; int twos; ASSERT (y != 0); ASSERT (! refmpn_zero_p (xp, xsize)); ASSERT_MPN (xp, xsize); ASSERT_LIMB (y); x = refmpn_mod_1 (xp, xsize, y); if (x == 0) return y; twos = 0; while ((x & 1) == 0 && (y & 1) == 0) { x >>= 1; y >>= 1; twos++; } return refmpn_gcd_11 (x, y) << twos; } /* Based on the full limb x, not nails. */ unsigned refmpn_count_leading_zeros (mp_limb_t x) { unsigned n = 0; ASSERT (x != 0); while ((x & GMP_LIMB_HIGHBIT) == 0) { x <<= 1; n++; } return n; } /* Full limbs allowed, not limited to nails. */ unsigned refmpn_count_trailing_zeros (mp_limb_t x) { unsigned n = 0; ASSERT (x != 0); ASSERT_LIMB (x); while ((x & 1) == 0) { x >>= 1; n++; } return n; } /* Strip factors of two (low zero bits) from {p,size} by right shifting. The return value is the number of twos stripped. */ mp_size_t refmpn_strip_twos (mp_ptr p, mp_size_t size) { mp_size_t limbs; unsigned shift; ASSERT (size >= 1); ASSERT (! refmpn_zero_p (p, size)); ASSERT_MPN (p, size); for (limbs = 0; p[0] == 0; limbs++) { refmpn_copyi (p, p+1, size-1); p[size-1] = 0; } shift = refmpn_count_trailing_zeros (p[0]); if (shift) refmpn_rshift (p, p, size, shift); return limbs*GMP_NUMB_BITS + shift; } mp_limb_t refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize) { int cmp; ASSERT (ysize >= 1); ASSERT (xsize >= ysize); ASSERT ((xp[0] & 1) != 0); ASSERT ((yp[0] & 1) != 0); /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */ ASSERT (yp[ysize-1] != 0); ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize)); ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize)); ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize)); if (xsize == ysize) ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1])); ASSERT_MPN (xp, xsize); ASSERT_MPN (yp, ysize); refmpn_strip_twos (xp, xsize); MPN_NORMALIZE (xp, xsize); MPN_NORMALIZE (yp, ysize); for (;;) { cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize); if (cmp == 0) break; if (cmp < 0) MPN_PTR_SWAP (xp,xsize, yp,ysize); ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize)); refmpn_strip_twos (xp, xsize); MPN_NORMALIZE (xp, xsize); } refmpn_copyi (gp, xp, xsize); return xsize; } unsigned long ref_popc_limb (mp_limb_t src) { unsigned long count; int i; count = 0; for (i = 0; i < GMP_LIMB_BITS; i++) { count += (src & 1); src >>= 1; } return count; } unsigned long refmpn_popcount (mp_srcptr sp, mp_size_t size) { unsigned long count = 0; mp_size_t i; ASSERT (size >= 0); ASSERT_MPN (sp, size); for (i = 0; i < size; i++) count += ref_popc_limb (sp[i]); return count; } unsigned long refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) { mp_ptr d; unsigned long count; ASSERT (size >= 0); ASSERT_MPN (s1p, size); ASSERT_MPN (s2p, size); if (size == 0) return 0; d = refmpn_malloc_limbs (size); refmpn_xor_n (d, s1p, s2p, size); count = refmpn_popcount (d, size); free (d); return count; } /* set r to a%d */ void refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2]) { mp_limb_t D[2]; int n; ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2)); ASSERT_MPN (a, 2); ASSERT_MPN (d, 2); D[1] = d[1], D[0] = d[0]; r[1] = a[1], r[0] = a[0]; n = 0; for (;;) { if (D[1] & GMP_NUMB_HIGHBIT) break; if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0) break; refmpn_lshift (D, D, (mp_size_t) 2, 1); n++; ASSERT (n <= GMP_NUMB_BITS); } while (n >= 0) { if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0) ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2)); refmpn_rshift (D, D, (mp_size_t) 2, 1); n--; } ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0); } /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in particular the trial quotient is allowed to be 2 too big. */ mp_limb_t refmpn_sb_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize) { mp_limb_t retval = 0; mp_size_t i; mp_limb_t d1 = dp[dsize-1]; mp_ptr np_orig = refmpn_memdup_limbs (np, nsize); ASSERT (nsize >= dsize); /* ASSERT (dsize > 2); */ ASSERT (dsize >= 2); ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT); ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np); ASSERT_MPN (np, nsize); ASSERT_MPN (dp, dsize); i = nsize-dsize; if (refmpn_cmp (np+i, dp, dsize) >= 0) { ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize)); retval = 1; } for (i--; i >= 0; i--) { mp_limb_t n0 = np[i+dsize]; mp_limb_t n1 = np[i+dsize-1]; mp_limb_t q, dummy_r; ASSERT (n0 <= d1); if (n0 == d1) q = GMP_NUMB_MAX; else q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS); n0 -= refmpn_submul_1 (np+i, dp, dsize, q); ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX); if (n0) { q--; if (! refmpn_add_n (np+i, np+i, dp, dsize)) { q--; ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize)); } } np[i+dsize] = 0; qp[i] = q; } /* remainder < divisor */ #if 0 /* ASSERT triggers gcc 4.2.1 bug */ ASSERT (refmpn_cmp (np, dp, dsize) < 0); #endif /* multiply back to original */ { mp_ptr mp = refmpn_malloc_limbs (nsize); refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize); if (retval) ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize)); ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize)); ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0); free (mp); } free (np_orig); return retval; } /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in particular the trial quotient is allowed to be 2 too big. */ void refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize) { ASSERT (qxn == 0); ASSERT_MPN (np, nsize); ASSERT_MPN (dp, dsize); ASSERT (dsize > 0); ASSERT (dp[dsize-1] != 0); if (dsize == 1) { rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]); return; } else { mp_ptr n2p = refmpn_malloc_limbs (nsize+1); mp_ptr d2p = refmpn_malloc_limbs (dsize); int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS; n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm); ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm)); refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize); refmpn_rshift_or_copy (rp, n2p, dsize, norm); /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */ free (n2p); free (d2p); } } mp_limb_t refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) { mp_size_t j; mp_limb_t cy; ASSERT_MPN (up, 2*n); /* ASSERT about directed overlap rp, up */ /* ASSERT about overlap rp, mp */ /* ASSERT about overlap up, mp */ for (j = n - 1; j >= 0; j--) { up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); up++; } cy = mpn_add_n (rp, up, up - n, n); return cy; } size_t refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size) { unsigned char *d; size_t dsize; ASSERT (size >= 0); ASSERT (base >= 2); ASSERT (base < numberof (mp_bases)); ASSERT (size == 0 || src[size-1] != 0); ASSERT_MPN (src, size); MPN_SIZEINBASE (dsize, src, size, base); ASSERT (dsize >= 1); ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES)); if (size == 0) { dst[0] = 0; return 1; } /* don't clobber input for power of 2 bases */ if (POW2_P (base)) src = refmpn_memdup_limbs (src, size); d = dst + dsize; do { d--; ASSERT (d >= dst); *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base); size -= (src[size-1] == 0); } while (size != 0); /* Move result back and decrement dsize if we didn't generate the maximum possible digits. */ if (d != dst) { size_t i; dsize -= d - dst; for (i = 0; i < dsize; i++) dst[i] = d[i]; } if (POW2_P (base)) free (src); return dsize; } mp_limb_t ref_bswap_limb (mp_limb_t src) { mp_limb_t dst; int i; dst = 0; for (i = 0; i < GMP_LIMB_BYTES; i++) { dst = (dst << 8) + (src & 0xFF); src >>= 8; } return dst; } /* These random functions are mostly for transitional purposes while adding nail support, since they're independent of the normal mpn routines. They can probably be removed when those normal routines are reliable, though perhaps something independent would still be useful at times. */ #if GMP_LIMB_BITS == 32 #define RAND_A CNST_LIMB(0x29CF535) #endif #if GMP_LIMB_BITS == 64 #define RAND_A CNST_LIMB(0xBAECD515DAF0B49D) #endif mp_limb_t refmpn_random_seed; mp_limb_t refmpn_random_half (void) { refmpn_random_seed = refmpn_random_seed * RAND_A + 1; return (refmpn_random_seed >> GMP_LIMB_BITS/2); } mp_limb_t refmpn_random_limb (void) { return ((refmpn_random_half () << (GMP_LIMB_BITS/2)) | refmpn_random_half ()) & GMP_NUMB_MASK; } void refmpn_random (mp_ptr ptr, mp_size_t size) { mp_size_t i; if (GMP_NAIL_BITS == 0) { mpn_random (ptr, size); return; } for (i = 0; i < size; i++) ptr[i] = refmpn_random_limb (); } void refmpn_random2 (mp_ptr ptr, mp_size_t size) { mp_size_t i; mp_limb_t bit, mask, limb; int run; if (GMP_NAIL_BITS == 0) { mpn_random2 (ptr, size); return; } #define RUN_MODULUS 32 /* start with ones at a random pos in the high limb */ bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS); mask = 0; run = 0; for (i = size-1; i >= 0; i--) { limb = 0; do { if (run == 0) { run = (refmpn_random_half () % RUN_MODULUS) + 1; mask = ~mask; } limb |= (bit & mask); bit >>= 1; run--; } while (bit != 0); ptr[i] = limb; bit = GMP_NUMB_HIGHBIT; } } /* This is a simple bitwise algorithm working high to low across "s" and testing each time whether setting the bit would make s^2 exceed n. */ mp_size_t refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize) { mp_ptr tp, dp; mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs; unsigned ibit; long i; mp_limb_t c; ASSERT (nsize >= 0); /* If n==0, then s=0 and r=0. */ if (nsize == 0) return 0; ASSERT (np[nsize - 1] != 0); ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize)); ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize)); ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize)); /* root */ ssize = (nsize+1)/2; refmpn_zero (sp, ssize); /* the remainder so far */ dp = refmpn_memdup_limbs (np, nsize); dsize = nsize; /* temporary */ talloc = 2*ssize + 1; tp = refmpn_malloc_limbs (talloc); for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--) { /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i is added to it */ ilimbs = (i+1) / GMP_NUMB_BITS; ibit = (i+1) % GMP_NUMB_BITS; refmpn_zero (tp, ilimbs); c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit); tsize = ilimbs + ssize; tp[tsize] = c; tsize += (c != 0); ilimbs = (2*i) / GMP_NUMB_BITS; ibit = (2*i) % GMP_NUMB_BITS; if (ilimbs + 1 > tsize) { refmpn_zero_extend (tp, tsize, ilimbs + 1); tsize = ilimbs + 1; } c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs, CNST_LIMB(1) << ibit); ASSERT (tsize < talloc); tp[tsize] = c; tsize += (c != 0); if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0) { /* set this bit in s and subtract from the remainder */ refmpn_setbit (sp, i); ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize)); dsize = refmpn_normalize (dp, dsize); } } if (rp == NULL) { ret = ! refmpn_zero_p (dp, dsize); } else { ASSERT (dsize == 0 || dp[dsize-1] != 0); refmpn_copy (rp, dp, dsize); ret = dsize; } free (dp); free (tp); return ret; }