From d7e186b07849147782af3856ca3af2263c4643a4 Mon Sep 17 00:00:00 2001 From: tege Date: Mon, 29 Apr 2002 14:28:10 +0200 Subject: Nailify. GNUify code layout. --- mpn/generic/gcd.c | 138 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 58 deletions(-) diff --git a/mpn/generic/gcd.c b/mpn/generic/gcd.c index 4fe700298..903d147ff 100644 --- a/mpn/generic/gcd.c +++ b/mpn/generic/gcd.c @@ -57,10 +57,10 @@ MA 02111-1307, USA. */ /* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated algorithm reduces using the bmod operation. Otherwise, the k-ary reduction - is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */ + is used. 0 <= BMOD_THRESHOLD < GMP_NUMB_BITS. */ enum { - BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 + BMOD_THRESHOLD = GMP_NUMB_BITS/2 }; @@ -72,23 +72,28 @@ gcd_2 (mp_ptr vp, mp_srcptr up) mp_limb_t u0, u1, v0, v1; mp_size_t vsize; - u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1]; + u0 = up[0]; + u1 = up[1]; + v0 = vp[0]; + v1 = vp[1]; while (u1 != v1 && u0 != v0) { unsigned long int r; if (u1 > v1) { - u1 -= v1 + (u0 < v0), u0 -= v0; + u1 -= v1 + (u0 < v0); + u0 = (u0 - v0) & GMP_NUMB_MASK; count_trailing_zeros (r, u0); - u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r; + u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r); u1 >>= r; } else /* u1 < v1. */ { - v1 -= u1 + (v0 < u0), v0 -= u0; + v1 -= u1 + (v0 < u0); + v0 = (v0 - u0) & GMP_NUMB_MASK; count_trailing_zeros (r, v0); - v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r; + v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r); v1 >>= r; } } @@ -105,15 +110,15 @@ gcd_2 (mp_ptr vp, mp_srcptr up) return 1; } -/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists - 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB). +/* The function find_a finds 0 < N < 2^GMP_NUMB_BITS such that there exists + 0 < |D| < 2^GMP_NUMB_BITS, and N == D * C mod 2^(2*GMP_NUMB_BITS). In the reference article, D was computed along with N, but it is better to - compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating + compute D separately as D <-- N / C mod 2^(GMP_NUMB_BITS + 1), treating the result as a twos' complement signed integer. - Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference - article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use - 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double + Initialize N1 to C mod 2^(2*GMP_NUMB_BITS). According to the reference + article, N2 should be initialized to 2^(2*GMP_NUMB_BITS), but we use + 2^(2*GMP_NUMB_BITS) - N1 to start the calculations within double precision. If N2 > N1 initially, the first iteration of the while loop will swap them. In all other situations, N1 >= N2 is maintained. */ @@ -130,33 +135,43 @@ find_a (mp_srcptr cp) { unsigned long int leading_zero_bits = 0; - mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */ + mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^GMP_NUMB_BITS + n1_l. */ mp_limb_t n1_h = cp[1]; - mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */ - mp_limb_t n2_h = ~n1_h; + mp_limb_t n2_l = (-n1_l & GMP_NUMB_MASK); /* N2 == n2_h * 2^GMP_NUMB_BITS + n2_l. */ + mp_limb_t n2_h = (~n1_h & GMP_NUMB_MASK); /* Main loop. */ - while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ + while (n2_h != 0) /* While N2 >= 2^GMP_NUMB_BITS. */ { /* N1 <-- N1 % N2. */ - if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0) + if (((GMP_NUMB_HIGHBIT >> leading_zero_bits) & n2_h) == 0) { unsigned long int i; count_leading_zeros (i, n2_h); - i -= leading_zero_bits, leading_zero_bits += i; - n2_h = n2_h<>(BITS_PER_MP_LIMB - i), n2_l <<= i; + i -= GMP_NAIL_BITS; + i -= leading_zero_bits; + leading_zero_bits += i; + n2_h = ((n2_h << i) & GMP_NUMB_MASK) | (n2_l >> (GMP_NUMB_BITS - i)); + n2_l = (n2_l << i) & GMP_NUMB_MASK; do { if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) - n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; - n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1; + { + n1_h -= n2_h + (n1_l < n2_l); + n1_l = (n1_l - n2_l) & GMP_NUMB_MASK; + } + n2_l = (n2_l >> 1) | ((n2_h << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK); + n2_h >>= 1; i -= 1; } - while (i); + while (i != 0); } if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) - n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + { + n1_h -= n2_h + (n1_l < n2_l); + n1_l = (n1_l - n2_l) & GMP_NUMB_MASK; + } MP_LIMB_T_SWAP (n1_h, n2_h); MP_LIMB_T_SWAP (n1_l, n2_l); @@ -179,14 +194,14 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) ASSERT (vsize >= 1); ASSERT (usize >= vsize); ASSERT (vp[0] & 1); - ASSERT (up[usize-1] != 0); - ASSERT (vp[vsize-1] != 0); + ASSERT (up[usize - 1] != 0); + ASSERT (vp[vsize - 1] != 0); #if WANT_ASSERT if (usize == vsize) { int uzeros, vzeros; - count_leading_zeros (uzeros, up[usize-1]); - count_leading_zeros (vzeros, vp[vsize-1]); + count_leading_zeros (uzeros, up[usize - 1]); + count_leading_zeros (vzeros, vp[vsize - 1]); ASSERT (uzeros <= vzeros); } #endif @@ -208,10 +223,12 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) MPN_COPY (anchor_up, orig_up, usize); up = anchor_up; - count_leading_zeros (d, up[usize-1]); - d = usize * BITS_PER_MP_LIMB - d; - count_leading_zeros (vbitsize, vp[vsize-1]); - vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + count_leading_zeros (d, up[usize - 1]); + d -= GMP_NAIL_BITS; + d = usize * GMP_NUMB_BITS - d; + count_leading_zeros (vbitsize, vp[vsize - 1]); + vbitsize -= GMP_NAIL_BITS; + vbitsize = vsize * GMP_NUMB_BITS - vbitsize; ASSERT (d >= vbitsize); d = d - vbitsize + 1; @@ -220,7 +237,7 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) mpn_bdivmod (up, up, usize, vp, vsize, d); /* Now skip U/V mod 2^d and any low zero limbs. */ - d /= BITS_PER_MP_LIMB, up += d, usize -= d; + d /= GMP_NUMB_BITS, up += d, usize -= d; while (usize != 0 && up[0] == 0) up++, usize--; @@ -234,12 +251,12 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) { /* mpn_com_n can't be used here because anchor_up and up may partially overlap */ - if (up[usize-1] & MP_LIMB_T_HIGHBIT) /* U < 0; take twos' compl. */ + if ((up[usize - 1] & GMP_NUMB_HIGHBIT) != 0) /* U < 0; take twos' compl. */ { mp_size_t i; - anchor_up[0] = -up[0]; + anchor_up[0] = -up[0] & GMP_NUMB_MASK; for (i = 1; i < usize; i++) - anchor_up[i] = ~up[i]; + anchor_up[i] = (~up[i] & GMP_NUMB_MASK); up = anchor_up; } @@ -250,7 +267,7 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) unsigned int r; count_trailing_zeros (r, up[0]); mpn_rshift (anchor_up, up, usize, r); - usize -= (anchor_up[usize-1] == 0); + usize -= (anchor_up[usize - 1] == 0); } else if (anchor_up != up) MPN_COPY_INCR (anchor_up, up, usize); @@ -262,49 +279,52 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) break; /* isn't efficient for == 2 limbs. */ d = vbitsize; - count_leading_zeros (vbitsize, vp[vsize-1]); - vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + count_leading_zeros (vbitsize, vp[vsize - 1]); + vbitsize -= GMP_NAIL_BITS; + vbitsize = vsize * GMP_NUMB_BITS - vbitsize; d = d - vbitsize + 1; if (d > BMOD_THRESHOLD) /* Bmod reduction. */ { up[usize++] = 0; mpn_bdivmod (up, up, usize, vp, vsize, d); - d /= BITS_PER_MP_LIMB, up += d, usize -= d; + d /= GMP_NUMB_BITS, up += d, usize -= d; } else /* Kary reduction. */ { mp_limb_t bp[2], cp[2]; - /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */ + /* C <-- V/U mod 2^(2*GMP_NUMB_BITS). */ { mp_limb_t u_inv, hi, lo; modlimb_invert (u_inv, up[0]); - cp[0] = vp[0] * u_inv; - umul_ppmm (hi, lo, cp[0], up[0]); - cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv; + cp[0] = (vp[0] * u_inv) & GMP_NUMB_MASK; + umul_ppmm (hi, lo, cp[0], up[0] << GMP_NAIL_BITS); + lo >>= GMP_NAIL_BITS; + cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv & GMP_NUMB_MASK; } /* U <-- find_a (C) * U. */ up[usize] = mpn_mul_1 (up, up, usize, find_a (cp)); usize++; - /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1). - bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and - bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 + /* B <-- A/C == U/V mod 2^(GMP_NUMB_BITS + 1). + bp[0] <-- U/V mod 2^GMP_NUMB_BITS and + bp[1] <-- ( (U - bp[0] * V)/2^GMP_NUMB_BITS ) / V mod 2 Like V/U above, but simplified because only the low bit of bp[1] is wanted. */ { mp_limb_t v_inv, hi, lo; modlimb_invert (v_inv, vp[0]); - bp[0] = up[0] * v_inv; - umul_ppmm (hi, lo, bp[0], vp[0]); - bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1; + bp[0] = (up[0] * v_inv) & GMP_NUMB_MASK; + umul_ppmm (hi, lo, bp[0], vp[0] << GMP_NAIL_BITS); + lo >>= GMP_NAIL_BITS; + bp[1] = (up[1] + hi + (bp[0] & vp[1])) & 1; } up[usize++] = 0; - if (bp[1]) /* B < 0: U <-- U + (-B) * V. */ + if (bp[1] != 0) /* B < 0: U <-- U + (-B) * V. */ { mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]); mpn_add_1 (up + vsize, up + vsize, usize - vsize, c); @@ -322,7 +342,7 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) while (usize != 0 && up[0] == 0) up++, usize--; } - while (usize); + while (usize != 0); /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */ up = orig_up, usize = orig_usize; @@ -337,15 +357,17 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) if (usize > 2) /* First make U close to V in size. */ { unsigned long int vbitsize, d; - count_leading_zeros (d, up[usize-1]); - d = usize * BITS_PER_MP_LIMB - d; - count_leading_zeros (vbitsize, vp[vsize-1]); - vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + count_leading_zeros (d, up[usize - 1]); + d -= GMP_NAIL_BITS; + d = usize * GMP_NUMB_BITS - d; + count_leading_zeros (vbitsize, vp[vsize - 1]); + vbitsize -= GMP_NAIL_BITS; + vbitsize = vsize * GMP_NUMB_BITS - vbitsize; d = d - vbitsize - 1; if (d != -(unsigned long int)1 && d > 2) { mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */ - d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d; + d /= (unsigned long int)GMP_NUMB_BITS, up += d, usize -= d; } } @@ -363,7 +385,7 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) unsigned int r; count_trailing_zeros (r, up[0]); mpn_rshift (up, up, usize, r); - usize -= (up[usize-1] == 0); + usize -= (up[usize - 1] == 0); } /* Keep usize >= vsize. */ -- cgit v1.2.1