diff options
author | Niels Möller <nisse@lysator.liu.se> | 2011-05-20 19:22:15 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2011-05-20 19:22:15 +0200 |
commit | 5186c3a14c3f49140f1983f2a8bcbb7eb7137085 (patch) | |
tree | a04aadb4cb612965e74c0199d53f6025d34fdf47 /mpn | |
parent | e23b60d3e67b6ca68244833a939a48f6998b16fb (diff) | |
download | gmp-5186c3a14c3f49140f1983f2a8bcbb7eb7137085.tar.gz |
New jacobi-related files.
Diffstat (limited to 'mpn')
-rw-r--r-- | mpn/generic/hgcd2_jacobi.c | 356 | ||||
-rw-r--r-- | mpn/generic/hgcd_jacobi.c | 235 | ||||
-rw-r--r-- | mpn/generic/jacobi_2.c | 342 |
3 files changed, 933 insertions, 0 deletions
diff --git a/mpn/generic/hgcd2_jacobi.c b/mpn/generic/hgcd2_jacobi.c new file mode 100644 index 000000000..c888deb1d --- /dev/null +++ b/mpn/generic/hgcd2_jacobi.c @@ -0,0 +1,356 @@ +/* hgcd2_jacobi.c + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2011 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#if GMP_NAIL_BITS > 0 +#error Nails not supported. +#endif + +/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and + possibly be renamed. */ +static inline mp_limb_t +div1 (mp_ptr rp, + mp_limb_t n0, + mp_limb_t d0) +{ + mp_limb_t q = 0; + + if ((mp_limb_signed_t) n0 < 0) + { + int cnt; + for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++) + { + d0 = d0 << 1; + } + + q = 0; + while (cnt) + { + q <<= 1; + if (n0 >= d0) + { + n0 = n0 - d0; + q |= 1; + } + d0 = d0 >> 1; + cnt--; + } + } + else + { + int cnt; + for (cnt = 0; n0 >= d0; cnt++) + { + d0 = d0 << 1; + } + + q = 0; + while (cnt) + { + d0 = d0 >> 1; + q <<= 1; + if (n0 >= d0) + { + n0 = n0 - d0; + q |= 1; + } + cnt--; + } + } + *rp = n0; + return q; +} + +/* Two-limb division optimized for small quotients. */ +static inline mp_limb_t +div2 (mp_ptr rp, + mp_limb_t nh, mp_limb_t nl, + mp_limb_t dh, mp_limb_t dl) +{ + mp_limb_t q = 0; + + if ((mp_limb_signed_t) nh < 0) + { + int cnt; + for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++) + { + dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1)); + dl = dl << 1; + } + + while (cnt) + { + q <<= 1; + if (nh > dh || (nh == dh && nl >= dl)) + { + sub_ddmmss (nh, nl, nh, nl, dh, dl); + q |= 1; + } + dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1); + dh = dh >> 1; + cnt--; + } + } + else + { + int cnt; + for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++) + { + dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1)); + dl = dl << 1; + } + + while (cnt) + { + dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1); + dh = dh >> 1; + q <<= 1; + if (nh > dh || (nh == dh && nl >= dl)) + { + sub_ddmmss (nh, nl, nh, nl, dh, dl); + q |= 1; + } + cnt--; + } + } + + rp[0] = nl; + rp[1] = nh; + + return q; +} + +int +mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, + struct hgcd_matrix1 *M, unsigned *bitsp) +{ + mp_limb_t u00, u01, u10, u11; + unsigned bits = *bitsp; + + if (ah < 2 || bh < 2) + return 0; + + if (ah > bh || (ah == bh && al > bl)) + { + sub_ddmmss (ah, al, ah, al, bh, bl); + if (ah < 2) + return 0; + + u00 = u01 = u11 = 1; + u10 = 0; + bits = mpn_jacobi_update (bits, 1, 1); + } + else + { + sub_ddmmss (bh, bl, bh, bl, ah, al); + if (bh < 2) + return 0; + + u00 = u10 = u11 = 1; + u01 = 0; + bits = mpn_jacobi_update (bits, 0, 1); + } + + if (ah < bh) + goto subtract_a; + + for (;;) + { + ASSERT (ah >= bh); + if (ah == bh) + goto done; + + if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2))) + { + ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2)); + bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2)); + + break; + } + + /* Subtract a -= q b, and multiply M from the right by (1 q ; 0 + 1), affecting the second column of M. */ + ASSERT (ah > bh); + sub_ddmmss (ah, al, ah, al, bh, bl); + + if (ah < 2) + goto done; + + if (ah <= bh) + { + /* Use q = 1 */ + u01 += u00; + u11 += u10; + bits = mpn_jacobi_update (bits, 1, 1); + } + else + { + mp_limb_t r[2]; + mp_limb_t q = div2 (r, ah, al, bh, bl); + al = r[0]; ah = r[1]; + if (ah < 2) + { + /* A is too small, but q is correct. */ + u01 += q * u00; + u11 += q * u10; + bits = mpn_jacobi_update (bits, 1, q & 3); + goto done; + } + q++; + u01 += q * u00; + u11 += q * u10; + bits = mpn_jacobi_update (bits, 1, q & 3); + } + subtract_a: + ASSERT (bh >= ah); + if (ah == bh) + goto done; + + if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2))) + { + ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2)); + bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2)); + + goto subtract_a1; + } + + /* Subtract b -= q a, and multiply M from the right by (1 0 ; q + 1), affecting the first column of M. */ + sub_ddmmss (bh, bl, bh, bl, ah, al); + + if (bh < 2) + goto done; + + if (bh <= ah) + { + /* Use q = 1 */ + u00 += u01; + u10 += u11; + bits = mpn_jacobi_update (bits, 0, 1); + } + else + { + mp_limb_t r[2]; + mp_limb_t q = div2 (r, bh, bl, ah, al); + bl = r[0]; bh = r[1]; + if (bh < 2) + { + /* B is too small, but q is correct. */ + u00 += q * u01; + u10 += q * u11; + bits = mpn_jacobi_update (bits, 0, q & 3); + goto done; + } + q++; + u00 += q * u01; + u10 += q * u11; + bits = mpn_jacobi_update (bits, 0, q & 3); + } + } + + /* NOTE: Since we discard the least significant half limb, we don't + get a truly maximal M (corresponding to |a - b| < + 2^{GMP_LIMB_BITS +1}). */ + /* Single precision loop */ + for (;;) + { + ASSERT (ah >= bh); + if (ah == bh) + break; + + ah -= bh; + if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1))) + break; + + if (ah <= bh) + { + /* Use q = 1 */ + u01 += u00; + u11 += u10; + bits = mpn_jacobi_update (bits, 1, 1); + } + else + { + mp_limb_t r; + mp_limb_t q = div1 (&r, ah, bh); + ah = r; + if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) + { + /* A is too small, but q is correct. */ + u01 += q * u00; + u11 += q * u10; + bits = mpn_jacobi_update (bits, 1, q & 3); + break; + } + q++; + u01 += q * u00; + u11 += q * u10; + bits = mpn_jacobi_update (bits, 1, q & 3); + } + subtract_a1: + ASSERT (bh >= ah); + if (ah == bh) + break; + + bh -= ah; + if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1))) + break; + + if (bh <= ah) + { + /* Use q = 1 */ + u00 += u01; + u10 += u11; + bits = mpn_jacobi_update (bits, 0, 1); + } + else + { + mp_limb_t r; + mp_limb_t q = div1 (&r, bh, ah); + bh = r; + if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) + { + /* B is too small, but q is correct. */ + u00 += q * u01; + u10 += q * u11; + bits = mpn_jacobi_update (bits, 0, q & 3); + break; + } + q++; + u00 += q * u01; + u10 += q * u11; + bits = mpn_jacobi_update (bits, 0, q & 3); + } + } + + done: + M->u[0][0] = u00; M->u[0][1] = u01; + M->u[1][0] = u10; M->u[1][1] = u11; + *bitsp = bits; + + return 1; +} diff --git a/mpn/generic/hgcd_jacobi.c b/mpn/generic/hgcd_jacobi.c new file mode 100644 index 000000000..2dce43b99 --- /dev/null +++ b/mpn/generic/hgcd_jacobi.c @@ -0,0 +1,235 @@ +/* hgcd_jacobi.c. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 2003, 2004, 2005, 2008, 2011 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* This file is almost a copy of hgcd.c, with some added calls to + mpn_jacobi_update */ + +struct hgcd_jacobi_ctx +{ + struct hgcd_matrix *M; + unsigned *bitsp; +}; + +static void +hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, + mp_srcptr qp, mp_size_t qn, int d) +{ + ASSERT (!gp); + ASSERT (d >= 0); + + MPN_NORMALIZE (qp, qn); + if (qn > 0) + { + struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p; + /* NOTES: This is a bit ugly. A tp area is passed to + gcd_subdiv_step, which stores q at the start of that area. We + now use the rest. */ + mp_ptr tp = (mp_ptr) qp + qn; + + mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp); + *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3); + } +} + +/* Perform a few steps, using some of mpn_hgcd2, subtraction and + division. Reduces the size by almost one limb or more, but never + below the given size s. Return new size for a and b, or 0 if no + more steps are possible. + + If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n + limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2 + fails, needs space for the quotient, qn <= n - s + 1 limbs, for and + hgcd_matrix_update_q, qn + (size of the appropriate column of M) <= + resulting size of M. + + If N is the input size to the calling hgcd, then s = floor(N/2) + + 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1 + < N, so N is sufficient. +*/ + +static mp_size_t +hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s, + struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp) +{ + struct hgcd_matrix1 M1; + mp_limb_t mask; + mp_limb_t ah, al, bh, bl; + mp_size_t an, bn, qn; + int col; + + ASSERT (n > s); + + mask = ap[n-1] | bp[n-1]; + ASSERT (mask > 0); + + if (n == s + 1) + { + if (mask < 4) + goto subtract; + + ah = ap[n-1]; al = ap[n-2]; + bh = bp[n-1]; bl = bp[n-2]; + } + else if (mask & GMP_NUMB_HIGHBIT) + { + ah = ap[n-1]; al = ap[n-2]; + bh = bp[n-1]; bl = bp[n-2]; + } + else + { + int shift; + + count_leading_zeros (shift, mask); + ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); + al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); + bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); + bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); + } + + /* Try an mpn_hgcd2 step */ + if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp)) + { + /* Multiply M <- M * M1 */ + mpn_hgcd_matrix_mul_1 (M, &M1, tp); + + /* Can't swap inputs, so we need to copy. */ + MPN_COPY (tp, ap, n); + /* Multiply M1^{-1} (a;b) */ + return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n); + } + + subtract: + { + struct hgcd_jacobi_ctx ctx; + ctx.M = M; + ctx.bitsp = bitsp; + + return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp); + } +} + +/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M + with elements of size at most (n+1)/2 - 1. Returns new size of a, + b, or zero if no reduction is possible. */ + +/* Same scratch requirements as for mpn_hgcd. */ +mp_size_t +mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n, + struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp) +{ + mp_size_t s = n/2 + 1; + + mp_size_t nn; + int success = 0; + + if (n <= s) + /* Happens when n <= 2, a fairly uninteresting case but exercised + by the random inputs of the testsuite. */ + return 0; + + ASSERT ((ap[n-1] | bp[n-1]) > 0); + + ASSERT ((n+1)/2 - 1 < M->alloc); + + if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD)) + { + mp_size_t n2 = (3*n)/4 + 1; + mp_size_t p = n/2; + + nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp); + if (nn > 0) + { + /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1) + = 2 (n - 1) */ + n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp); + success = 1; + } + while (n > n2) + { + /* Needs n + 1 storage */ + nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); + if (!nn) + return success ? n : 0; + n = nn; + success = 1; + } + + if (n > s + 2) + { + struct hgcd_matrix M1; + mp_size_t scratch; + + p = 2*s - n + 1; + scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p); + + mpn_hgcd_matrix_init(&M1, n - p, tp); + nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch); + if (nn > 0) + { + /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */ + ASSERT (M->n + 2 >= M1.n); + + /* Furthermore, assume M ends with a quotient (1, q; 0, 1), + then either q or q + 1 is a correct quotient, and M1 will + start with either (1, 0; 1, 1) or (2, 1; 1, 1). This + rules out the case that the size of M * M1 is much + smaller than the expected M->n + M1->n. */ + + ASSERT (M->n + M1.n < M->alloc); + + /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1) + = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */ + n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch); + + /* We need a bound for of M->n + M1.n. Let n be the original + input size. Then + + ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2 + + and it follows that + + M.n + M1.n <= ceil(n/2) + 1 + + Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the + amount of needed scratch space. */ + mpn_hgcd_matrix_mul (M, &M1, tp + scratch); + success = 1; + } + } + } + + for (;;) + { + /* Needs s+3 < n */ + nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); + if (!nn) + return success ? n : 0; + + n = nn; + success = 1; + } +} diff --git a/mpn/generic/jacobi_2.c b/mpn/generic/jacobi_2.c new file mode 100644 index 000000000..9a8a2b551 --- /dev/null +++ b/mpn/generic/jacobi_2.c @@ -0,0 +1,342 @@ +/* jacobi_2.c + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef JACOBI_2_METHOD +#define JACOBI_2_METHOD 2 +#endif + +/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary + two-limb numbers. */ +#if JACOBI_2_METHOD == 1 +int +mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit) +{ + mp_limb_t ah, al, bh, bl; + int c; + + al = ap[0]; + ah = ap[1]; + bl = bp[0]; + bh = bp[1]; + + ASSERT (bl & 1); + + bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1); + bh >>= 1; + + if ( (bh | bl) == 0) + return 1 - 2*(bit & 1); + + if ( (ah | al) == 0) + return 0; + + if (al == 0) + { + al = ah; + ah = 0; + bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1)); + } + count_trailing_zeros (c, al); + bit ^= c & (bl ^ (bl >> 1)); + + c++; + if (UNLIKELY (c == GMP_NUMB_BITS)) + { + al = ah; + ah = 0; + } + else + { + al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c); + ah >>= c; + } + while ( (ah | bh) > 0) + { + mp_limb_t th, tl; + mp_limb_t bgta; + + sub_ddmmss (th, tl, ah, al, bh, bl); + if ( (tl | th) == 0) + return 0; + + bgta = LIMB_HIGHBIT_TO_MASK (th); + + /* If b > a, invoke reciprocity */ + bit ^= (bgta & al & bl); + + /* b <-- min (a, b) */ + add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta); + + if ( (bh | bl) == 0) + return 1 - 2*(bit & 1); + + /* a <-- |a - b| */ + al = (bgta ^ tl) - bgta; + ah = (bgta ^ th); + + if (UNLIKELY (al == 0)) + { + /* If b > a, al == 0 implies that we have a carry to + propagate. */ + al = ah - bgta; + ah = 0; + bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1)); + } + count_trailing_zeros (c, al); + c++; + bit ^= c & (bl ^ (bl >> 1)); + + if (UNLIKELY (c == GMP_NUMB_BITS)) + { + al = ah; + ah = 0; + } + else + { + al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c); + ah >>= c; + } + } + + ASSERT (bl > 0); + + while ( (al | bl) & GMP_LIMB_HIGHBIT) + { + /* Need an extra comparison to get the mask. */ + mp_limb_t t = al - bl; + mp_limb_t bgta = - (bl > al); + + if (t == 0) + return 0; + + /* If b > a, invoke reciprocity */ + bit ^= (bgta & al & bl); + + /* b <-- min (a, b) */ + bl += (bgta & t); + + /* a <-- |a - b| */ + al = (t ^ bgta) - bgta; + + /* Number of trailing zeros is the same no matter if we look at + * t or a, but using t gives more parallelism. */ + count_trailing_zeros (c, t); + c ++; + /* (2/b) = -1 if b = 3 or 5 mod 8 */ + bit ^= c & (bl ^ (bl >> 1)); + + if (UNLIKELY (c == GMP_NUMB_BITS)) + return 1 - 2*(bit & 1); + + al >>= c; + } + + /* Here we have a little impedance mismatch. Better to inline it? */ + return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1); +} +#elif JACOBI_2_METHOD == 2 +int +mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit) +{ + mp_limb_t ah, al, bh, bl; + int c; + + al = ap[0]; + ah = ap[1]; + bl = bp[0]; + bh = bp[1]; + + ASSERT (bl & 1); + + /* Use bit 1. */ + bit <<= 1; + + if (bh == 0 && bl == 1) + /* (a|1) = 1 */ + return 1 - (bit & 2); + + if (al == 0) + { + if (ah == 0) + /* (0|b) = 0, b > 1 */ + return 0; + + count_trailing_zeros (c, ah); + bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1)); + + al = bl; + bl = ah >> c; + + if (bl == 1) + /* (1|b) = 1 */ + return 1 - (bit & 2); + + ah = bh; + + bit ^= al & bl; + + goto b_reduced; + } + if ( (al & 1) == 0) + { + count_trailing_zeros (c, al); + + al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c); + ah >>= c; + bit ^= (c << 1) & (bl ^ (bl >> 1)); + } + if (ah == 0) + { + if (bh > 0) + { + bit ^= al & bl; + MP_LIMB_T_SWAP (al, bl); + ah = bh; + goto b_reduced; + } + goto ab_reduced; + } + + while (bh > 0) + { + /* Compute (a|b) */ + while (ah > bh) + { + sub_ddmmss (ah, al, ah, al, bh, bl); + if (al == 0) + { + count_trailing_zeros (c, ah); + bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1)); + + al = bl; + bl = ah >> c; + ah = bh; + + bit ^= al & bl; + goto b_reduced; + } + count_trailing_zeros (c, al); + bit ^= (c << 1) & (bl ^ (bl >> 1)); + al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c); + ah >>= c; + } + if (ah == bh) + goto cancel_hi; + + if (ah == 0) + { + bit ^= al & bl; + MP_LIMB_T_SWAP (al, bl); + ah = bh; + break; + } + + bit ^= al & bl; + + /* Compute (b|a) */ + while (bh > ah) + { + sub_ddmmss (bh, bl, bh, bl, ah, al); + if (bl == 0) + { + count_trailing_zeros (c, bh); + bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1)); + + bl = bh >> c; + bit ^= al & bl; + goto b_reduced; + } + count_trailing_zeros (c, bl); + bit ^= (c << 1) & (al ^ (al >> 1)); + bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c); + bh >>= c; + } + bit ^= al & bl; + + /* Compute (a|b) */ + if (ah == bh) + { + cancel_hi: + if (al < bl) + { + MP_LIMB_T_SWAP (al, bl); + bit ^= al & bl; + } + al -= bl; + if (al == 0) + return 0; + + count_trailing_zeros (c, al); + bit ^= (c << 1) & (bl ^ (bl >> 1)); + al >>= c; + + if (al == 1) + return 1 - (bit & 2); + + MP_LIMB_T_SWAP (al, bl); + bit ^= al & bl; + break; + } + } + + b_reduced: + /* Compute (a|b), with b a single limb. */ + ASSERT (bl & 1); + + if (bl == 1) + /* (a|1) = 1 */ + return 1 - (bit & 2); + + while (ah > 0) + { + ah -= (al < bl); + al -= bl; + if (al == 0) + { + if (ah == 0) + return 0; + count_trailing_zeros (c, ah); + bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1)); + al = ah >> c; + goto ab_reduced; + } + count_trailing_zeros (c, al); + + al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c); + ah >>= c; + bit ^= (c << 1) & (bl ^ (bl >> 1)); + } + ab_reduced: + ASSERT (bl & 1); + ASSERT (bl > 1); + + return mpn_jacobi_base (al, bl, bit); +} +#else +#error Unsupported value for JACOBI_2_METHOD +#endif |