Simplified mpz_jacobi using reciprocity.

author: Niels Möller <nisse@lysator.liu.se> 2011-05-23 10:12:16 +0200
committer: Niels Möller <nisse@lysator.liu.se> 2011-05-23 10:12:16 +0200
commit: 2124166fce28581fecf7c26b9c7929d529ee5ffb (patch)
tree: 711f6efaa5b2536058ba005295cee018dd6dbfb7 /mpz/jacobi.c
parent: ea42d2b6a8a568b972d01390b5a7fa2ece1cbbea (diff)
download: gmp-2124166fce28581fecf7c26b9c7929d529ee5ffb.tar.gz
1 files changed, 34 insertions, 121 deletions
diff --git a/mpz/jacobi.c b/mpz/jacobi.c
index 40255d8ec..c39c635ae 100644
--- a/mpz/jacobi.c
+++ b/mpz/jacobi.c
@@ -98,9 +98,9 @@ mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
 
   if (bsize > 1 && btwos > 0)
     {
-      mp_limb_t second = bsrcp[1];
-      blow |= second << (GMP_NUMB_BITS - btwos);
-      if (bsize == 2 && (second >> btwos) == 0)
+      mp_limb_t b1 = bsrcp[1];
+      blow |= b1 << (GMP_NUMB_BITS - btwos);
+      if (bsize == 2 && (b1 >> btwos) == 0)
 	bsize = 1;
     }
 
@@ -149,140 +149,53 @@ mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
       JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
       return mpn_jacobi_base (blow, alow, result_bit1);
     }
-  result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
 
-  bits = mpn_jacobi_init (alow, blow, (result_bit1>>1) & 1);
+  /* Ensure asize >= bsize. Take advantage of the reciprocity law
+     (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
 
-  /* Allocation strategy: When one operand is much larger than the
-     other, we currently have to allocate space for the entire
-     quotient, even though we need juste the lowest few bits. But we
-     at least avoid allocating a copy of th larger input.
-
-     We put the reduction of the larger operand first in the scratch
-     area, followed by an area that holds first the quotient, and then
-     the working copy of the smaller operand. */
-
-  if (asize > bsize)
+  if (asize < bsize)
     {
-      n = bsize;
+      MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
+      MP_LIMB_T_SWAP (alow, blow);
       
-      if (asize >= 2*bsize)
-	itch = asize + 1;
-      else
-	itch = 2*bsize;
-    }
-  else
-    {
-      n = asize;
+      count_trailing_zeros (btwos, blow);
+      if (btwos > 0)
+	blow = (blow >> btwos) | (bsrcp[1] << (GMP_LIMB_BITS - btwos));
 
-      if (bsize >= 2*asize)
-	itch = bsize + 1;
-      else
-	itch = 2*asize;
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
     }
 
+  /* Allocation strategy: For A, we allocate a working copy only for A
+     % B, but when A is much larger than B, we have to allocate space
+     for the large quotient. We use the same area, pointed to by bp,
+     for both the quotient A/B and the working copy of B. */
+  
   TMP_MARK;
 
-  scratch = TMP_ALLOC_LIMBS (itch);
-
-  if (n < asize)
-    {
-      mp_limb_t q0;
-      ap = scratch;
-      bp = scratch + n;
-
-      mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, n);
-      q0 = bp[0];
+  if (asize >= 2*bsize)
+    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
+  else
+    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
 
-      if (btwos > 0)
-	{
-	  ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, n, btwos));
-	  n -= (bp[n-1] | ap[n-1]) == 0;
+  if (asize > bsize)
+    mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
+  else
+    MPN_COPY (ap, asrcp, bsize);
 
-	  /* We have reduced a -= q * 2^k b */
-	  q0 <<= btwos;
-	}
-      else
-	MPN_COPY (bp, bsrcp, n);
-      
-      bits = mpn_jacobi_update (bits, 1, q0 & 3);
-      if (mpn_zero_p (ap, n))
-	{
-	  /* FIXME: n > 1 always? */
-	  if (n > 1 || bp[0] != 1)
-	    {
-	      TMP_FREE;
-	      return 0;
-	    }
-
-	  TMP_FREE;
-	  return mpn_jacobi_finish (bits);
-	}
-    }
-  else if (n < bsize)
+  if (btwos > 0)
     {
-      mp_limb_t q0;
-      mp_limb_t cy;
-      bp = scratch;
-      ap = scratch + n;
-
-      mpn_tdiv_qr (ap, bp, 0, bsrcp, bsize, asrcp, n);
-      q0 = ap[0];
-
-      if (btwos > 0)
-	{
-	  /* Let b be the correctly shifted, odd, value, and b' = 2^k
-	     b (k = btwos). We have divided
-
-	       b' = q' a + r'
-
-	     Let q' = 2^k q + ql, then we can recover the correct
-	     division as
-
-	       b = q a + r
-
-	     where the remainder is
-
-	       r = (ql a + r')/2^k
-	   */
-	  mp_limb_t ql, hi;
-
-	  ql = q0 & ((CNST_LIMB(1) << btwos) - 1);
-	  q0 = (q0 >> btwos) | (ap[1] << (GMP_LIMB_BITS - btwos));
-	  hi = mpn_addmul_1 (bp, asrcp, n, ql);
-
-	  ASSERT_NOCARRY (mpn_rshift (bp, bp, n, btwos));
-	  bp[n-1] |= hi << (GMP_LIMB_BITS - btwos);
-	}
-
-      bits = mpn_jacobi_update (bits, 0, q0 & 3);
-
-      if (mpn_zero_p (bp, n))
-	{
-	  TMP_FREE;
+      /* Must use the original alow *before* the division above. */
+      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
 
-	  /* FIXME: n > 1 always? */
-	  if (n > 1 || asrcp[0] != 1)
-	    return 0;
-	  else
-	    return mpn_jacobi_finish (bits);
-	}
-      
-      MPN_COPY (ap, asrcp, n);
+      ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
+      bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;      
     }
   else
-    {
-      ap = scratch;
-      bp = scratch + n;
-
-      MPN_COPY (ap, asrcp, n);
-      if (btwos > 0)
-	ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, n, btwos));
-      else
-	MPN_COPY (bp, bsrcp, n);
-    }
+    MPN_COPY (bp, bsrcp, bsize);
 
-  res = mpn_jacobi_n (ap, bp, n, bits);
+  ASSERT (blow == bp[0]);
+  res = mpn_jacobi_n (ap, bp, bsize,
+		      mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
 
   TMP_FREE;
   return res;
author	Niels Möller <nisse@lysator.liu.se>	2011-05-23 10:12:16 +0200
committer	Niels Möller <nisse@lysator.liu.se>	2011-05-23 10:12:16 +0200
commit	2124166fce28581fecf7c26b9c7929d529ee5ffb (patch)
tree	711f6efaa5b2536058ba005295cee018dd6dbfb7 /mpz/jacobi.c
parent	ea42d2b6a8a568b972d01390b5a7fa2ece1cbbea (diff)
download	gmp-2124166fce28581fecf7c26b9c7929d529ee5ffb.tar.gz