path: root/mpz/jacobi.c
/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.

Copyright 2000, 2001 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* Change this to "#define TRACE(x) x" for some traces. */
#define TRACE(x)


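/* Right shift {src,size} by "shift" bits into dst, or plain copy when shift
   is zero.  No one bits may be shifted out (asserted), and size is reduced
   if the high limb becomes zero.  */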
#define MPN_RSHIFT_OR_COPY(dst,src,size,shift)                  \
  do {                                                          \
    if ((shift) != 0)                                           \
      {                                                         \
        ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift));    \
        (size) -= ((dst)[(size)-1] == 0);                       \
      }                                                         \
    else                                                        \
      MPN_COPY (dst, src, size);                                \
  } while (0)


/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.

   mpz_jacobi could assume b is odd, but the improvements from that seem
   small compared to other operations, and anything significant should be
   checked at run-time since we'd like odd b to go fast in mpz_kronecker
   too.

   mpz_legendre could assume b is an odd prime, but knowing this doesn't
   present any obvious benefits.  Result 0 wouldn't arise (unless "a" is a
   multiple of b), but the checking for that takes little time compared to
   other operations.

   The main loop is just a simple binary GCD with the jacobi symbol result
   tracked during the reduction.

   The special cases for a or b fitting in one limb let mod_1 or modexact_1
   get used, without any copying, and end up just as efficient as the mixed
   precision mpz_kronecker_ui etc.

   When tdiv_qr is called it's not necessary to make "a" odd or make a
   working copy of it, but tdiv_qr is going to be pretty slow so it's not
   worth bothering trying to save anything for that case.

   Enhancements:

   mpn_bdivmod could be used instead of mpn_tdiv_qr, like in mpn_gcd.
   Currently tdiv_qr is preferred since it's sub-quadratic on big sizes,
   although bdivmod might be a touch quicker on small sizes.  This can be
   revised when bdivmod becomes sub-quadratic too.

   Some sort of multi-step algorithm should be used.  The current subtract
   and shift for every bit is very inefficient.  Lehmer (per current gcdext)
   would need some low bits included in its calculation to apply the sign
   change for reciprocity.  Binary Lehmer keeps low bits to strip twos
   anyway, so might be better suited.  Maybe the accelerated GCD style k-ary
   reduction would work, if sign changes due to the extra factors it
   introduces can be accounted for (or maybe they can be ignored).  */
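
/* For illustration, the reduction used by the main loop below can be seen
   in a plain single-word setting.  This sketch is not compiled and is not
   part of GMP: "jacobi_odd" is a made-up name, and it assumes both operands
   are odd and positive, leaving the sign, zero and even cases to the real
   code.  It keeps s*(a/b) invariant across subtract, shift and swap steps,
   using (2/b) = -1 iff b = 3 or 5 mod 8, and quadratic reciprocity at each
   swap.  */
#if 0
static int
jacobi_odd (unsigned long a, unsigned long b)  /* a,b odd, >= 1 */
{
  int  s = 1;

  for (;;)
    {
      /* swap if necessary to make a>=b, applying reciprocity:
         (a/b) = -(b/a) when a and b are both 3 mod 4 */
      if (a < b)
        {
          unsigned long  t = a;
          a = b;
          b = t;
          if ((a & 3) == 3 && (b & 3) == 3)
            s = -s;
        }

      if (b == 1)       /* (a/1)=1 always */
        return s;

      a -= b;           /* leaves (a/b) unchanged, and a is now even */
      if (a == 0)       /* (0/b)=0 since b!=1 here */
        return 0;

      do                /* strip twos from a, each contributing (2/b) */
        {
          a >>= 1;
          if ((b & 7) == 3 || (b & 7) == 5)
            s = -s;
        }
      while ((a & 1) == 0);
    }
}
#endif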


/* This implementation depends on BITS_PER_MP_LIMB being even, so that
   (a/2)^BITS_PER_MP_LIMB = 1 and so there's no need to pay attention to how
   many low zero limbs are stripped.  */
#if BITS_PER_MP_LIMB % 2 != 0
Error, error, need BITS_PER_MP_LIMB even
#endif
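
/* For instance with 32-bit limbs, discarding one low zero limb of "a"
   divides it by 2^32 and multiplies the symbol by (2/b)^32; since (2/b) is
   +1 or -1 for odd b, any even power of it is 1.  Zero limbs of b likewise
   contribute only even powers of (a/2).  */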


int
mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
{
  mp_srcptr  asrcp, bsrcp;
  mp_size_t  asize, bsize;
  mp_ptr     ap, bp;
  mp_limb_t  alow, blow, ahigh, bhigh, asecond, bsecond;
  unsigned   atwos, btwos;
  int        result_bit1;
  TMP_DECL (marker);

  TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
         mpz_trace (" a", a);
         mpz_trace (" b", b));

  asize = SIZ(a);
  asrcp = PTR(a);
  alow = asrcp[0];

  bsize = SIZ(b);
  if (bsize == 0)
    return JACOBI_LS0 (alow, asize);  /* (a/0) */

  bsrcp = PTR(b);
  blow = bsrcp[0];

  if (asize == 0)
    return JACOBI_0LS (blow, bsize);  /* (0/b) */

  /* (even/even)=0 */
  if (((alow | blow) & 1) == 0)
    return 0;

  /* account for effect of sign of b, then ignore it */
  result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
  bsize = ABS (bsize);

  /* low zero limbs on b can be discarded */
  MPN_STRIP_LOW_ZEROS_NOT_ZERO (bsrcp, bsize, blow);

  count_trailing_zeros (btwos, blow);
  TRACE (printf ("b twos %u\n", btwos));

  /* establish shifted blow */
  blow >>= btwos;
  if (bsize > 1)
    {
      bsecond = bsrcp[1];
      if (btwos != 0)
        blow |= bsecond << (BITS_PER_MP_LIMB-btwos);
    }

  /* account for effect of sign of a, then ignore it */
  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
  asize = ABS (asize);
          
  if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
    {
      /* special case one limb b, use modexact and no copying */

      /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);

      if (blow == 1)   /* (a/1)=1 always */
        return JACOBI_BIT1_TO_PN (result_bit1);
      
      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
      TRACE (printf ("base (%lu/%lu) with %d\n",
                     alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
      return mpn_jacobi_base (alow, blow, result_bit1);
    }

  /* Discard low zero limbs of a.  Usually there won't be anything to
     strip, hence not bothering with it for the bsize==1 case.  */
  MPN_STRIP_LOW_ZEROS_NOT_ZERO (asrcp, asize, alow);

  count_trailing_zeros (atwos, alow);
  TRACE (printf ("a twos %u\n", atwos));
  result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);

  /* establish shifted alow */
  alow >>= atwos;
  if (asize > 1)
    {
      asecond = asrcp[1];
      if (atwos != 0)
        alow |= asecond << (BITS_PER_MP_LIMB-atwos);
    }
  
  /* (a/2)=(2/a) with a odd */
  result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);

  if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
    {
      /* another special case with modexact and no copying */

      if (alow == 1)  /* (1/b)=1 always */
        return JACOBI_BIT1_TO_PN (result_bit1);

      /* b still has its twos, so cancel out their effect */
      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
      
      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);  /* now (b/a) */
      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
      TRACE (printf ("base (%lu/%lu) with %d\n",
                     blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
      return mpn_jacobi_base (blow, alow, result_bit1);
    }


  TMP_MARK (marker);
  TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);

  MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
  ASSERT (alow == ap[0]);
  TRACE (mpn_trace ("stripped a", ap, asize));

  MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
  ASSERT (blow == bp[0]);
  TRACE (mpn_trace ("stripped b", bp, bsize));

  /* swap if necessary so that a is at least as long as b */
  if (asize < bsize)
    {
      TRACE (printf ("swap\n"));
      MPN_PTR_SWAP (ap,asize, bp,bsize);
      MP_LIMB_T_SWAP (alow, blow);
      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
    }

  /* If a is bigger than b then reduce to a mod b.
     Division is much faster than chipping away at "a" bit-by-bit. */
  if (asize > bsize)
    {
      mp_ptr  rp, qp;

      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));

      TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
      mpn_tdiv_qr (qp, rp, 0, ap, asize, bp, bsize);
      ap = rp;
      asize = bsize;
      MPN_NORMALIZE (ap, asize);

      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
             mpn_trace (" a", ap, asize);
             mpn_trace (" b", bp, bsize));

      if (asize == 0)  /* (0/b)=0 for b!=1 */
        goto zero;

      alow = ap[0];
      goto strip_a;
    }

  for (;;)
    {
      ASSERT (asize >= 1);         /* a,b non-empty */
      ASSERT (bsize >= 1);
      ASSERT (ap[asize-1] != 0);   /* a,b normalized (and hence non-zero) */
      ASSERT (bp[bsize-1] != 0);
      ASSERT (alow == ap[0]);      /* low limb copies should be correct */
      ASSERT (blow == bp[0]);
      ASSERT (alow & 1);           /* a,b odd */
      ASSERT (blow & 1);

      TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
             mpn_trace (" a", ap, asize);
             mpn_trace (" b", bp, bsize));

      /* swap if necessary to make a>=b, applying reciprocity; the high
         limbs are almost always enough to tell which is bigger */
      if (asize < bsize
          || (asize == bsize
              && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
                  || (ahigh == bhigh
                      && mpn_cmp (ap, bp, asize-1) < 0))))
        {
          TRACE (printf ("swap\n"));
          MPN_PTR_SWAP (ap,asize, bp,bsize);
          MP_LIMB_T_SWAP (alow, blow);
          result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
        }

      if (asize == 1)
        break;

      /* a = a-b */
      ASSERT (asize >= bsize);
      ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
      MPN_NORMALIZE (ap, asize);
      alow = ap[0];

      /* (0/b)=0 for b!=1.  b!=1 is certain when a==0, since b==1 would have
         meant a==1 (asize==1) and an exit at the break above.  */
      if (asize == 0)
        goto zero;

    strip_a:
      /* low zero limbs on a can be discarded */
      MPN_STRIP_LOW_ZEROS_NOT_ZERO (ap, asize, alow);

      if ((alow & 1) == 0)
        {
          /* factors of 2 from a */
          unsigned  twos;
          count_trailing_zeros (twos, alow);
          TRACE (printf ("twos %u\n", twos));
          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
          ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
          asize -= (ap[asize-1] == 0);
          alow = ap[0];
        }
    }

  ASSERT (asize == 1 && bsize == 1);  /* just alow and blow left */
  TMP_FREE (marker);

  /* (1/b)=1 always (and here in fact b==1 too, since a>=b) */
  if (alow == 1)
    return JACOBI_BIT1_TO_PN (result_bit1);

  /* swap with reciprocity and do (b/a) */
  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
  TRACE (printf ("base (%lu/%lu) with %d\n",
                 blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
  return mpn_jacobi_base (blow, alow, result_bit1);

 zero:
  TMP_FREE (marker);
  return 0;
}
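
/* A minimal usage sketch, not compiled as part of this file.  The Jacobi
   symbol is defined for odd b (mpz_kronecker extends it to any b), and the
   operand values here are arbitrary, chosen only for illustration.  */
#if 0
#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  mpz_t  a, b;

  mpz_init_set_ui (a, 1001);
  mpz_init_set_ui (b, 9907);

  /* mpz_jacobi returns -1, 0 or 1 */
  printf ("jacobi(1001,9907) = %d\n", mpz_jacobi (a, b));

  mpz_clear (a);
  mpz_clear (b);
  return 0;
}
#endif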