summaryrefslogtreecommitdiff
path: root/mpz
diff options
context:
space:
mode:
authorMarco Bodrato <bodrato@mail.dm.unipi.it>2020-03-20 12:07:54 +0100
committerMarco Bodrato <bodrato@mail.dm.unipi.it>2020-03-20 12:07:54 +0100
commit3fc1d2b6e65abbdb21a90ed05cf452e5d1576413 (patch)
treea1cee1663a09237adf7d2a1da5721da43b7002da /mpz
parent3737a65526f080d5ec31f32f50196a9cf4a8471c (diff)
downloadgmp-3fc1d2b6e65abbdb21a90ed05cf452e5d1576413.tar.gz
mpz/nextprime.c: Speed-up for large primes, using a sieve. (By Seth Troisi)
Diffstat (limited to 'mpz')
-rw-r--r--mpz/nextprime.c209
1 files changed, 158 insertions, 51 deletions
diff --git a/mpz/nextprime.c b/mpz/nextprime.c
index 1a0b5b6b0..92377fe82 100644
--- a/mpz/nextprime.c
+++ b/mpz/nextprime.c
@@ -30,33 +30,108 @@ You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library. If not,
see https://www.gnu.org/licenses/. */
+#include <string.h>
+
#include "gmp-impl.h"
#include "longlong.h"
-static const unsigned char primegap[] =
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+static mp_limb_t
+id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
+
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+
+/************************************/
+/* Section macros: macros for sieve */
+/************************************/
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,sieve) \
+ do { \
+ mp_limb_t __mask, __index, __max_i, __i; \
+ __i = (start); \
+ __index = __i / GMP_LIMB_BITS; \
+ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
+ __max_i = (end); \
+ do { \
+ ++__i; \
+ if (((sieve)[__index] & __mask) == 0) \
+ { \
+ mp_limb_t prime = id_to_n(__i) \
+
+#define LOOP_ON_SIEVE_END \
+ } \
+ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+ __index += __mask & 1; \
+ } while (__i <= __max_i); \
+ } while (0)
+
+
+
+static const unsigned char primegap_small[] =
{
2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
- 12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8,
- 6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6,
- 6,14,4,6,6,8,6,12
+ 12,2,18,6,10
};
-#define NUMBER_OF_PRIMES 167
+#define NUMBER_OF_PRIMES 100
+
+long calculate_sievelimit(mp_bitcnt_t nbits) {
+ long sieve_limit;
+
+ // Estimate a good sieve bound. Based on derivative of
+ // Merten's 3rd theorem * avg gap * cost of mod
+ // vs
+ // Cost of PRP test O(N^2.55)
+ if (nbits < 12800)
+ {
+ mpz_t tmp;
+ // sieve_limit ~= nbits ^ (5/2) / 124
+ mpz_init (tmp);
+ mpz_ui_pow_ui (tmp, nbits, 5);
+ mpz_sqrt (tmp, tmp);
+ mpz_tdiv_q_ui(tmp, tmp, 124);
+
+ // TODO: Storing gap/2 would allow this to go far beyond 436M.
+ sieve_limit = mpz_get_ui(tmp);
+ mpz_clear (tmp);
+ }
+ else
+ {
+ /* Larger threshold is faster but takes ~O(n/ln(n)) memory.
+ * For 33,000 bits limitting to 150M is ~12% slower than using the
+ * optimal 1.5B sieve_limit.
+ */
+ sieve_limit = 150000000;
+ }
+
+ ASSERT( 1000 < sieve_limit && sieve_limit <= 150000000 );
+ return sieve_limit;
+}
void
mpz_nextprime (mpz_ptr p, mpz_srcptr n)
{
- unsigned short *moduli;
- unsigned long difference;
- int i;
+ char *composite;
+ const unsigned char *primegap;
unsigned prime_limit;
- unsigned long prime;
+ unsigned prime;
mp_size_t pn;
mp_bitcnt_t nbits;
+ int i, m;
unsigned incr;
- TMP_SDECL;
+ unsigned odds_in_composite_sieve;
+ unsigned long difference;
+ TMP_DECL;
/* First handle tiny numbers */
if (mpz_cmp_ui (n, 2) < 0)
@@ -70,59 +145,91 @@ mpz_nextprime (mpz_ptr p, mpz_srcptr n)
if (mpz_cmp_ui (p, 7) <= 0)
return;
+ TMP_MARK;
pn = SIZ(p);
MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
- if (nbits / 2 >= NUMBER_OF_PRIMES)
- prime_limit = NUMBER_OF_PRIMES - 1;
+ if (nbits / 2 <= NUMBER_OF_PRIMES)
+ {
+ primegap = primegap_small;
+ prime_limit = nbits / 2;
+ }
else
- prime_limit = nbits / 2;
+ {
+ long sieve_limit;
+ mp_limb_t *sieve;
+ unsigned char *primegap_tmp;
+ int last_prime;
+
+ /* sieve numbers up to sieve_limit and save prime count */
+ sieve_limit = calculate_sievelimit(nbits);
+ sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit));
+ prime_limit = gmp_primesieve(sieve, sieve_limit);
+
+ /* Needed to avoid assignment of read-only location */
+ primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char);
+ primegap = primegap_tmp;
+
+ i = 0;
+ last_prime = 3;
+ LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), sieve);
+ primegap_tmp[i++] = prime - last_prime;
+ last_prime = prime;
+ LOOP_ON_SIEVE_END;
+
+ /* Both primesieve and prime_limit ignore the first two primes. */
+ ASSERT(i == prime_limit);
+ }
- TMP_SMARK;
+ if (nbits <= 32)
+ odds_in_composite_sieve = 336 / 2;
+ else if (nbits <= 64)
+ odds_in_composite_sieve = 1550 / 2;
+ else
+ /* Corresponds to a merit 14 prime_gap, which is rare. */
+ odds_in_composite_sieve = 5 * nbits;
- /* Compute residues modulo small odd primes */
- moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short);
+ /* composite[2*i] stores if p+2*i is a known composite */
+ composite = TMP_SALLOC_TYPE (odds_in_composite_sieve, char);
+ difference = 0;
for (;;)
{
- /* FIXME: Compute lazily? */
+ memset (composite, 0, odds_in_composite_sieve);
prime = 3;
for (i = 0; i < prime_limit; i++)
- {
- moduli[i] = mpz_tdiv_ui (p, prime);
- prime += primegap[i];
- }
-
-#define INCR_LIMIT 0x10000 /* deep science */
-
- for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
- {
- /* First check residues */
- prime = 3;
- for (i = 0; i < prime_limit; i++)
- {
- unsigned r;
- /* FIXME: Reduce moduli + incr and store back, to allow for
- division-free reductions. Alternatively, table primes[]'s
- inverses (mod 2^16). */
- r = (moduli[i] + incr) % prime;
- prime += primegap[i];
-
- if (r == 0)
- goto next;
- }
-
- mpz_add_ui (p, p, difference);
- difference = 0;
-
- /* Miller-Rabin test */
- if (mpz_millerrabin (p, 25))
- goto done;
- next:;
- incr += 2;
- }
+ {
+ m = mpz_cdiv_ui(p, prime);
+ /* Only care about odd multiplies of prime. */
+ if (m & 1)
+ m += prime;
+ m >>= 1;
+
+ /* Mark off any composites in sieve */
+ for (; m < odds_in_composite_sieve; m += prime)
+ composite[m] = 1;
+ prime += primegap[i];
+ }
+
+ for (incr = 0; incr < odds_in_composite_sieve; difference += 2, incr += 1)
+ {
+ if (composite[incr])
+ continue;
+
+ mpz_add_ui (p, p, difference);
+ difference = 0;
+
+ /* Miller-Rabin test */
+ if (mpz_millerrabin (p, 25))
+ goto done;
+ }
+
+ /* Sieve next segment, very rare */
mpz_add_ui (p, p, difference);
difference = 0;
}
done:
- TMP_SFREE;
+ TMP_FREE;
}
+
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_BEGIN