diff options
-rw-r--r-- | Makefile.am | 16 | ||||
-rw-r--r-- | configure.in | 1 | ||||
-rw-r--r-- | gen-trialdivtab.c | 295 | ||||
-rw-r--r-- | gmp-impl.h | 5 | ||||
-rw-r--r-- | mpn/generic/trialdiv.c | 104 |
5 files changed, 421 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am index 04ccfbada..063fd06e9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -384,6 +384,22 @@ gen-bases_.c: gen-bases.c $(ANSI2KNR) $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c + +trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD) + ./gen-trialdivtab $(BITS_PER_MP_LIMB) 2000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1) +BUILT_SOURCES += trialdivtab.h + +gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c + $(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD) +DISTCLEANFILES += gen-trialdivtab$(EXEEXT_FOR_BUILD) +EXTRA_DIST += gen-trialdivtab.c + +gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR) + $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c + + + + mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD) ./gen-psqr $(BITS_PER_MP_LIMB) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1) BUILT_SOURCES += mpn/perfsqr.h diff --git a/configure.in b/configure.in index 2cb3f7910..2e6f2a3ab 100644 --- a/configure.in +++ b/configure.in @@ -2449,6 +2449,7 @@ gmp_mpn_functions="$extra_functions \ bdiv_q_1 \ sb_bdiv_q sb_bdiv_qr dc_bdiv_q dc_bdiv_qr mu_bdiv_q mu_bdiv_qr \ divexact bdiv_dbm1c redc_1 redc_2 powm powlo powm_sec subcnd_n \ + trialdiv \ $gmp_mpn_functions_optional" define(GMP_MULFUNC_CHOICES, diff --git a/gen-trialdivtab.c b/gen-trialdivtab.c new file mode 100644 index 000000000..0a265046a --- /dev/null +++ b/gen-trialdivtab.c @@ -0,0 +1,295 @@ +/* gen-trialdivtab.c + +Copyright 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +/* + Generate tables for fast, division-free trial division for GMP. + + There is one main table, ptab. It contains primes, multiplied together, and + several types of pre-computed inverses. It refers to tables of the type + dtab, via the last two indices. That table contains the individual primes in + the range, except that the primes are not actually included in the table (see + the P macro; it sneakingly excludes the primes themselves). Instead, the + dtab tables contains tuples for each prime (modular-inverse, limit) used for + divisibility checks. + + This interface is not intended for division of very many primes, since then + other algorithms apply. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include "dumbmp.c" + +int sumspills (mpz_t, mpz_t *, int); +void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t); + +int limb_bits; + +mpz_t B; + +int +main (int argc, char *argv[]) +{ + unsigned long t, p; + mpz_t ppp, acc, inv, gmp_numb_max, tmp, Bhalf; + mpz_t pre[7]; + int i; + int start_p, end_p, interval_start, interval_end, omitted_p; + char *endtok; + int stop; + int np, start_idx; + + if (argc < 2) + { + fprintf (stderr, "usage: %s bits endprime\n", argv[0]); + exit (1); + } + + limb_bits = atoi (argv[1]); + + end_p = 1290; /* default end prime */ + if (argc == 3) + end_p = atoi (argv[2]); + + printf ("#if GMP_LIMB_BITS != %d\n", limb_bits); + printf ("#error This table is for GMP_LIMB_BITS = %d\n", limb_bits); + printf ("#endif\n\n"); + + printf ("#if GMP_NAIL_BITS != 0\n"); + printf ("#error This table does not support nails\n"); + printf ("#endif\n\n"); + + for (i = 0; i < 7; i++) + mpz_init (pre[i]); + + mpz_init_set_ui (gmp_numb_max, 1); + mpz_mul_2exp (gmp_numb_max, gmp_numb_max, limb_bits); + mpz_sub_ui (gmp_numb_max, gmp_numb_max, 1); + + mpz_init (tmp); + mpz_init (inv); + + mpz_init_set_ui (B, 1); mpz_mul_2exp (B, B, limb_bits); + mpz_init_set_ui (Bhalf, 1); mpz_mul_2exp (Bhalf, Bhalf, limb_bits - 1); + + start_p = 3; + + mpz_init_set_ui (ppp, 1); + mpz_init (acc); + interval_start = start_p; + omitted_p = 3; + interval_end = 0; + + printf ("static struct gmp_primes_dtab gmp_primes_dtab[] = {\n"); + + for (t = start_p; t <= end_p; t += 2) + { + if (! isprime (t)) + continue; + + mpz_mul_ui (acc, ppp, t); + stop = mpz_cmp (acc, Bhalf) >= 0; + if (!stop) + { + mpn_mod_1s_4p_cps (pre, acc); + stop = sumspills (acc, pre + 2, 5); + } + + if (stop) + { + for (p = interval_start; p <= interval_end; p += 2) + { + if (! isprime (p)) + continue; + + printf (" P(%d,", (int) p); + mpz_invert_ui_2exp (inv, p, limb_bits); + printf ("0x"); mpz_out_str (stdout, 16, inv); printf (","); + + mpz_tdiv_q_ui (tmp, gmp_numb_max, p); + printf ("0x"); mpz_out_str (stdout, 16, tmp); + printf ("),\n"); + } + mpz_set_ui (ppp, t); + interval_start = t; + omitted_p = t; + } + else + { + mpz_set (ppp, acc); + } + interval_end = t; + } + printf (" P(0,0,0)\n};\n"); + + + printf ("static struct gmp_primes_ptab gmp_primes_ptab[] = {\n"); + + endtok = ""; + + mpz_set_ui (ppp, 1); + interval_start = start_p; + interval_end = 0; + np = 0; + start_idx = 0; + for (t = start_p; t <= end_p; t += 2) + { + if (! isprime (t)) + continue; + + mpz_mul_ui (acc, ppp, t); + + stop = mpz_cmp (acc, Bhalf) >= 0; + if (!stop) + { + mpn_mod_1s_4p_cps (pre, acc); + stop = sumspills (acc, pre + 2, 5); + } + + if (stop) + { + mpn_mod_1s_4p_cps (pre, ppp); + printf ("%s", endtok); + printf (" {0x"); mpz_out_str (stdout, 16, ppp); + printf (",{0x"); mpz_out_str (stdout, 16, pre[0]); + printf (",%d", (int) PTR(pre[1])[0]); + for (i = 0; i < 5; i++) + { + printf (","); + printf ("0x"); mpz_out_str (stdout, 16, pre[2 + i]); + } + printf ("},"); + printf ("%d,", start_idx); + printf ("%d}", np - start_idx); + + endtok = ",\n"; + mpz_set_ui (ppp, t); + interval_start = t; + start_idx = np; + } + else + { + mpz_set (ppp, acc); + } + interval_end = t; + np++; + } + printf ("\n};\n"); + + printf ("#define SMALLEST_OMITTED_PRIME %d\n", (int) omitted_p); + + return 0; +} + +unsigned long +mpz_log2 (mpz_t x) +{ + mpz_t y; + unsigned long cnt; + + mpz_init (y); + mpz_set (y, x); + cnt = 0; + while (mpz_sgn (y) != 0) + { + mpz_tdiv_q_2exp (y, y, 1); + cnt++; + } + mpz_clear (y); + + return cnt; +} + +void +mpn_mod_1s_4p_cps (mpz_t cps[7], mpz_t bparm) +{ + mpz_t b, bi; + mpz_t B1modb, B2modb, B3modb, B4modb, B5modb; + mpz_t t; + int cnt; + + mpz_init_set (b, bparm); + + cnt = limb_bits - mpz_log2 (b); + + mpz_init (bi); + mpz_init (t); + mpz_init (B1modb); + mpz_init (B2modb); + mpz_init (B3modb); + mpz_init (B4modb); + mpz_init (B5modb); + + mpz_set_ui (t, 1); + mpz_mul_2exp (t, t, limb_bits - cnt); + mpz_sub (t, t, b); + mpz_mul_2exp (t, t, limb_bits); + mpz_tdiv_q (bi, t, b); /* bi = B^2/b */ + + mpz_set_ui (t, 1); + mpz_mul_2exp (t, t, limb_bits); /* t = B */ + mpz_tdiv_r (B1modb, t, b); + + mpz_mul_2exp (t, B1modb, limb_bits); + mpz_tdiv_r (B2modb, t, b); + + mpz_mul_2exp (t, B2modb, limb_bits); + mpz_tdiv_r (B3modb, t, b); + + mpz_mul_2exp (t, B3modb, limb_bits); + mpz_tdiv_r (B4modb, t, b); + + mpz_mul_2exp (t, B4modb, limb_bits); + mpz_tdiv_r (B5modb, t, b); + + mpz_set (cps[0], bi); + mpz_set_ui (cps[1], cnt); + mpz_tdiv_q_2exp (cps[2], B1modb, 0); + mpz_tdiv_q_2exp (cps[3], B2modb, 0); + mpz_tdiv_q_2exp (cps[4], B3modb, 0); + mpz_tdiv_q_2exp (cps[5], B4modb, 0); + mpz_tdiv_q_2exp (cps[6], B5modb, 0); + + mpz_clear (b); + mpz_clear (bi); + mpz_clear (t); + mpz_clear (B1modb); + mpz_clear (B2modb); + mpz_clear (B3modb); + mpz_clear (B4modb); + mpz_clear (B5modb); +} + +int +sumspills (mpz_t ppp, mpz_t *a, int n) +{ + mpz_t s; + int i, ret; + + mpz_init_set (s, a[0]); + + for (i = 1; i < n; i++) + { + mpz_add (s, s, a[i]); + } + ret = mpz_cmp (s, B) >= 0; + mpz_clear (s); + + return ret; +} diff --git a/gmp-impl.h b/gmp-impl.h index 39ff9005f..5f33af892 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -2695,6 +2695,11 @@ __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, ? mpn_modexact_1_odd (src, size, divisor) \ : mpn_mod_1 (src, size, divisor)) +#ifndef mpn_trialdiv /* if not done with cpuvec in a fat binary */ +#define mpn_trialdiv __MPN(trialdiv) +__GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *)); +#endif + /* binvert_limb() sets inv to the multiplicative inverse of n modulo 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS. diff --git a/mpn/generic/trialdiv.c b/mpn/generic/trialdiv.c new file mode 100644 index 000000000..02c376441 --- /dev/null +++ b/mpn/generic/trialdiv.c @@ -0,0 +1,104 @@ +/* mpn_trialdiv + +Copyright 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +/* + Fast, division-free trial division for GMP. + + This function will find the first (smallest) factor represented in + trialdivtab.h. It does not stop the factoring effort just because it has + reached some sensible limit, such as the square root of the input number. + + The caller can limit the factoring effort by passing NPRIMES. The function + well then divide to *at least* that limit. A position which only + mpn_trialdiv can make sense of is returned in the WHERE parameter. It can + be used for restarting the factoring effort; the first call should pass 0 + here. +*/ + +#include "gmp.h" +#include "gmp-impl.h" + +struct gmp_primes_dtab { + mp_limb_t binv; + mp_limb_t lim; +}; + +struct gmp_primes_ptab { + mp_limb_t ppp; /* primes, multiplied together */ + mp_limb_t cps[7]; /* ppp values pre-computed for mpn_mod_1s_4p */ + unsigned int idx:24; /* index of first primes in dtab */ + unsigned int np :8; /* number of primes related to this entry */ +}; + +#define P(p,inv,lim) {inv,lim} + +#include "trialdivtab.h" + +#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0])) + +/* Attempt to find a factor of T using trial division. + Input: A non-negative number T. + Output: non-zero if we found a factor, zero otherwise. To get the actual + prime factor, compute the mod B inverse of the return value. */ +/* FIXME: We could optimize out one of the outer loop conditions if we + had a final ptab entry with a huge nd field. */ +mp_limb_t +mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where) +{ + mp_limb_t ppp; + mp_limb_t *cps; + struct gmp_primes_dtab *dp; + long i, j, idx, np; + mp_limb_t r, q; + + ASSERT (tn >= 1); + + for (i = *where; i < PTAB_LINES; i++) + { + ppp = gmp_primes_ptab[i].ppp; + cps = gmp_primes_ptab[i].cps; + +#if __GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR < 4 + if (tn < 4) + r = mpn_mod_1 (tp, tn, ppp); /* FIXME */ + else +#endif + r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps); + + idx = gmp_primes_ptab[i].idx; + np = gmp_primes_ptab[i].np; + + /* Check divisibility by individual primes. */ + dp = &gmp_primes_dtab[idx] + np; + for (j = -np; j < 0; j++) + { + q = r * dp[j].binv; + if (q <= dp[j].lim) + { + *where = i; + return dp[j].binv; + } + } + + nprimes -= np; + if (nprimes <= 0) + return 0; + } + return 0; +} |