summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am16
-rw-r--r--configure.in1
-rw-r--r--gen-trialdivtab.c295
-rw-r--r--gmp-impl.h5
-rw-r--r--mpn/generic/trialdiv.c104
5 files changed, 421 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
index 04ccfbada..063fd06e9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -384,6 +384,22 @@ gen-bases_.c: gen-bases.c $(ANSI2KNR)
$(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c
+
+trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
+ ./gen-trialdivtab $(BITS_PER_MP_LIMB) 2000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
+BUILT_SOURCES += trialdivtab.h
+
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+ $(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+DISTCLEANFILES += gen-trialdivtab$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-trialdivtab.c
+
+gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
+ $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c
+
+
+
+
mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
./gen-psqr $(BITS_PER_MP_LIMB) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
BUILT_SOURCES += mpn/perfsqr.h
diff --git a/configure.in b/configure.in
index 2cb3f7910..2e6f2a3ab 100644
--- a/configure.in
+++ b/configure.in
@@ -2449,6 +2449,7 @@ gmp_mpn_functions="$extra_functions \
bdiv_q_1 \
sb_bdiv_q sb_bdiv_qr dc_bdiv_q dc_bdiv_qr mu_bdiv_q mu_bdiv_qr \
divexact bdiv_dbm1c redc_1 redc_2 powm powlo powm_sec subcnd_n \
+ trialdiv \
$gmp_mpn_functions_optional"
define(GMP_MULFUNC_CHOICES,
diff --git a/gen-trialdivtab.c b/gen-trialdivtab.c
new file mode 100644
index 000000000..0a265046a
--- /dev/null
+++ b/gen-trialdivtab.c
@@ -0,0 +1,295 @@
+/* gen-trialdivtab.c
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/*
+ Generate tables for fast, division-free trial division for GMP.
+
+ There is one main table, ptab. It contains primes, multiplied together, and
+ several types of pre-computed inverses. It refers to tables of the type
+ dtab, via the last two indices. That table contains the individual primes in
+ the range, except that the primes are not actually included in the table (see
+ the P macro; it sneakingly excludes the primes themselves). Instead, the
+ dtab tables contains tuples for each prime (modular-inverse, limit) used for
+ divisibility checks.
+
+ This interface is not intended for division of very many primes, since then
+ other algorithms apply.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "dumbmp.c"
+
+int sumspills (mpz_t, mpz_t *, int);
+void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t);
+
+int limb_bits;
+
+mpz_t B;
+
+int
+main (int argc, char *argv[])
+{
+ unsigned long t, p;
+ mpz_t ppp, acc, inv, gmp_numb_max, tmp, Bhalf;
+ mpz_t pre[7];
+ int i;
+ int start_p, end_p, interval_start, interval_end, omitted_p;
+ char *endtok;
+ int stop;
+ int np, start_idx;
+
+ if (argc < 2)
+ {
+ fprintf (stderr, "usage: %s bits endprime\n", argv[0]);
+ exit (1);
+ }
+
+ limb_bits = atoi (argv[1]);
+
+ end_p = 1290; /* default end prime */
+ if (argc == 3)
+ end_p = atoi (argv[2]);
+
+ printf ("#if GMP_LIMB_BITS != %d\n", limb_bits);
+ printf ("#error This table is for GMP_LIMB_BITS = %d\n", limb_bits);
+ printf ("#endif\n\n");
+
+ printf ("#if GMP_NAIL_BITS != 0\n");
+ printf ("#error This table does not support nails\n");
+ printf ("#endif\n\n");
+
+ for (i = 0; i < 7; i++)
+ mpz_init (pre[i]);
+
+ mpz_init_set_ui (gmp_numb_max, 1);
+ mpz_mul_2exp (gmp_numb_max, gmp_numb_max, limb_bits);
+ mpz_sub_ui (gmp_numb_max, gmp_numb_max, 1);
+
+ mpz_init (tmp);
+ mpz_init (inv);
+
+ mpz_init_set_ui (B, 1); mpz_mul_2exp (B, B, limb_bits);
+ mpz_init_set_ui (Bhalf, 1); mpz_mul_2exp (Bhalf, Bhalf, limb_bits - 1);
+
+ start_p = 3;
+
+ mpz_init_set_ui (ppp, 1);
+ mpz_init (acc);
+ interval_start = start_p;
+ omitted_p = 3;
+ interval_end = 0;
+
+ printf ("static struct gmp_primes_dtab gmp_primes_dtab[] = {\n");
+
+ for (t = start_p; t <= end_p; t += 2)
+ {
+ if (! isprime (t))
+ continue;
+
+ mpz_mul_ui (acc, ppp, t);
+ stop = mpz_cmp (acc, Bhalf) >= 0;
+ if (!stop)
+ {
+ mpn_mod_1s_4p_cps (pre, acc);
+ stop = sumspills (acc, pre + 2, 5);
+ }
+
+ if (stop)
+ {
+ for (p = interval_start; p <= interval_end; p += 2)
+ {
+ if (! isprime (p))
+ continue;
+
+ printf (" P(%d,", (int) p);
+ mpz_invert_ui_2exp (inv, p, limb_bits);
+ printf ("0x"); mpz_out_str (stdout, 16, inv); printf (",");
+
+ mpz_tdiv_q_ui (tmp, gmp_numb_max, p);
+ printf ("0x"); mpz_out_str (stdout, 16, tmp);
+ printf ("),\n");
+ }
+ mpz_set_ui (ppp, t);
+ interval_start = t;
+ omitted_p = t;
+ }
+ else
+ {
+ mpz_set (ppp, acc);
+ }
+ interval_end = t;
+ }
+ printf (" P(0,0,0)\n};\n");
+
+
+ printf ("static struct gmp_primes_ptab gmp_primes_ptab[] = {\n");
+
+ endtok = "";
+
+ mpz_set_ui (ppp, 1);
+ interval_start = start_p;
+ interval_end = 0;
+ np = 0;
+ start_idx = 0;
+ for (t = start_p; t <= end_p; t += 2)
+ {
+ if (! isprime (t))
+ continue;
+
+ mpz_mul_ui (acc, ppp, t);
+
+ stop = mpz_cmp (acc, Bhalf) >= 0;
+ if (!stop)
+ {
+ mpn_mod_1s_4p_cps (pre, acc);
+ stop = sumspills (acc, pre + 2, 5);
+ }
+
+ if (stop)
+ {
+ mpn_mod_1s_4p_cps (pre, ppp);
+ printf ("%s", endtok);
+ printf (" {0x"); mpz_out_str (stdout, 16, ppp);
+ printf (",{0x"); mpz_out_str (stdout, 16, pre[0]);
+ printf (",%d", (int) PTR(pre[1])[0]);
+ for (i = 0; i < 5; i++)
+ {
+ printf (",");
+ printf ("0x"); mpz_out_str (stdout, 16, pre[2 + i]);
+ }
+ printf ("},");
+ printf ("%d,", start_idx);
+ printf ("%d}", np - start_idx);
+
+ endtok = ",\n";
+ mpz_set_ui (ppp, t);
+ interval_start = t;
+ start_idx = np;
+ }
+ else
+ {
+ mpz_set (ppp, acc);
+ }
+ interval_end = t;
+ np++;
+ }
+ printf ("\n};\n");
+
+ printf ("#define SMALLEST_OMITTED_PRIME %d\n", (int) omitted_p);
+
+ return 0;
+}
+
+unsigned long
+mpz_log2 (mpz_t x)
+{
+ mpz_t y;
+ unsigned long cnt;
+
+ mpz_init (y);
+ mpz_set (y, x);
+ cnt = 0;
+ while (mpz_sgn (y) != 0)
+ {
+ mpz_tdiv_q_2exp (y, y, 1);
+ cnt++;
+ }
+ mpz_clear (y);
+
+ return cnt;
+}
+
+void
+mpn_mod_1s_4p_cps (mpz_t cps[7], mpz_t bparm)
+{
+ mpz_t b, bi;
+ mpz_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ mpz_t t;
+ int cnt;
+
+ mpz_init_set (b, bparm);
+
+ cnt = limb_bits - mpz_log2 (b);
+
+ mpz_init (bi);
+ mpz_init (t);
+ mpz_init (B1modb);
+ mpz_init (B2modb);
+ mpz_init (B3modb);
+ mpz_init (B4modb);
+ mpz_init (B5modb);
+
+ mpz_set_ui (t, 1);
+ mpz_mul_2exp (t, t, limb_bits - cnt);
+ mpz_sub (t, t, b);
+ mpz_mul_2exp (t, t, limb_bits);
+ mpz_tdiv_q (bi, t, b); /* bi = B^2/b */
+
+ mpz_set_ui (t, 1);
+ mpz_mul_2exp (t, t, limb_bits); /* t = B */
+ mpz_tdiv_r (B1modb, t, b);
+
+ mpz_mul_2exp (t, B1modb, limb_bits);
+ mpz_tdiv_r (B2modb, t, b);
+
+ mpz_mul_2exp (t, B2modb, limb_bits);
+ mpz_tdiv_r (B3modb, t, b);
+
+ mpz_mul_2exp (t, B3modb, limb_bits);
+ mpz_tdiv_r (B4modb, t, b);
+
+ mpz_mul_2exp (t, B4modb, limb_bits);
+ mpz_tdiv_r (B5modb, t, b);
+
+ mpz_set (cps[0], bi);
+ mpz_set_ui (cps[1], cnt);
+ mpz_tdiv_q_2exp (cps[2], B1modb, 0);
+ mpz_tdiv_q_2exp (cps[3], B2modb, 0);
+ mpz_tdiv_q_2exp (cps[4], B3modb, 0);
+ mpz_tdiv_q_2exp (cps[5], B4modb, 0);
+ mpz_tdiv_q_2exp (cps[6], B5modb, 0);
+
+ mpz_clear (b);
+ mpz_clear (bi);
+ mpz_clear (t);
+ mpz_clear (B1modb);
+ mpz_clear (B2modb);
+ mpz_clear (B3modb);
+ mpz_clear (B4modb);
+ mpz_clear (B5modb);
+}
+
+int
+sumspills (mpz_t ppp, mpz_t *a, int n)
+{
+ mpz_t s;
+ int i, ret;
+
+ mpz_init_set (s, a[0]);
+
+ for (i = 1; i < n; i++)
+ {
+ mpz_add (s, s, a[i]);
+ }
+ ret = mpz_cmp (s, B) >= 0;
+ mpz_clear (s);
+
+ return ret;
+}
diff --git a/gmp-impl.h b/gmp-impl.h
index 39ff9005f..5f33af892 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -2695,6 +2695,11 @@ __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t,
? mpn_modexact_1_odd (src, size, divisor) \
: mpn_mod_1 (src, size, divisor))
+#ifndef mpn_trialdiv /* if not done with cpuvec in a fat binary */
+#define mpn_trialdiv __MPN(trialdiv)
+__GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
+#endif
+
/* binvert_limb() sets inv to the multiplicative inverse of n modulo
2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
diff --git a/mpn/generic/trialdiv.c b/mpn/generic/trialdiv.c
new file mode 100644
index 000000000..02c376441
--- /dev/null
+++ b/mpn/generic/trialdiv.c
@@ -0,0 +1,104 @@
+/* mpn_trialdiv
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/*
+ Fast, division-free trial division for GMP.
+
+ This function will find the first (smallest) factor represented in
+ trialdivtab.h. It does not stop the factoring effort just because it has
+ reached some sensible limit, such as the square root of the input number.
+
+ The caller can limit the factoring effort by passing NPRIMES. The function
+ well then divide to *at least* that limit. A position which only
+ mpn_trialdiv can make sense of is returned in the WHERE parameter. It can
+ be used for restarting the factoring effort; the first call should pass 0
+ here.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+struct gmp_primes_dtab {
+ mp_limb_t binv;
+ mp_limb_t lim;
+};
+
+struct gmp_primes_ptab {
+ mp_limb_t ppp; /* primes, multiplied together */
+ mp_limb_t cps[7]; /* ppp values pre-computed for mpn_mod_1s_4p */
+ unsigned int idx:24; /* index of first primes in dtab */
+ unsigned int np :8; /* number of primes related to this entry */
+};
+
+#define P(p,inv,lim) {inv,lim}
+
+#include "trialdivtab.h"
+
+#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
+
+/* Attempt to find a factor of T using trial division.
+ Input: A non-negative number T.
+ Output: non-zero if we found a factor, zero otherwise. To get the actual
+ prime factor, compute the mod B inverse of the return value. */
+/* FIXME: We could optimize out one of the outer loop conditions if we
+ had a final ptab entry with a huge nd field. */
+mp_limb_t
+mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
+{
+ mp_limb_t ppp;
+ mp_limb_t *cps;
+ struct gmp_primes_dtab *dp;
+ long i, j, idx, np;
+ mp_limb_t r, q;
+
+ ASSERT (tn >= 1);
+
+ for (i = *where; i < PTAB_LINES; i++)
+ {
+ ppp = gmp_primes_ptab[i].ppp;
+ cps = gmp_primes_ptab[i].cps;
+
+#if __GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR < 4
+ if (tn < 4)
+ r = mpn_mod_1 (tp, tn, ppp); /* FIXME */
+ else
+#endif
+ r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
+
+ idx = gmp_primes_ptab[i].idx;
+ np = gmp_primes_ptab[i].np;
+
+ /* Check divisibility by individual primes. */
+ dp = &gmp_primes_dtab[idx] + np;
+ for (j = -np; j < 0; j++)
+ {
+ q = r * dp[j].binv;
+ if (q <= dp[j].lim)
+ {
+ *where = i;
+ return dp[j].binv;
+ }
+ }
+
+ nprimes -= np;
+ if (nprimes <= 0)
+ return 0;
+ }
+ return 0;
+}