summaryrefslogtreecommitdiff
path: root/mpn
diff options
context:
space:
mode:
authorMarco Bodrato <bodrato@mail.dm.unipi.it>2022-02-15 08:53:21 +0100
committerMarco Bodrato <bodrato@mail.dm.unipi.it>2022-02-15 08:53:21 +0100
commit07b73a0b94202f1258d7838d0e79ae86cd4d209c (patch)
treecf9ac9a4ae82b7f8e4f57291f663fa07201da98d /mpn
parent09bbf17f76dbf456c33f57cfdbd664485a89bd3d (diff)
downloadgmp-07b73a0b94202f1258d7838d0e79ae86cd4d209c.tar.gz
mpn/generic/mulmod_bknp1.c: New file, with mpn_{mul,sqr}mod_bknp1
configure.ac: Compile it gmp-impl.h: Define new functions
Diffstat (limited to 'mpn')
-rw-r--r--mpn/generic/mulmod_bknp1.c501
1 files changed, 501 insertions, 0 deletions
diff --git a/mpn/generic/mulmod_bknp1.c b/mpn/generic/mulmod_bknp1.c
new file mode 100644
index 000000000..39109e500
--- /dev/null
+++ b/mpn/generic/mulmod_bknp1.c
@@ -0,0 +1,501 @@
+/* Mulptiplication mod B^n+1, for small operands.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2020-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#ifndef MOD_BKNP1_USE11
+#define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
+#endif
+#ifndef MOD_BKNP1_ONLY3
+#define MOD_BKNP1_ONLY3 0
+#endif
+
+/* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */
+static void
+_mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)
+{
+ mp_limb_t hl;
+ mp_srcptr hp;
+ unsigned i;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k > 2);
+ ASSERT (k % 2 == 1);
+
+ --k;
+
+ rp += k * n;
+ op += k * n;
+ hp = op;
+ hl = hp[n]; /* initial op[k*n]. */
+ ASSERT (hl < GMP_NUMB_MAX - 1);
+
+#if MOD_BKNP1_ONLY3 == 0
+ /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be
+ rp[n] = cy; */
+ *rp = 0;
+#endif
+
+ i = k >> 1;
+ do
+ {
+ mp_limb_t cy, bw;
+ rp -= n;
+ op -= n;
+ cy = hl + mpn_add_n (rp, op, hp, n);
+#if MOD_BKNP1_ONLY3
+ rp[n] = cy;
+#else
+ MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);
+#endif
+ rp -= n;
+ op -= n;
+ bw = hl + mpn_sub_n (rp, op, hp, n);
+ MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);
+ }
+ while (--i != 0);
+
+ rp += k * n;
+ for (; (hl = *rp) != 0; rp += k * n) /* Should run only once... */
+ {
+ *rp = 0;
+ i = k >> 1;
+ do
+ {
+ rp -= n;
+ MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);
+ rp -= n;
+ MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);
+ }
+ while (--i != 0);
+ }
+}
+
+static void
+_mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+ ASSERT (r[n] == h);
+
+ /* Fully normalise */
+ MPN_DECR_U (r, n + 1, h);
+ h -= r[n];
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, h);
+}
+
+static void
+_mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, -h);
+ h = r[n];
+ if (UNLIKELY (h != 0))
+ _mpn_modbnp1_pn_ip (r, n, h);
+}
+
+static void
+_mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+ if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */
+ {
+ _mpn_modbnp1_neg_ip (r, n, h);
+ return;
+ }
+
+ r[n] = h;
+ if (h)
+ _mpn_modbnp1_pn_ip(r, n, h);
+}
+
+/* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */
+/* Used when rn < on < 2*rn. */
+static void
+_mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)
+{
+ mp_limb_t bw;
+
+#if 0
+ if (UNLIKELY (on <= rn))
+ {
+ MPN_COPY (rp, op, on);
+ MPN_ZERO (rp + on, rn - on);
+ return;
+ }
+#endif
+
+ ASSERT (on > rn);
+ ASSERT (on <= 2 * rn);
+
+ bw = mpn_sub (rp, op, rn, op + rn, on - rn);
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, bw);
+}
+
+/* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */
+/* With odd k >= 3. */
+static void
+_mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)
+{
+ mp_limb_t cy;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k & 1);
+ k >>= 1;
+ ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 1);
+ ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);
+
+ cy = - mpn_sub_n (rp, op, op + rn, rn);
+ do {
+ op += 2 * rn;
+ cy += mpn_add_n (rp, rp, op, rn);
+ if (--k == 0)
+ break;
+ cy -= mpn_sub_n (rp, rp, op + rn, rn);
+ } while (1);
+
+ cy += op[rn];
+ _mpn_modbnp1_nc_ip (rp, rn, cy);
+}
+
+/* For the various mpn_divexact_byN here, fall back to using either
+ mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is
+ faster if it is native. For now, since mpn_divexact_1 is native on
+ platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+ mpn_pi1_bdiv_q_1 unconditionally. FIXME. */
+
+#ifndef mpn_divexact_by5
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_5 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)
+#define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)
+#else
+#define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)
+#endif
+#endif
+
+#ifndef mpn_divexact_by7
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_7 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)
+#define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)
+#else
+#define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)
+#endif
+#endif
+
+#ifndef mpn_divexact_by11
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_11 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)
+#define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)
+#else
+#define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)
+#endif
+#endif
+
+#ifndef mpn_divexact_by13
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_13 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)
+#define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)
+#else
+#define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)
+#endif
+#endif
+
+#ifndef mpn_divexact_by17
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_17 \
+ ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)
+#define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)
+#else
+#define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)
+#endif
+#endif
+
+/* Thanks to Chinese remainder theorem, store
+ in {rp, k*n+1} the value mod (B^(k*n)+1), given
+ {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and
+ {bp, n+1} mod (B^n+1) .
+ {tp, n+1} is a scratch area.
+ tp == rp or rp == ap are possible.
+*/
+static void
+_mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
+ mp_size_t n, unsigned k, mp_ptr tp)
+{
+ mp_limb_t mod;
+ unsigned i;
+
+#if MOD_BKNP1_ONLY3
+ k = 3;
+#endif
+ _mpn_modbnp1_kn (tp, ap, n, k);
+ if (mpn_sub_n (tp, bp, tp, n + 1))
+ _mpn_modbnp1_neg_ip (tp, n, tp[n]);
+
+#if MOD_BKNP1_USE11
+ if (UNLIKELY (k == 11))
+ {
+ ASSERT (GMP_NUMB_BITS % 2 == 0);
+ /* mod <- -Mod(B^n+1,11)^-1 */
+ mod = n * (GMP_NUMB_BITS % 5) % 5;
+ if ((mod > 2) || UNLIKELY (mod == 0))
+ mod += 5;
+
+ mod *= mpn_mod_1 (tp, n + 1, 11);
+ }
+ else
+#endif
+ {
+#if GMP_NUMB_BITS % 8 == 0
+ /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1) */
+ /* (2^6 - 1) = 3^2 * 7 */
+ mod = mpn_mod_34lsub1 (tp, n + 1);
+ ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);
+ /* (2^12 - 1) = 3^2 * 5 * 7 * 13 */
+ /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241 */
+ ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);
+
+#if GMP_NUMB_BITS % 3 != 0
+ if (UNLIKELY (k != 3))
+ {
+ ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));
+ if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
+ mod <<= 1; /* k >> 1 = 1 << 1 */
+ else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
+ mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;
+ else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
+ mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;
+ else /* k == 17 */
+ mod <<= 3; /* k >> 1 = 1 << 3 */
+#if 0
+ if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||
+ (GMP_NUMB_BITS == 16) && (k == 13))
+ mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +
+ (mod >> (3 * GMP_NUMB_BITS >> 2)));
+#endif
+ }
+#else
+ ASSERT (GMP_NUMB_MAX % k == 0);
+ /* 2^{GMP_NUMB_BITS} - 1 = 0 (mod k) */
+ /* 2^{GMP_NUMB_BITS} = 1 (mod k) */
+ /* 2^{n*GMP_NUMB_BITS} + 1 = 2 (mod k) */
+ /* -2^{-1} = k >> 1 (mod k) */
+ mod *= k >> 1;
+#endif
+#else
+ ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */
+#endif
+ }
+
+ MPN_INCR_U (tp, n + 1, mod);
+ tp[n] += mod;
+
+ if (LIKELY (k == 3))
+ ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));
+ else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
+ mpn_divexact_by5 (tp, tp, n + 1);
+ else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
+ mpn_divexact_by7 (tp, tp, n + 1);
+#if MOD_BKNP1_USE11
+ else if (UNLIKELY (k == 11))
+ mpn_divexact_by11 (tp, tp, n + 1);
+#endif
+ else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
+ mpn_divexact_by13 (tp, tp, n + 1);
+ else /* (k == 17) */
+ mpn_divexact_by17 (tp, tp, n + 1);
+
+ rp += k * n;
+ ap += k * n; /* tp - 1 */
+
+ rp -= n;
+ ap -= n;
+ ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));
+
+ i = k >> 1;
+ do
+ {
+ mp_limb_t cy, bw;
+ rp -= n;
+ ap -= n;
+ bw = mpn_sub_n (rp, ap, tp, n) + tp[n];
+ MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);
+ rp -= n;
+ ap -= n;
+ cy = mpn_add_n (rp, ap, tp, n) + tp[n];
+ MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);
+ }
+ while (--i != 0);
+
+ /* if (LIKELY (rp[k * n])) */
+ _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);
+}
+
+
+static void
+_mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+ mp_ptr tp)
+{
+ mp_limb_t cy;
+ unsigned k;
+
+ ASSERT (0 < rn);
+ ASSERT ((ap[rn] | bp[rn]) <= 1);
+
+ if (UNLIKELY (ap[rn] | bp[rn]))
+ {
+ if (ap[rn])
+ cy = bp[rn] + mpn_neg (rp, bp, rn);
+ else /* ap[rn] == 0 */
+ cy = mpn_neg (rp, ap, rn);
+ }
+ else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
+ {
+ rn /= k;
+ mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);
+ return;
+ }
+ else
+ {
+ mpn_mul_n (tp, ap, bp, rn);
+ cy = mpn_sub_n (rp, tp, tp + rn, rn);
+ }
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */
+/* tp must point to at least 4*(k-1)*n+1 limbs*/
+void
+mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
+ mp_size_t n, unsigned k, mp_ptr tp)
+{
+ mp_ptr hp;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k > 2);
+ ASSERT (k % 2 == 1);
+
+ /* a % (B^{nn}+1)/(B^{nn/k}+1) */
+ _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
+ /* b % (B^{nn}+1)/(B^{nn/k}+1) */
+ _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);
+ mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);
+ _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
+
+ hp = tp + k * n + 1;
+ /* a % (B^{nn/k}+1) */
+ ASSERT (ap[k * n] <= 1);
+ _mpn_modbnp1_kn (hp, ap, n, k);
+ /* b % (B^{nn/k}+1) */
+ ASSERT (bp[k * n] <= 1);
+ _mpn_modbnp1_kn (hp + n + 1, bp, n, k);
+ _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);
+
+ _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);
+}
+
+
+static void
+_mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,
+ mp_ptr tp)
+{
+ mp_limb_t cy;
+ unsigned k;
+
+ ASSERT (0 < rn);
+
+ if (UNLIKELY (ap[rn]))
+ {
+ ASSERT (ap[rn] == 1);
+ *rp = 1;
+ MPN_FILL (rp + 1, rn, 0);
+ return;
+ }
+ else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
+ {
+ rn /= k;
+ mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);
+ return;
+ }
+ else
+ {
+ mpn_sqr (tp, ap, rn);
+ cy = mpn_sub_n (rp, tp, tp + rn, rn);
+ }
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */
+/* tp must point to at least 3*(k-1)*n+1 limbs*/
+void
+mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,
+ mp_size_t n, unsigned k, mp_ptr tp)
+{
+ mp_ptr hp;
+
+#if MOD_BKNP1_ONLY3
+ ASSERT (k == 3);
+ k = 3;
+#endif
+ ASSERT (k > 2);
+ ASSERT (k % 2 == 1);
+
+ /* a % (B^{nn}+1)/(B^{nn/k}+1) */
+ _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
+ mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);
+ _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
+
+ hp = tp + k * n + 1;
+ /* a % (B^{nn/k}+1) */
+ ASSERT (ap[k * n] <= 1);
+ _mpn_modbnp1_kn (hp, ap, n, k);
+ _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));
+
+ _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);
+}