summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2020-10-30 15:01:39 +0100
committer Niels Möller <nisse@lysator.liu.se> 2020-10-30 20:22:40 +0100
commit f4f5625edb7d899972431b838ac19ced9288f68a (patch)
tree bc18e9bd95b9f940bc6d4229e012c063562e8c9e
parent cbf217b15f66a73041e6fe1a108453efd0297d27 (diff)
download nettle-f4f5625edb7d899972431b838ac19ced9288f68a.tar.gz
Add separate result argument to all mod functions.
* ecc-internal.h (typedef ecc_mod_func): Add separate result argument. Updated all C implementations and callers.
-rw-r--r-- ChangeLog 5
-rw-r--r-- ecc-a-to-j.c 4
-rw-r--r-- ecc-curve25519.c 24
-rw-r--r-- ecc-curve448.c 16
-rw-r--r-- ecc-gost-gc256b.c 8
-rw-r--r-- ecc-gost-gc512a.c 8
-rw-r--r-- ecc-internal.h 6
-rw-r--r-- ecc-j-to-a.c 2
-rw-r--r-- ecc-mod-arith.c 4
-rw-r--r-- ecc-mod-inv.c 4
-rw-r--r-- ecc-mod.c 35
-rw-r--r-- ecc-pm1-redc.c 10
-rw-r--r-- ecc-pp1-redc.c 8
-rw-r--r-- ecc-secp192r1.c 40
-rw-r--r-- ecc-secp224r1.c 2
-rw-r--r-- ecc-secp256r1.c 34
-rw-r--r-- ecc-secp384r1.c 75
-rw-r--r-- ecc-secp521r1.c 18
-rw-r--r-- eddsa-hash.c 2
-rw-r--r-- examples/ecc-benchmark.c 6
-rw-r--r-- testsuite/ecc-mod-test.c 60
-rw-r--r-- testsuite/ecc-redc-test.c 6
22 files changed, 215 insertions, 162 deletions
diff --git a/ChangeLog b/ChangeLog
index cc5230c4..cb2deaa5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2020-10-30 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-internal.h (typedef ecc_mod_func): Add separate result
+ argument. Updated all C implementations and callers.
+
2020-10-29 Niels Möller <nisse@lysator.liu.se>
* ecc-mod.c (ecc_mod): More unified handling of final carry
diff --git a/ecc-a-to-j.c b/ecc-a-to-j.c
index 9fb0d2b8..5db5e4ea 100644
--- a/ecc-a-to-j.c
+++ b/ecc-a-to-j.c
@@ -47,10 +47,10 @@ ecc_a_to_j (const struct ecc_curve *ecc,
mpn_copyd (r + ecc->p.size, p, 2*ecc->p.size);
mpn_zero (r, ecc->p.size);
- ecc->p.mod (&ecc->p, r);
+ ecc->p.mod (&ecc->p, r, r);
mpn_zero (r + ecc->p.size, ecc->p.size);
- ecc->p.mod (&ecc->p, r + ecc->p.size);
+ ecc->p.mod (&ecc->p, r + ecc->p.size, r + ecc->p.size);
}
else if (r != p)
mpn_copyi (r, p, 2*ecc->p.size);
diff --git a/ecc-curve25519.c b/ecc-curve25519.c
index 05e772bc..bd2e19d2 100644
--- a/ecc-curve25519.c
+++ b/ecc-curve25519.c
@@ -50,7 +50,7 @@
#define ecc_curve25519_modp _nettle_ecc_curve25519_modp
void
-ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#else
#if PHIGH_BITS == 0
@@ -58,16 +58,16 @@ ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp);
#endif
static void
-ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t hi, cy;
- cy = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE,
+ cy = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, ECC_LIMB_SIZE,
(mp_limb_t) 19 << PHIGH_BITS);
- hi = rp[ECC_LIMB_SIZE-1];
+ hi = xp[ECC_LIMB_SIZE-1];
cy = (cy << PHIGH_BITS) + (hi >> (GMP_NUMB_BITS - PHIGH_BITS));
rp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS))
- + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, 19 * cy);
+ + sec_add_1 (rp, xp, ECC_LIMB_SIZE - 1, 19 * cy);
}
#endif /* HAVE_NATIVE_ecc_curve25519_modp */
@@ -78,7 +78,7 @@ ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
#endif
static void
-ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp)
+ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
{
mp_size_t n;
mp_limb_t cy;
@@ -86,18 +86,18 @@ ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp)
/* n is the offset where we add in the next term */
for (n = ECC_LIMB_SIZE; n-- > 0;)
{
- cy = mpn_submul_1 (rp + n,
+ cy = mpn_submul_1 (xp + n,
q->B_shifted, ECC_LIMB_SIZE,
- rp[n + ECC_LIMB_SIZE]);
+ xp[n + ECC_LIMB_SIZE]);
/* Top limb of mBmodq_shifted is zero, so we get cy == 0 or 1 */
assert (cy < 2);
- mpn_cnd_add_n (cy, rp+n, rp+n, q->m, ECC_LIMB_SIZE);
+ mpn_cnd_add_n (cy, xp+n, xp+n, q->m, ECC_LIMB_SIZE);
}
- cy = mpn_submul_1 (rp, q->m, ECC_LIMB_SIZE,
- rp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS));
+ cy = mpn_submul_1 (xp, q->m, ECC_LIMB_SIZE,
+ xp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS));
assert (cy < 2);
- mpn_cnd_add_n (cy, rp, rp, q->m, ECC_LIMB_SIZE);
+ mpn_cnd_add_n (cy, rp, xp, q->m, ECC_LIMB_SIZE);
}
/* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m. Needs 5 * n scratch
diff --git a/ecc-curve448.c b/ecc-curve448.c
index c00faa30..6b061606 100644
--- a/ecc-curve448.c
+++ b/ecc-curve448.c
@@ -48,10 +48,10 @@
#if HAVE_NATIVE_ecc_curve448_modp
#define ecc_curve448_modp _nettle_ecc_curve448_modp
void
-ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#elif GMP_NUMB_BITS == 64
static void
-ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
/* Let B = 2^64, b = 2^32 = sqrt(B).
p = B^7 - b B^3 - 1 ==> B^7 = b B^3 + 1
@@ -79,18 +79,18 @@ ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
+----+----+----+----+----+----+----+
*/
mp_limb_t c3, c4, c7;
- mp_limb_t *tp = rp + 7;
+ mp_limb_t *tp = xp + 7;
- c4 = mpn_add_n (rp, rp, rp + 7, 4);
- c7 = mpn_addmul_1 (rp + 4, rp + 11, 3, 2);
- c3 = mpn_addmul_1 (rp, rp + 11, 3, (mp_limb_t) 1 << 32);
- c7 += mpn_addmul_1 (rp + 3, rp + 7, 4, (mp_limb_t) 1 << 32);
+ c4 = mpn_add_n (xp, xp, xp + 7, 4);
+ c7 = mpn_addmul_1 (xp + 4, xp + 11, 3, 2);
+ c3 = mpn_addmul_1 (xp, xp + 11, 3, (mp_limb_t) 1 << 32);
+ c7 += mpn_addmul_1 (xp + 3, xp + 7, 4, (mp_limb_t) 1 << 32);
tp[0] = c7;
tp[1] = tp[2] = 0;
tp[3] = c3 + (c7 << 32);
tp[4] = c4 + (c7 >> 32) + (tp[3] < c3);
tp[5] = tp[6] = 0;
- c7 = mpn_add_n (rp, rp, tp, 7);
+ c7 = mpn_add_n (rp, xp, tp, 7);
c7 = mpn_cnd_add_n (c7, rp, rp, m->B, 7);
assert (c7 == 0);
}
diff --git a/ecc-gost-gc256b.c b/ecc-gost-gc256b.c
index a23d46fc..1490d7a8 100644
--- a/ecc-gost-gc256b.c
+++ b/ecc-gost-gc256b.c
@@ -43,14 +43,14 @@
#include "ecc-gost-gc256b.h"
static void
-ecc_gost_gc256b_modp (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_gost_gc256b_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
mp_size_t mn = m->size;
mp_limb_t hi;
- hi = mpn_addmul_1(rp, rp + mn, mn, 0x269);
- hi = sec_add_1 (rp, rp, mn, hi * 0x269);
- hi = sec_add_1 (rp, rp, mn, hi * 0x269);
+ hi = mpn_addmul_1(xp, xp + mn, mn, 0x269);
+ hi = sec_add_1 (xp, xp, mn, hi * 0x269);
+ hi = sec_add_1 (rp, xp, mn, hi * 0x269);
assert(hi == 0);
}
diff --git a/ecc-gost-gc512a.c b/ecc-gost-gc512a.c
index 398762c3..0f6e9b6d 100644
--- a/ecc-gost-gc512a.c
+++ b/ecc-gost-gc512a.c
@@ -43,14 +43,14 @@
#include "ecc-gost-gc512a.h"
static void
-ecc_gost_gc512a_modp (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_gost_gc512a_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
mp_size_t mn = m->size;
mp_limb_t hi;
- hi = mpn_addmul_1(rp, rp + mn, mn, 0x239);
- hi = sec_add_1 (rp, rp, mn, hi * 0x239);
- hi = sec_add_1 (rp, rp, mn, hi * 0x239);
+ hi = mpn_addmul_1(xp, xp + mn, mn, 0x239);
+ hi = sec_add_1 (xp, xp, mn, hi * 0x239);
+ hi = sec_add_1 (rp, xp, mn, hi * 0x239);
assert(hi == 0);
}
diff --git a/ecc-internal.h b/ecc-internal.h
index 3082917b..b2ee8350 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -114,8 +114,10 @@ struct ecc_modulo;
/* Reduces from 2*ecc->size to ecc->size. */
/* Required to return a result < 2q. This property is inherited by
- mod_mul and mod_sqr. */
-typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp);
+ mod_mul and mod_sqr. May clobber input xp. rp may point to the
+ start or the middle of the xp area, but no other overlap is
+ allowed. */
+typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
typedef void ecc_mod_inv_func (const struct ecc_modulo *m,
mp_limb_t *vp, const mp_limb_t *ap,
diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c
index fd329a48..915c056b 100644
--- a/ecc-j-to-a.c
+++ b/ecc-j-to-a.c
@@ -59,7 +59,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
/* Divide this common factor by B, instead of applying redc to
both x and y outputs. */
mpn_zero (iz2p + ecc->p.size, ecc->p.size);
- ecc->p.reduce (&ecc->p, iz2p);
+ ecc->p.reduce (&ecc->p, iz2p, iz2p);
}
/* r_x <-- x / z^2 */
diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c
index 34a28544..686b56bf 100644
--- a/ecc-mod-arith.c
+++ b/ecc-mod-arith.c
@@ -115,7 +115,7 @@ ecc_mod_mul (const struct ecc_modulo *m, mp_limb_t *rp,
const mp_limb_t *ap, const mp_limb_t *bp)
{
mpn_mul_n (rp, ap, bp, m->size);
- m->reduce (m, rp);
+ m->reduce (m, rp, rp);
}
void
@@ -123,7 +123,7 @@ ecc_mod_sqr (const struct ecc_modulo *m, mp_limb_t *rp,
const mp_limb_t *ap)
{
mpn_sqr (rp, ap, m->size);
- m->reduce (m, rp);
+ m->reduce (m, rp, rp);
}
/* Compute R <-- X^{2^k} mod M. Needs 2*ecc->size limbs at rp, and
diff --git a/ecc-mod-inv.c b/ecc-mod-inv.c
index e45c230a..570f0568 100644
--- a/ecc-mod-inv.c
+++ b/ecc-mod-inv.c
@@ -181,9 +181,9 @@ ecc_mod_inv_redc (const struct ecc_modulo *m,
mpn_copyi (scratch, ap, m->size);
mpn_zero (scratch + m->size, m->size);
- m->reduce (m, scratch);
+ m->reduce (m, scratch, scratch);
mpn_zero (scratch + m->size, m->size);
- m->reduce (m, scratch);
+ m->reduce (m, scratch, scratch);
ecc_mod_inv_destructive (m, vp, scratch);
}
diff --git a/ecc-mod.c b/ecc-mod.c
index 38a0d4f9..7532d1ec 100644
--- a/ecc-mod.c
+++ b/ecc-mod.c
@@ -39,9 +39,11 @@
#include "ecc-internal.h"
-/* Computes r mod m, input 2*m->size, output m->size. */
+/* Computes r <-- x mod m, input 2*m->size, output m->size. It's
+ * allowed to have rp == xp or rp == xp + m->size, but no other kind
+ * of overlap is allowed. */
void
-ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t hi;
mp_size_t mn = m->size;
@@ -64,9 +66,9 @@ ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp)
rn -= sn;
for (i = 0; i <= sn; i++)
- rp[rn+i-1] = mpn_addmul_1 (rp + rn - mn - 1 + i, m->B, bn, rp[rn+i-1]);
- rp[rn-1] = rp[rn+sn-1]
- + mpn_add_n (rp + rn - sn - 1, rp + rn - sn - 1, rp + rn - 1, sn);
+ xp[rn+i-1] = mpn_addmul_1 (xp + rn - mn - 1 + i, m->B, bn, xp[rn+i-1]);
+ xp[rn-1] = xp[rn+sn-1]
+ + mpn_add_n (xp + rn - sn - 1, xp + rn - sn - 1, xp + rn - 1, sn);
}
}
else
@@ -76,10 +78,10 @@ ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp)
rn -= sn;
for (i = 0; i < sn; i++)
- rp[rn+i] = mpn_addmul_1 (rp + rn - mn + i, m->B, bn, rp[rn+i]);
+ xp[rn+i] = mpn_addmul_1 (xp + rn - mn + i, m->B, bn, xp[rn+i]);
- hi = mpn_add_n (rp + rn - sn, rp + rn - sn, rp + rn, sn);
- hi = mpn_cnd_add_n (hi, rp + rn - mn, rp + rn - mn, m->B, mn);
+ hi = mpn_add_n (xp + rn - sn, xp + rn - sn, xp + rn, sn);
+ hi = mpn_cnd_add_n (hi, xp + rn - mn, xp + rn - mn, m->B, mn);
assert (hi == 0);
}
}
@@ -89,23 +91,26 @@ ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp)
assert (rn <= sn);
for (i = 0; i < rn; i++)
- rp[mn+i] = mpn_addmul_1 (rp + i, m->B, bn, rp[mn+i]);
+ xp[mn+i] = mpn_addmul_1 (xp + i, m->B, bn, xp[mn+i]);
- hi = mpn_add_n (rp + bn, rp + bn, rp + mn, rn);
+ hi = mpn_add_n (xp + bn, xp + bn, xp + mn, rn);
if (rn < sn)
- hi = sec_add_1 (rp + bn + rn, rp + bn + rn, sn - rn, hi);
+ hi = sec_add_1 (xp + bn + rn, xp + bn + rn, sn - rn, hi);
shift = m->size * GMP_NUMB_BITS - m->bit_size;
if (shift > 0)
{
/* Combine hi with top bits, add in */
- hi = (hi << shift) | (rp[mn-1] >> (GMP_NUMB_BITS - shift));
- rp[mn-1] = (rp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1))
- + mpn_addmul_1 (rp, m->B_shifted, mn-1, hi);
+ hi = (hi << shift) | (xp[mn-1] >> (GMP_NUMB_BITS - shift));
+ xp[mn-1] = (xp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1))
+ + mpn_addmul_1 (xp, m->B_shifted, mn-1, hi);
+ /* FIXME: Can this copying be eliminated? */
+ if (rp != xp)
+ mpn_copyi (rp, xp, mn);
}
else
{
- hi = mpn_cnd_add_n (hi, rp, rp, m->B, mn);
+ hi = mpn_cnd_add_n (hi, rp, xp, m->B, mn);
assert (hi == 0);
}
}
diff --git a/ecc-pm1-redc.c b/ecc-pm1-redc.c
index 1b07b793..cc95c6e4 100644
--- a/ecc-pm1-redc.c
+++ b/ecc-pm1-redc.c
@@ -42,7 +42,7 @@
/* Use that 1 = - (p - 1) (mod p), and that at least one low limb of p
- 1 is zero. */
void
-ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
unsigned i;
mp_limb_t hi, cy;
@@ -50,10 +50,10 @@ ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
mp_size_t k = m->redc_size;
for (i = 0; i < m->size; i++)
- rp[i] = mpn_submul_1 (rp + i + k,
- m->redc_mpm1, m->size - k, rp[i]);
- hi = mpn_sub_n (rp, rp + m->size, rp, m->size);
- cy = mpn_cnd_add_n (hi, rp, rp, m->m, m->size);
+ xp[i] = mpn_submul_1 (xp + i + k,
+ m->redc_mpm1, m->size - k, xp[i]);
+ hi = mpn_sub_n (xp, xp + m->size, xp, m->size);
+ cy = mpn_cnd_add_n (hi, rp, xp, m->m, m->size);
assert (cy == hi);
if (shift > 0)
diff --git a/ecc-pp1-redc.c b/ecc-pp1-redc.c
index 9f643d97..b088c4c5 100644
--- a/ecc-pp1-redc.c
+++ b/ecc-pp1-redc.c
@@ -42,7 +42,7 @@
/* Use that 1 = p + 1 (mod p), and that at least one low limb of p + 1
is zero. */
void
-ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
+ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
{
unsigned i;
mp_limb_t hi, cy;
@@ -50,9 +50,9 @@ ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp)
mp_size_t k = m->redc_size;
for (i = 0; i < m->size; i++)
- rp[i] = mpn_addmul_1 (rp + i + k,
- m->redc_mpm1, m->size - k, rp[i]);
- hi = mpn_add_n (rp, rp, rp + m->size, m->size);
+ xp[i] = mpn_addmul_1 (xp + i + k,
+ m->redc_mpm1, m->size - k, xp[i]);
+ hi = mpn_add_n (rp, xp, xp + m->size, m->size);
if (shift > 0)
{
hi = (hi << shift) | (rp[m->size - 1] >> (GMP_NUMB_BITS - shift));
diff --git a/ecc-secp192r1.c b/ecc-secp192r1.c
index 05c26408..ec97477c 100644
--- a/ecc-secp192r1.c
+++ b/ecc-secp192r1.c
@@ -52,57 +52,57 @@
#define ecc_secp192r1_modp _nettle_ecc_secp192r1_modp
void
-ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
/* Use that p = 2^{192} - 2^64 - 1, to eliminate 128 bits at a time. */
#elif GMP_NUMB_BITS == 32
/* p is 6 limbs, p = B^6 - B^2 - 1 */
static void
-ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t cy;
/* Reduce from 12 to 9 limbs (top limb small)*/
- cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4);
- cy = sec_add_1 (rp + 6, rp + 6, 2, cy);
- cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4);
+ cy = mpn_add_n (xp + 2, xp + 2, xp + 8, 4);
+ cy = sec_add_1 (xp + 6, xp + 6, 2, cy);
+ cy += mpn_add_n (xp + 4, xp + 4, xp + 8, 4);
assert (cy <= 2);
- rp[8] = cy;
+ xp[8] = cy;
/* Reduce from 9 to 6 limbs */
- cy = mpn_add_n (rp, rp, rp + 6, 3);
- cy = sec_add_1 (rp + 3, rp + 3, 2, cy);
- cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3);
- cy = sec_add_1 (rp + 5, rp + 5, 1, cy);
+ cy = mpn_add_n (xp, xp, xp + 6, 3);
+ cy = sec_add_1 (xp + 3, xp + 3, 2, cy);
+ cy += mpn_add_n (xp + 2, xp + 2, xp + 6, 3);
+ cy = sec_add_1 (xp + 5, xp + 5, 1, cy);
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, ecc_Bmodp, 6);
+ cy = mpn_cnd_add_n (cy, rp, xp, ecc_Bmodp, 6);
assert (cy == 0);
}
#elif GMP_NUMB_BITS == 64
/* p is 3 limbs, p = B^3 - B - 1 */
static void
-ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t cy;
/* Reduce from 6 to 5 limbs (top limb small)*/
- cy = mpn_add_n (rp + 1, rp + 1, rp + 4, 2);
- cy = sec_add_1 (rp + 3, rp + 3, 1, cy);
- cy += mpn_add_n (rp + 2, rp + 2, rp + 4, 2);
+ cy = mpn_add_n (xp + 1, xp + 1, xp + 4, 2);
+ cy = sec_add_1 (xp + 3, xp + 3, 1, cy);
+ cy += mpn_add_n (xp + 2, xp + 2, xp + 4, 2);
assert (cy <= 2);
- rp[4] = cy;
+ xp[4] = cy;
/* Reduce from 5 to 4 limbs (high limb small) */
- cy = mpn_add_n (rp, rp, rp + 3, 2);
- cy = sec_add_1 (rp + 2, rp + 2, 1, cy);
- cy += mpn_add_n (rp + 1, rp + 1, rp + 3, 2);
+ cy = mpn_add_n (xp, xp, xp + 3, 2);
+ cy = sec_add_1 (xp + 2, xp + 2, 1, cy);
+ cy += mpn_add_n (xp + 1, xp + 1, xp + 3, 2);
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, ecc_Bmodp, 3);
+ cy = mpn_cnd_add_n (cy, rp, xp, ecc_Bmodp, 3);
assert (cy == 0);
}
diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c
index c8d4d40e..30a9b5a9 100644
--- a/ecc-secp224r1.c
+++ b/ecc-secp224r1.c
@@ -45,7 +45,7 @@
#define USE_REDC 0
#define ecc_secp224r1_modp _nettle_ecc_secp224r1_modp
void
-ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#else
#define USE_REDC (ECC_REDC_SIZE != 0)
diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c
index 4b153327..e7ac62c4 100644
--- a/ecc-secp256r1.c
+++ b/ecc-secp256r1.c
@@ -53,7 +53,7 @@
#if HAVE_NATIVE_ecc_secp256r1_redc
# define ecc_secp256r1_redc _nettle_ecc_secp256r1_redc
void
-ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp);
+ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp);
#else /* !HAVE_NATIVE_ecc_secp256r1_redc */
# if ECC_REDC_SIZE > 0
# define ecc_secp256r1_redc ecc_pp1_redc
@@ -70,14 +70,14 @@ ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp);
#elif GMP_NUMB_BITS == 64
static void
-ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
+ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t u1, u0;
mp_size_t n;
n = 2*p->size;
- u1 = rp[--n];
- u0 = rp[n-1];
+ u1 = xp[--n];
+ u0 = xp[n-1];
/* This is not particularly fast, but should work well with assembly implementation. */
for (; n >= p->size; n--)
@@ -126,41 +126,43 @@ ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
We multiply by two low limbs of p, 2^96 - 1, so we could use
shifts rather than mul.
*/
- t = mpn_submul_1 (rp + n - 4, p->m, 2, q1);
- t += mpn_cnd_sub_n (q2, rp + n - 3, rp + n - 3, p->m, 1);
+ t = mpn_submul_1 (xp + n - 4, p->m, 2, q1);
+ t += mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, p->m, 1);
t += (-q2) & 0xffffffff;
- u0 = rp[n-2];
+ u0 = xp[n-2];
cy = (u0 < t);
u0 -= t;
t = (u1 < cy);
u1 -= cy;
- cy = mpn_cnd_add_n (t, rp + n - 4, rp + n - 4, p->m, 2);
+ cy = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, p->m, 2);
u0 += cy;
u1 += (u0 < cy);
u1 -= (-t) & 0xffffffff;
}
+ rp[0] = xp[0];
+ rp[1] = xp[1];
rp[2] = u0;
rp[3] = u1;
}
static void
-ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp)
+ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t u2, u1, u0;
mp_size_t n;
n = 2*q->size;
- u2 = rp[--n];
- u1 = rp[n-1];
+ u2 = xp[--n];
+ u1 = xp[n-1];
/* This is not particularly fast, but should work well with assembly implementation. */
for (; n >= q->size; n--)
{
mp_limb_t q2, q1, q0, t, c1, c0;
- u0 = rp[n-2];
+ u0 = xp[n-2];
/* <q2, q1, q0> = v * u2 + <u2,u1>, same method as above.
@@ -210,9 +212,9 @@ ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp)
assert (q2 < 2);
- c0 = mpn_cnd_sub_n (q2, rp + n - 3, rp + n - 3, q->m, 1);
+ c0 = mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, q->m, 1);
c0 += (-q2) & q->m[1];
- t = mpn_submul_1 (rp + n - 4, q->m, 2, q1);
+ t = mpn_submul_1 (xp + n - 4, q->m, 2, q1);
c0 += t;
c1 = c0 < t;
@@ -227,10 +229,12 @@ ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp)
u1 += t;
u2 += (t<<32) + (u1 < t);
- t = mpn_cnd_add_n (t, rp + n - 4, rp + n - 4, q->m, 2);
+ t = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, q->m, 2);
u1 += t;
u2 += (u1 < t);
}
+ rp[0] = xp[0];
+ rp[1] = xp[1];
rp[2] = u1;
rp[3] = u2;
}
diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c
index 317899e4..caa5d970 100644
--- a/ecc-secp384r1.c
+++ b/ecc-secp384r1.c
@@ -49,7 +49,7 @@
#if HAVE_NATIVE_ecc_secp384r1_modp
#define ecc_secp384r1_modp _nettle_ecc_secp384r1_modp
void
-ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#elif GMP_NUMB_BITS == 32
/* Use that 2^{384} = 2^{128} + 2^{96} - 2^{32} + 1, and eliminate 256
@@ -62,91 +62,92 @@ ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
almost 8 at a time. Do only 7, to avoid additional carry
propagation, followed by 5. */
static void
-ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
+ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t cy, bw;
/* Reduce from 24 to 17 limbs. */
- cy = mpn_add_n (rp + 4, rp + 4, rp + 16, 8);
- cy = sec_add_1 (rp + 12, rp + 12, 3, cy);
+ cy = mpn_add_n (xp + 4, xp + 4, xp + 16, 8);
+ cy = sec_add_1 (xp + 12, xp + 12, 3, cy);
- bw = mpn_sub_n (rp + 5, rp + 5, rp + 16, 8);
- bw = sec_sub_1 (rp + 13, rp + 13, 3, bw);
+ bw = mpn_sub_n (xp + 5, xp + 5, xp + 16, 8);
+ bw = sec_sub_1 (xp + 13, xp + 13, 3, bw);
- cy += mpn_add_n (rp + 7, rp + 7, rp + 16, 8);
- cy = sec_add_1 (rp + 15, rp + 15, 1, cy);
+ cy += mpn_add_n (xp + 7, xp + 7, xp + 16, 8);
+ cy = sec_add_1 (xp + 15, xp + 15, 1, cy);
- cy += mpn_add_n (rp + 8, rp + 8, rp + 16, 8);
+ cy += mpn_add_n (xp + 8, xp + 8, xp + 16, 8);
assert (bw <= cy);
cy -= bw;
assert (cy <= 2);
- rp[16] = cy;
+ xp[16] = cy;
/* Reduce from 17 to 12 limbs */
- cy = mpn_add_n (rp, rp, rp + 12, 5);
- cy = sec_add_1 (rp + 5, rp + 5, 3, cy);
+ cy = mpn_add_n (xp, xp, xp + 12, 5);
+ cy = sec_add_1 (xp + 5, xp + 5, 3, cy);
- bw = mpn_sub_n (rp + 1, rp + 1, rp + 12, 5);
- bw = sec_sub_1 (rp + 6, rp + 6, 6, bw);
+ bw = mpn_sub_n (xp + 1, xp + 1, xp + 12, 5);
+ bw = sec_sub_1 (xp + 6, xp + 6, 6, bw);
- cy += mpn_add_n (rp + 3, rp + 3, rp + 12, 5);
- cy = sec_add_1 (rp + 8, rp + 8, 1, cy);
+ cy += mpn_add_n (xp + 3, xp + 3, xp + 12, 5);
+ cy = sec_add_1 (xp + 8, xp + 8, 1, cy);
- cy += mpn_add_n (rp + 4, rp + 4, rp + 12, 5);
- cy = sec_add_1 (rp + 9, rp + 9, 3, cy);
+ cy += mpn_add_n (xp + 4, xp + 4, xp + 12, 5);
+ cy = sec_add_1 (xp + 9, xp + 9, 3, cy);
assert (cy >= bw);
cy -= bw;
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, p->B, ECC_LIMB_SIZE);
+ cy = mpn_cnd_add_n (cy, rp, xp, p->B, ECC_LIMB_SIZE);
assert (cy == 0);
}
#elif GMP_NUMB_BITS == 64
/* p is 6 limbs, and B^6 - p = B^2 + 2^32 (B - 1) + 1. Eliminate 3
(almost 4) limbs at a time. */
static void
-ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp)
+ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
{
mp_limb_t tp[6];
mp_limb_t cy;
/* Reduce from 12 to 9 limbs */
tp[0] = 0; /* FIXME: Could use mpn_sub_nc */
- mpn_copyi (tp + 1, rp + 8, 3);
- tp[4] = rp[11] - mpn_sub_n (tp, tp, rp + 8, 4);
+ mpn_copyi (tp + 1, xp + 8, 3);
+ tp[4] = xp[11] - mpn_sub_n (tp, tp, xp + 8, 4);
tp[5] = mpn_lshift (tp, tp, 5, 32);
- cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4);
- cy = sec_add_1 (rp + 6, rp + 6, 2, cy);
+ cy = mpn_add_n (xp + 2, xp + 2, xp + 8, 4);
+ cy = sec_add_1 (xp + 6, xp + 6, 2, cy);
- cy += mpn_add_n (rp + 2, rp + 2, tp, 6);
- cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4);
+ cy += mpn_add_n (xp + 2, xp + 2, tp, 6);
+ cy += mpn_add_n (xp + 4, xp + 4, xp + 8, 4);
assert (cy <= 2);
- rp[8] = cy;
+ xp[8] = cy;
/* Reduce from 9 to 6 limbs */
tp[0] = 0;
- mpn_copyi (tp + 1, rp + 6, 2);
- tp[3] = rp[8] - mpn_sub_n (tp, tp, rp + 6, 3);
+ mpn_copyi (tp + 1, xp + 6, 2);
+ tp[3] = xp[8] - mpn_sub_n (tp, tp, xp + 6, 3);
tp[4] = mpn_lshift (tp, tp, 4, 32);
- cy = mpn_add_n (rp, rp, rp + 6, 3);
- cy = sec_add_1 (rp + 3, rp + 3, 2, cy);
- cy += mpn_add_n (rp, rp, tp, 5);
- cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3);
+ cy = mpn_add_n (xp, xp, xp + 6, 3);
+ cy = sec_add_1 (xp + 3, xp + 3, 2, cy);
+ cy += mpn_add_n (xp, xp, tp, 5);
+ cy += mpn_add_n (xp + 2, xp + 2, xp + 6, 3);
- cy = sec_add_1 (rp + 5, rp + 5, 1, cy);
+ cy = sec_add_1 (xp + 5, xp + 5, 1, cy);
assert (cy <= 1);
- cy = mpn_cnd_add_n (cy, rp, rp, p->B, ECC_LIMB_SIZE);
- assert (cy == 0);
+ cy = mpn_cnd_add_n (cy, xp, xp, p->B, ECC_LIMB_SIZE);
+ assert (cy == 0);
+ mpn_copyi (rp, xp, ECC_LIMB_SIZE);
}
#else
#define ecc_secp384r1_modp ecc_mod
#endif
-
+
const struct ecc_curve _nettle_secp_384r1 =
{
{
diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c
index 776f7ae0..ec875dbf 100644
--- a/ecc-secp521r1.c
+++ b/ecc-secp521r1.c
@@ -47,7 +47,7 @@
#if HAVE_NATIVE_ecc_secp521r1_modp
#define ecc_secp521r1_modp _nettle_ecc_secp521r1_modp
void
-ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
#else
@@ -57,21 +57,21 @@ ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp);
/* Result may be *slightly* larger than 2^521 */
static void
-ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp)
+ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp)
{
/* FIXME: Should use mpn_addlsh_n_ip1 */
mp_limb_t hi;
/* Reduce from 2*ECC_LIMB_SIZE to ECC_LIMB_SIZE + 1 */
- rp[ECC_LIMB_SIZE]
- = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP);
- hi = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, 1, BMODP);
- hi = sec_add_1 (rp + 1, rp + 1, ECC_LIMB_SIZE - 1, hi);
+ xp[ECC_LIMB_SIZE]
+ = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP);
+ hi = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, 1, BMODP);
+ hi = sec_add_1 (xp + 1, xp + 1, ECC_LIMB_SIZE - 1, hi);
/* Combine hi with top bits, and add in. */
- hi = (hi << BMODP_SHIFT) | (rp[ECC_LIMB_SIZE-1] >> B_SHIFT);
- rp[ECC_LIMB_SIZE-1] = (rp[ECC_LIMB_SIZE-1]
+ hi = (hi << BMODP_SHIFT) | (xp[ECC_LIMB_SIZE-1] >> B_SHIFT);
+ rp[ECC_LIMB_SIZE-1] = (xp[ECC_LIMB_SIZE-1]
& (((mp_limb_t) 1 << B_SHIFT)-1))
- + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, hi);
+ + sec_add_1 (rp, xp, ECC_LIMB_SIZE - 1, hi);
}
#endif
diff --git a/eddsa-hash.c b/eddsa-hash.c
index 3f21dac4..95e0dd70 100644
--- a/eddsa-hash.c
+++ b/eddsa-hash.c
@@ -74,5 +74,5 @@ _eddsa_hash (const struct ecc_modulo *m,
hi = mpn_cnd_add_n (hi, rp + m->size, rp + m->size, m->B, m->size);
assert (hi == 0);
}
- m->mod (m, rp);
+ m->mod (m, rp, rp);
}
diff --git a/examples/ecc-benchmark.c b/examples/ecc-benchmark.c
index a529cf16..3ab269c7 100644
--- a/examples/ecc-benchmark.c
+++ b/examples/ecc-benchmark.c
@@ -139,7 +139,7 @@ bench_modp (void *p)
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size);
- ctx->ecc->p.mod (&ctx->ecc->p, ctx->rp);
+ ctx->ecc->p.mod (&ctx->ecc->p, ctx->rp, ctx->rp);
}
static void
@@ -147,7 +147,7 @@ bench_reduce (void *p)
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size);
- ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp);
+ ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp, ctx->rp);
}
static void
@@ -155,7 +155,7 @@ bench_modq (void *p)
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size);
- ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp);
+ ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp, ctx->rp);
}
static void
diff --git a/testsuite/ecc-mod-test.c b/testsuite/ecc-mod-test.c
index 41933b6f..8a074c7d 100644
--- a/testsuite/ecc-mod-test.c
+++ b/testsuite/ecc-mod-test.c
@@ -19,6 +19,15 @@ ref_mod (mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *mp, mp_size_t mn)
#define MAX_SIZE (2*MAX_ECC_SIZE)
#define COUNT 50000
+/* Destructively normalize tp, then compare */
+static int
+mod_equal(const struct ecc_modulo *m, const mp_limb_t *ref, mp_limb_t *tp)
+{
+ if (mpn_cmp (tp, m->m, m->size) >= 0)
+ mpn_sub_n (tp, tp, m->m, m->size);
+ return mpn_cmp (ref, tp, m->size) == 0;
+}
+
static void
test_one(const char *name,
const struct ecc_modulo *m,
@@ -33,13 +42,10 @@ test_one(const char *name,
ref_mod (ref, a, m->m, m->size);
mpn_copyi (t, a, 2*m->size);
- m->mod (m, t);
- if (mpn_cmp (t, m->m, m->size) >= 0)
- mpn_sub_n (t, t, m->m, m->size);
-
- if (mpn_cmp (t, ref, m->size))
+ m->mod (m, t, t);
+ if (!mod_equal (m, ref, t))
{
- fprintf (stderr, "m->mod %s failed: bit_size = %u\n",
+ fprintf (stderr, "m->mod %s failed: bit_size = %u, rp == xp\n",
name, m->bit_size);
fprintf (stderr, "a = ");
@@ -52,16 +58,30 @@ test_one(const char *name,
abort ();
}
+ mpn_copyi (t, a, 2*m->size);
+ m->mod (m, t + m->size, t);
+ if (!mod_equal (m, ref, t + m->size))
+ {
+ fprintf (stderr, "m->mod %s failed: bit_size = %u, rp == xp + size\n",
+ name, m->bit_size);
+
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, 2*m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t + m->size, m->size);
+ fprintf (stderr, " (bad)\nref = ");
+ mpn_out_str (stderr, 16, ref, m->size);
+ fprintf (stderr, "\n");
+ abort ();
+ }
+
if (m->B_size < m->size)
{
mpn_copyi (t, a, 2*m->size);
- ecc_mod (m, t);
- if (mpn_cmp (t, m->m, m->size) >= 0)
- mpn_sub_n (t, t, m->m, m->size);
-
- if (mpn_cmp (t, ref, m->size))
+ ecc_mod (m, t, t);
+ if (!mod_equal (m, ref, t))
{
- fprintf (stderr, "ecc_mod %s failed: bit_size = %u\n",
+ fprintf (stderr, "ecc_mod %s failed: bit_size = %u, rp == xp\n",
name, m->bit_size);
fprintf (stderr, "a = ");
mpn_out_str (stderr, 16, a, 2*m->size);
@@ -72,6 +92,22 @@ test_one(const char *name,
fprintf (stderr, "\n");
abort ();
}
+
+ mpn_copyi (t, a, 2*m->size);
+ ecc_mod (m, t + m->size, t);
+ if (!mod_equal (m, ref, t + m->size))
+ {
+ fprintf (stderr, "ecc_mod %s failed: bit_size = %u, rp == xp + size\n",
+ name, m->bit_size);
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, 2*m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t + m->size, m->size);
+ fprintf (stderr, " (bad)\nref = ");
+ mpn_out_str (stderr, 16, ref, m->size);
+ fprintf (stderr, "\n");
+ abort ();
+ }
}
}
diff --git a/testsuite/ecc-redc-test.c b/testsuite/ecc-redc-test.c
index 2d165f43..2c18ea25 100644
--- a/testsuite/ecc-redc-test.c
+++ b/testsuite/ecc-redc-test.c
@@ -64,7 +64,7 @@ test_main (void)
if (ecc->p.reduce != ecc->p.mod)
{
mpn_copyi (m, a, 2*ecc->p.size);
- ecc->p.reduce (&ecc->p, m);
+ ecc->p.reduce (&ecc->p, m, m);
if (mpn_cmp (m, ecc->p.m, ecc->p.size) >= 0)
mpn_sub_n (m, m, ecc->p.m, ecc->p.size);
@@ -86,9 +86,9 @@ test_main (void)
{
mpn_copyi (m, a, 2*ecc->p.size);
if (ecc->p.m[0] == 1)
- ecc_pm1_redc (&ecc->p, m);
+ ecc_pm1_redc (&ecc->p, m, m);
else
- ecc_pp1_redc (&ecc->p, m);
+ ecc_pp1_redc (&ecc->p, m, m);
if (mpn_cmp (m, ecc->p.m, ecc->p.size) >= 0)
mpn_sub_n (m, m, ecc->p.m, ecc->p.size);