diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-10-30 15:01:39 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-10-30 20:22:40 +0100 |
commit | f4f5625edb7d899972431b838ac19ced9288f68a (patch) | |
tree | bc18e9bd95b9f940bc6d4229e012c063562e8c9e | |
parent | cbf217b15f66a73041e6fe1a108453efd0297d27 (diff) | |
download | nettle-f4f5625edb7d899972431b838ac19ced9288f68a.tar.gz |
Add separate result argument to all mod functions.
* ecc-internal.h (typedef ecc_mod_func): Add separate result
argument. Updated all C implementations and callers.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | ecc-a-to-j.c | 4 | ||||
-rw-r--r-- | ecc-curve25519.c | 24 | ||||
-rw-r--r-- | ecc-curve448.c | 16 | ||||
-rw-r--r-- | ecc-gost-gc256b.c | 8 | ||||
-rw-r--r-- | ecc-gost-gc512a.c | 8 | ||||
-rw-r--r-- | ecc-internal.h | 6 | ||||
-rw-r--r-- | ecc-j-to-a.c | 2 | ||||
-rw-r--r-- | ecc-mod-arith.c | 4 | ||||
-rw-r--r-- | ecc-mod-inv.c | 4 | ||||
-rw-r--r-- | ecc-mod.c | 35 | ||||
-rw-r--r-- | ecc-pm1-redc.c | 10 | ||||
-rw-r--r-- | ecc-pp1-redc.c | 8 | ||||
-rw-r--r-- | ecc-secp192r1.c | 40 | ||||
-rw-r--r-- | ecc-secp224r1.c | 2 | ||||
-rw-r--r-- | ecc-secp256r1.c | 34 | ||||
-rw-r--r-- | ecc-secp384r1.c | 75 | ||||
-rw-r--r-- | ecc-secp521r1.c | 18 | ||||
-rw-r--r-- | eddsa-hash.c | 2 | ||||
-rw-r--r-- | examples/ecc-benchmark.c | 6 | ||||
-rw-r--r-- | testsuite/ecc-mod-test.c | 60 | ||||
-rw-r--r-- | testsuite/ecc-redc-test.c | 6 |
22 files changed, 215 insertions, 162 deletions
@@ -1,3 +1,8 @@ +2020-10-30 Niels Möller <nisse@lysator.liu.se> + + * ecc-internal.h (typedef ecc_mod_func): Add separate result + argument. Updated all C implementations and callers. + 2020-10-29 Niels Möller <nisse@lysator.liu.se> * ecc-mod.c (ecc_mod): More unified handling of final carry diff --git a/ecc-a-to-j.c b/ecc-a-to-j.c index 9fb0d2b8..5db5e4ea 100644 --- a/ecc-a-to-j.c +++ b/ecc-a-to-j.c @@ -47,10 +47,10 @@ ecc_a_to_j (const struct ecc_curve *ecc, mpn_copyd (r + ecc->p.size, p, 2*ecc->p.size); mpn_zero (r, ecc->p.size); - ecc->p.mod (&ecc->p, r); + ecc->p.mod (&ecc->p, r, r); mpn_zero (r + ecc->p.size, ecc->p.size); - ecc->p.mod (&ecc->p, r + ecc->p.size); + ecc->p.mod (&ecc->p, r + ecc->p.size, r + ecc->p.size); } else if (r != p) mpn_copyi (r, p, 2*ecc->p.size); diff --git a/ecc-curve25519.c b/ecc-curve25519.c index 05e772bc..bd2e19d2 100644 --- a/ecc-curve25519.c +++ b/ecc-curve25519.c @@ -50,7 +50,7 @@ #define ecc_curve25519_modp _nettle_ecc_curve25519_modp void -ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp); +ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); #else #if PHIGH_BITS == 0 @@ -58,16 +58,16 @@ ecc_curve25519_modp (const struct ecc_modulo *m, mp_limb_t *rp); #endif static void -ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +ecc_curve25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t hi, cy; - cy = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, + cy = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, (mp_limb_t) 19 << PHIGH_BITS); - hi = rp[ECC_LIMB_SIZE-1]; + hi = xp[ECC_LIMB_SIZE-1]; cy = (cy << PHIGH_BITS) + (hi >> (GMP_NUMB_BITS - PHIGH_BITS)); rp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS)) - + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, 19 * cy); + + sec_add_1 (rp, xp, ECC_LIMB_SIZE - 1, 19 * cy); } #endif /* HAVE_NATIVE_ecc_curve25519_modp */ @@ -78,7 +78,7 @@ ecc_curve25519_modp(const struct ecc_modulo *m 
UNUSED, mp_limb_t *rp) #endif static void -ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp) +ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp) { mp_size_t n; mp_limb_t cy; @@ -86,18 +86,18 @@ ecc_curve25519_modq (const struct ecc_modulo *q, mp_limb_t *rp) /* n is the offset where we add in the next term */ for (n = ECC_LIMB_SIZE; n-- > 0;) { - cy = mpn_submul_1 (rp + n, + cy = mpn_submul_1 (xp + n, q->B_shifted, ECC_LIMB_SIZE, - rp[n + ECC_LIMB_SIZE]); + xp[n + ECC_LIMB_SIZE]); /* Top limb of mBmodq_shifted is zero, so we get cy == 0 or 1 */ assert (cy < 2); - mpn_cnd_add_n (cy, rp+n, rp+n, q->m, ECC_LIMB_SIZE); + mpn_cnd_add_n (cy, xp+n, xp+n, q->m, ECC_LIMB_SIZE); } - cy = mpn_submul_1 (rp, q->m, ECC_LIMB_SIZE, - rp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS)); + cy = mpn_submul_1 (xp, q->m, ECC_LIMB_SIZE, + xp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS)); assert (cy < 2); - mpn_cnd_add_n (cy, rp, rp, q->m, ECC_LIMB_SIZE); + mpn_cnd_add_n (cy, rp, xp, q->m, ECC_LIMB_SIZE); } /* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m. Needs 5 * n scratch diff --git a/ecc-curve448.c b/ecc-curve448.c index c00faa30..6b061606 100644 --- a/ecc-curve448.c +++ b/ecc-curve448.c @@ -48,10 +48,10 @@ #if HAVE_NATIVE_ecc_curve448_modp #define ecc_curve448_modp _nettle_ecc_curve448_modp void -ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp); +ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); #elif GMP_NUMB_BITS == 64 static void -ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp) +ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp) { /* Let B = 2^64, b = 2^32 = sqrt(B). 
p = B^7 - b B^3 - 1 ==> B^7 = b B^3 + 1 @@ -79,18 +79,18 @@ ecc_curve448_modp(const struct ecc_modulo *m, mp_limb_t *rp) +----+----+----+----+----+----+----+ */ mp_limb_t c3, c4, c7; - mp_limb_t *tp = rp + 7; + mp_limb_t *tp = xp + 7; - c4 = mpn_add_n (rp, rp, rp + 7, 4); - c7 = mpn_addmul_1 (rp + 4, rp + 11, 3, 2); - c3 = mpn_addmul_1 (rp, rp + 11, 3, (mp_limb_t) 1 << 32); - c7 += mpn_addmul_1 (rp + 3, rp + 7, 4, (mp_limb_t) 1 << 32); + c4 = mpn_add_n (xp, xp, xp + 7, 4); + c7 = mpn_addmul_1 (xp + 4, xp + 11, 3, 2); + c3 = mpn_addmul_1 (xp, xp + 11, 3, (mp_limb_t) 1 << 32); + c7 += mpn_addmul_1 (xp + 3, xp + 7, 4, (mp_limb_t) 1 << 32); tp[0] = c7; tp[1] = tp[2] = 0; tp[3] = c3 + (c7 << 32); tp[4] = c4 + (c7 >> 32) + (tp[3] < c3); tp[5] = tp[6] = 0; - c7 = mpn_add_n (rp, rp, tp, 7); + c7 = mpn_add_n (rp, xp, tp, 7); c7 = mpn_cnd_add_n (c7, rp, rp, m->B, 7); assert (c7 == 0); } diff --git a/ecc-gost-gc256b.c b/ecc-gost-gc256b.c index a23d46fc..1490d7a8 100644 --- a/ecc-gost-gc256b.c +++ b/ecc-gost-gc256b.c @@ -43,14 +43,14 @@ #include "ecc-gost-gc256b.h" static void -ecc_gost_gc256b_modp (const struct ecc_modulo *m, mp_limb_t *rp) +ecc_gost_gc256b_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp) { mp_size_t mn = m->size; mp_limb_t hi; - hi = mpn_addmul_1(rp, rp + mn, mn, 0x269); - hi = sec_add_1 (rp, rp, mn, hi * 0x269); - hi = sec_add_1 (rp, rp, mn, hi * 0x269); + hi = mpn_addmul_1(xp, xp + mn, mn, 0x269); + hi = sec_add_1 (xp, xp, mn, hi * 0x269); + hi = sec_add_1 (rp, xp, mn, hi * 0x269); assert(hi == 0); } diff --git a/ecc-gost-gc512a.c b/ecc-gost-gc512a.c index 398762c3..0f6e9b6d 100644 --- a/ecc-gost-gc512a.c +++ b/ecc-gost-gc512a.c @@ -43,14 +43,14 @@ #include "ecc-gost-gc512a.h" static void -ecc_gost_gc512a_modp (const struct ecc_modulo *m, mp_limb_t *rp) +ecc_gost_gc512a_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp) { mp_size_t mn = m->size; mp_limb_t hi; - hi = mpn_addmul_1(rp, rp + mn, mn, 0x239); - hi = sec_add_1 (rp, rp, 
mn, hi * 0x239); - hi = sec_add_1 (rp, rp, mn, hi * 0x239); + hi = mpn_addmul_1(xp, xp + mn, mn, 0x239); + hi = sec_add_1 (xp, xp, mn, hi * 0x239); + hi = sec_add_1 (rp, xp, mn, hi * 0x239); assert(hi == 0); } diff --git a/ecc-internal.h b/ecc-internal.h index 3082917b..b2ee8350 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -114,8 +114,10 @@ struct ecc_modulo; /* Reduces from 2*ecc->size to ecc->size. */ /* Required to return a result < 2q. This property is inherited by - mod_mul and mod_sqr. */ -typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp); + mod_mul and mod_sqr. May clobber input xp. rp may point to the + start or the middle of the xp area, but no other overlap is + allowed. */ +typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); typedef void ecc_mod_inv_func (const struct ecc_modulo *m, mp_limb_t *vp, const mp_limb_t *ap, diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c index fd329a48..915c056b 100644 --- a/ecc-j-to-a.c +++ b/ecc-j-to-a.c @@ -59,7 +59,7 @@ ecc_j_to_a (const struct ecc_curve *ecc, /* Divide this common factor by B, instead of applying redc to both x and y outputs. */ mpn_zero (iz2p + ecc->p.size, ecc->p.size); - ecc->p.reduce (&ecc->p, iz2p); + ecc->p.reduce (&ecc->p, iz2p, iz2p); } /* r_x <-- x / z^2 */ diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c index 34a28544..686b56bf 100644 --- a/ecc-mod-arith.c +++ b/ecc-mod-arith.c @@ -115,7 +115,7 @@ ecc_mod_mul (const struct ecc_modulo *m, mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *bp) { mpn_mul_n (rp, ap, bp, m->size); - m->reduce (m, rp); + m->reduce (m, rp, rp); } void @@ -123,7 +123,7 @@ ecc_mod_sqr (const struct ecc_modulo *m, mp_limb_t *rp, const mp_limb_t *ap) { mpn_sqr (rp, ap, m->size); - m->reduce (m, rp); + m->reduce (m, rp, rp); } /* Compute R <-- X^{2^k} mod M. 
Needs 2*ecc->size limbs at rp, and diff --git a/ecc-mod-inv.c b/ecc-mod-inv.c index e45c230a..570f0568 100644 --- a/ecc-mod-inv.c +++ b/ecc-mod-inv.c @@ -181,9 +181,9 @@ ecc_mod_inv_redc (const struct ecc_modulo *m, mpn_copyi (scratch, ap, m->size); mpn_zero (scratch + m->size, m->size); - m->reduce (m, scratch); + m->reduce (m, scratch, scratch); mpn_zero (scratch + m->size, m->size); - m->reduce (m, scratch); + m->reduce (m, scratch, scratch); ecc_mod_inv_destructive (m, vp, scratch); } diff --git a/ecc-mod.c b/ecc-mod.c --- a/ecc-mod.c +++ b/ecc-mod.c @@ -39,9 +39,11 @@ #include "ecc-internal.h" -/* Computes r mod m, input 2*m->size, output m->size. */ +/* Computes r <-- x mod m, input 2*m->size, output m->size. It's + * allowed to have rp == xp or rp == xp + m->size, but no other kind + * of overlap is allowed. */ void -ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp) +ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t hi; mp_size_t mn = m->size; @@ -64,9 +66,9 @@ ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp) rn -= sn; for (i = 0; i <= sn; i++) - rp[rn+i-1] = mpn_addmul_1 (rp + rn - mn - 1 + i, m->B, bn, rp[rn+i-1]); - rp[rn-1] = rp[rn+sn-1] - + mpn_add_n (rp + rn - sn - 1, rp + rn - sn - 1, rp + rn - 1, sn); + xp[rn+i-1] = mpn_addmul_1 (xp + rn - mn - 1 + i, m->B, bn, xp[rn+i-1]); + xp[rn-1] = xp[rn+sn-1] + + mpn_add_n (xp + rn - sn - 1, xp + rn - sn - 1, xp + rn - 1, sn); } } else @@ -76,10 +78,10 @@ ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp) rn -= sn; for (i = 0; i < sn; i++) - rp[rn+i] = mpn_addmul_1 (rp + rn - mn + i, m->B, bn, rp[rn+i]); + xp[rn+i] = mpn_addmul_1 (xp + rn - mn + i, m->B, bn, xp[rn+i]); - hi = mpn_add_n (rp + rn - sn, rp + rn - sn, rp + rn, sn); - hi = mpn_cnd_add_n (hi, rp + rn - mn, rp + rn - mn, m->B, mn); + hi = mpn_add_n (xp + rn - sn, xp + rn - sn, xp + rn, sn); + hi = mpn_cnd_add_n (hi, xp + rn - mn, xp + rn - mn, m->B, mn); assert (hi == 0); } } @@ -89,23 +91,26 @@ ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp) assert (rn <= sn); for (i = 0; i < 
rn; i++) - rp[mn+i] = mpn_addmul_1 (rp + i, m->B, bn, rp[mn+i]); + xp[mn+i] = mpn_addmul_1 (xp + i, m->B, bn, xp[mn+i]); - hi = mpn_add_n (rp + bn, rp + bn, rp + mn, rn); + hi = mpn_add_n (xp + bn, xp + bn, xp + mn, rn); if (rn < sn) - hi = sec_add_1 (rp + bn + rn, rp + bn + rn, sn - rn, hi); + hi = sec_add_1 (xp + bn + rn, xp + bn + rn, sn - rn, hi); shift = m->size * GMP_NUMB_BITS - m->bit_size; if (shift > 0) { /* Combine hi with top bits, add in */ - hi = (hi << shift) | (rp[mn-1] >> (GMP_NUMB_BITS - shift)); - rp[mn-1] = (rp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1)) - + mpn_addmul_1 (rp, m->B_shifted, mn-1, hi); + hi = (hi << shift) | (xp[mn-1] >> (GMP_NUMB_BITS - shift)); + xp[mn-1] = (xp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1)) + + mpn_addmul_1 (xp, m->B_shifted, mn-1, hi); + /* FIXME: Can this copying be eliminated? */ + if (rp != xp) + mpn_copyi (rp, xp, mn); } else { - hi = mpn_cnd_add_n (hi, rp, rp, m->B, mn); + hi = mpn_cnd_add_n (hi, rp, xp, m->B, mn); assert (hi == 0); } } diff --git a/ecc-pm1-redc.c b/ecc-pm1-redc.c index 1b07b793..cc95c6e4 100644 --- a/ecc-pm1-redc.c +++ b/ecc-pm1-redc.c @@ -42,7 +42,7 @@ /* Use that 1 = - (p - 1) (mod p), and that at least one low limb of p - 1 is zero. 
*/ void -ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp) +ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp) { unsigned i; mp_limb_t hi, cy; @@ -50,10 +50,10 @@ ecc_pm1_redc (const struct ecc_modulo *m, mp_limb_t *rp) mp_size_t k = m->redc_size; for (i = 0; i < m->size; i++) - rp[i] = mpn_submul_1 (rp + i + k, - m->redc_mpm1, m->size - k, rp[i]); - hi = mpn_sub_n (rp, rp + m->size, rp, m->size); - cy = mpn_cnd_add_n (hi, rp, rp, m->m, m->size); + xp[i] = mpn_submul_1 (xp + i + k, + m->redc_mpm1, m->size - k, xp[i]); + hi = mpn_sub_n (xp, xp + m->size, xp, m->size); + cy = mpn_cnd_add_n (hi, rp, xp, m->m, m->size); assert (cy == hi); if (shift > 0) diff --git a/ecc-pp1-redc.c b/ecc-pp1-redc.c index 9f643d97..b088c4c5 100644 --- a/ecc-pp1-redc.c +++ b/ecc-pp1-redc.c @@ -42,7 +42,7 @@ /* Use that 1 = p + 1 (mod p), and that at least one low limb of p + 1 is zero. */ void -ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp) +ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp) { unsigned i; mp_limb_t hi, cy; @@ -50,9 +50,9 @@ ecc_pp1_redc (const struct ecc_modulo *m, mp_limb_t *rp) mp_size_t k = m->redc_size; for (i = 0; i < m->size; i++) - rp[i] = mpn_addmul_1 (rp + i + k, - m->redc_mpm1, m->size - k, rp[i]); - hi = mpn_add_n (rp, rp, rp + m->size, m->size); + xp[i] = mpn_addmul_1 (xp + i + k, + m->redc_mpm1, m->size - k, xp[i]); + hi = mpn_add_n (rp, xp, xp + m->size, m->size); if (shift > 0) { hi = (hi << shift) | (rp[m->size - 1] >> (GMP_NUMB_BITS - shift)); diff --git a/ecc-secp192r1.c b/ecc-secp192r1.c index 05c26408..ec97477c 100644 --- a/ecc-secp192r1.c +++ b/ecc-secp192r1.c @@ -52,57 +52,57 @@ #define ecc_secp192r1_modp _nettle_ecc_secp192r1_modp void -ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); +ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); /* Use that p = 2^{192} - 2^64 - 1, to eliminate 128 bits at a time. 
*/ #elif GMP_NUMB_BITS == 32 /* p is 6 limbs, p = B^6 - B^2 - 1 */ static void -ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t cy; /* Reduce from 12 to 9 limbs (top limb small)*/ - cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4); - cy = sec_add_1 (rp + 6, rp + 6, 2, cy); - cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4); + cy = mpn_add_n (xp + 2, xp + 2, xp + 8, 4); + cy = sec_add_1 (xp + 6, xp + 6, 2, cy); + cy += mpn_add_n (xp + 4, xp + 4, xp + 8, 4); assert (cy <= 2); - rp[8] = cy; + xp[8] = cy; /* Reduce from 9 to 6 limbs */ - cy = mpn_add_n (rp, rp, rp + 6, 3); - cy = sec_add_1 (rp + 3, rp + 3, 2, cy); - cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3); - cy = sec_add_1 (rp + 5, rp + 5, 1, cy); + cy = mpn_add_n (xp, xp, xp + 6, 3); + cy = sec_add_1 (xp + 3, xp + 3, 2, cy); + cy += mpn_add_n (xp + 2, xp + 2, xp + 6, 3); + cy = sec_add_1 (xp + 5, xp + 5, 1, cy); assert (cy <= 1); - cy = mpn_cnd_add_n (cy, rp, rp, ecc_Bmodp, 6); + cy = mpn_cnd_add_n (cy, rp, xp, ecc_Bmodp, 6); assert (cy == 0); } #elif GMP_NUMB_BITS == 64 /* p is 3 limbs, p = B^3 - B - 1 */ static void -ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +ecc_secp192r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t cy; /* Reduce from 6 to 5 limbs (top limb small)*/ - cy = mpn_add_n (rp + 1, rp + 1, rp + 4, 2); - cy = sec_add_1 (rp + 3, rp + 3, 1, cy); - cy += mpn_add_n (rp + 2, rp + 2, rp + 4, 2); + cy = mpn_add_n (xp + 1, xp + 1, xp + 4, 2); + cy = sec_add_1 (xp + 3, xp + 3, 1, cy); + cy += mpn_add_n (xp + 2, xp + 2, xp + 4, 2); assert (cy <= 2); - rp[4] = cy; + xp[4] = cy; /* Reduce from 5 to 4 limbs (high limb small) */ - cy = mpn_add_n (rp, rp, rp + 3, 2); - cy = sec_add_1 (rp + 2, rp + 2, 1, cy); - cy += mpn_add_n (rp + 1, rp + 1, rp + 3, 2); + cy = mpn_add_n (xp, xp, xp + 3, 2); + cy = sec_add_1 (xp + 2, xp + 2, 1, cy); + cy += 
mpn_add_n (xp + 1, xp + 1, xp + 3, 2); assert (cy <= 1); - cy = mpn_cnd_add_n (cy, rp, rp, ecc_Bmodp, 3); + cy = mpn_cnd_add_n (cy, rp, xp, ecc_Bmodp, 3); assert (cy == 0); } diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c index c8d4d40e..30a9b5a9 100644 --- a/ecc-secp224r1.c +++ b/ecc-secp224r1.c @@ -45,7 +45,7 @@ #define USE_REDC 0 #define ecc_secp224r1_modp _nettle_ecc_secp224r1_modp void -ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); +ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); #else #define USE_REDC (ECC_REDC_SIZE != 0) diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c index 4b153327..e7ac62c4 100644 --- a/ecc-secp256r1.c +++ b/ecc-secp256r1.c @@ -53,7 +53,7 @@ #if HAVE_NATIVE_ecc_secp256r1_redc # define ecc_secp256r1_redc _nettle_ecc_secp256r1_redc void -ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp); +ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp); #else /* !HAVE_NATIVE_ecc_secp256r1_redc */ # if ECC_REDC_SIZE > 0 # define ecc_secp256r1_redc ecc_pp1_redc @@ -70,14 +70,14 @@ ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp); #elif GMP_NUMB_BITS == 64 static void -ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp) +ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t u1, u0; mp_size_t n; n = 2*p->size; - u1 = rp[--n]; - u0 = rp[n-1]; + u1 = xp[--n]; + u0 = xp[n-1]; /* This is not particularly fast, but should work well with assembly implementation. */ for (; n >= p->size; n--) @@ -126,41 +126,43 @@ ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp) We multiply by two low limbs of p, 2^96 - 1, so we could use shifts rather than mul. 
*/ - t = mpn_submul_1 (rp + n - 4, p->m, 2, q1); - t += mpn_cnd_sub_n (q2, rp + n - 3, rp + n - 3, p->m, 1); + t = mpn_submul_1 (xp + n - 4, p->m, 2, q1); + t += mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, p->m, 1); t += (-q2) & 0xffffffff; - u0 = rp[n-2]; + u0 = xp[n-2]; cy = (u0 < t); u0 -= t; t = (u1 < cy); u1 -= cy; - cy = mpn_cnd_add_n (t, rp + n - 4, rp + n - 4, p->m, 2); + cy = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, p->m, 2); u0 += cy; u1 += (u0 < cy); u1 -= (-t) & 0xffffffff; } + rp[0] = xp[0]; + rp[1] = xp[1]; rp[2] = u0; rp[3] = u1; } static void -ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp) +ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t u2, u1, u0; mp_size_t n; n = 2*q->size; - u2 = rp[--n]; - u1 = rp[n-1]; + u2 = xp[--n]; + u1 = xp[n-1]; /* This is not particularly fast, but should work well with assembly implementation. */ for (; n >= q->size; n--) { mp_limb_t q2, q1, q0, t, c1, c0; - u0 = rp[n-2]; + u0 = xp[n-2]; /* <q2, q1, q0> = v * u2 + <u2,u1>, same method as above. 
@@ -210,9 +212,9 @@ ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp) assert (q2 < 2); - c0 = mpn_cnd_sub_n (q2, rp + n - 3, rp + n - 3, q->m, 1); + c0 = mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, q->m, 1); c0 += (-q2) & q->m[1]; - t = mpn_submul_1 (rp + n - 4, q->m, 2, q1); + t = mpn_submul_1 (xp + n - 4, q->m, 2, q1); c0 += t; c1 = c0 < t; @@ -227,10 +229,12 @@ ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp) u1 += t; u2 += (t<<32) + (u1 < t); - t = mpn_cnd_add_n (t, rp + n - 4, rp + n - 4, q->m, 2); + t = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, q->m, 2); u1 += t; u2 += (u1 < t); } + rp[0] = xp[0]; + rp[1] = xp[1]; rp[2] = u1; rp[3] = u2; } diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c index 317899e4..caa5d970 100644 --- a/ecc-secp384r1.c +++ b/ecc-secp384r1.c @@ -49,7 +49,7 @@ #if HAVE_NATIVE_ecc_secp384r1_modp #define ecc_secp384r1_modp _nettle_ecc_secp384r1_modp void -ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); +ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); #elif GMP_NUMB_BITS == 32 /* Use that 2^{384} = 2^{128} + 2^{96} - 2^{32} + 1, and eliminate 256 @@ -62,91 +62,92 @@ ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); almost 8 at a time. Do only 7, to avoid additional carry propagation, followed by 5. */ static void -ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp) +ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t cy, bw; /* Reduce from 24 to 17 limbs. 
*/ - cy = mpn_add_n (rp + 4, rp + 4, rp + 16, 8); - cy = sec_add_1 (rp + 12, rp + 12, 3, cy); + cy = mpn_add_n (xp + 4, xp + 4, xp + 16, 8); + cy = sec_add_1 (xp + 12, xp + 12, 3, cy); - bw = mpn_sub_n (rp + 5, rp + 5, rp + 16, 8); - bw = sec_sub_1 (rp + 13, rp + 13, 3, bw); + bw = mpn_sub_n (xp + 5, xp + 5, xp + 16, 8); + bw = sec_sub_1 (xp + 13, xp + 13, 3, bw); - cy += mpn_add_n (rp + 7, rp + 7, rp + 16, 8); - cy = sec_add_1 (rp + 15, rp + 15, 1, cy); + cy += mpn_add_n (xp + 7, xp + 7, xp + 16, 8); + cy = sec_add_1 (xp + 15, xp + 15, 1, cy); - cy += mpn_add_n (rp + 8, rp + 8, rp + 16, 8); + cy += mpn_add_n (xp + 8, xp + 8, xp + 16, 8); assert (bw <= cy); cy -= bw; assert (cy <= 2); - rp[16] = cy; + xp[16] = cy; /* Reduce from 17 to 12 limbs */ - cy = mpn_add_n (rp, rp, rp + 12, 5); - cy = sec_add_1 (rp + 5, rp + 5, 3, cy); + cy = mpn_add_n (xp, xp, xp + 12, 5); + cy = sec_add_1 (xp + 5, xp + 5, 3, cy); - bw = mpn_sub_n (rp + 1, rp + 1, rp + 12, 5); - bw = sec_sub_1 (rp + 6, rp + 6, 6, bw); + bw = mpn_sub_n (xp + 1, xp + 1, xp + 12, 5); + bw = sec_sub_1 (xp + 6, xp + 6, 6, bw); - cy += mpn_add_n (rp + 3, rp + 3, rp + 12, 5); - cy = sec_add_1 (rp + 8, rp + 8, 1, cy); + cy += mpn_add_n (xp + 3, xp + 3, xp + 12, 5); + cy = sec_add_1 (xp + 8, xp + 8, 1, cy); - cy += mpn_add_n (rp + 4, rp + 4, rp + 12, 5); - cy = sec_add_1 (rp + 9, rp + 9, 3, cy); + cy += mpn_add_n (xp + 4, xp + 4, xp + 12, 5); + cy = sec_add_1 (xp + 9, xp + 9, 3, cy); assert (cy >= bw); cy -= bw; assert (cy <= 1); - cy = mpn_cnd_add_n (cy, rp, rp, p->B, ECC_LIMB_SIZE); + cy = mpn_cnd_add_n (cy, rp, xp, p->B, ECC_LIMB_SIZE); assert (cy == 0); } #elif GMP_NUMB_BITS == 64 /* p is 6 limbs, and B^6 - p = B^2 + 2^32 (B - 1) + 1. Eliminate 3 (almost 4) limbs at a time. 
*/ static void -ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp) +ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp) { mp_limb_t tp[6]; mp_limb_t cy; /* Reduce from 12 to 9 limbs */ tp[0] = 0; /* FIXME: Could use mpn_sub_nc */ - mpn_copyi (tp + 1, rp + 8, 3); - tp[4] = rp[11] - mpn_sub_n (tp, tp, rp + 8, 4); + mpn_copyi (tp + 1, xp + 8, 3); + tp[4] = xp[11] - mpn_sub_n (tp, tp, xp + 8, 4); tp[5] = mpn_lshift (tp, tp, 5, 32); - cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4); - cy = sec_add_1 (rp + 6, rp + 6, 2, cy); + cy = mpn_add_n (xp + 2, xp + 2, xp + 8, 4); + cy = sec_add_1 (xp + 6, xp + 6, 2, cy); - cy += mpn_add_n (rp + 2, rp + 2, tp, 6); - cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4); + cy += mpn_add_n (xp + 2, xp + 2, tp, 6); + cy += mpn_add_n (xp + 4, xp + 4, xp + 8, 4); assert (cy <= 2); - rp[8] = cy; + xp[8] = cy; /* Reduce from 9 to 6 limbs */ tp[0] = 0; - mpn_copyi (tp + 1, rp + 6, 2); - tp[3] = rp[8] - mpn_sub_n (tp, tp, rp + 6, 3); + mpn_copyi (tp + 1, xp + 6, 2); + tp[3] = xp[8] - mpn_sub_n (tp, tp, xp + 6, 3); tp[4] = mpn_lshift (tp, tp, 4, 32); - cy = mpn_add_n (rp, rp, rp + 6, 3); - cy = sec_add_1 (rp + 3, rp + 3, 2, cy); - cy += mpn_add_n (rp, rp, tp, 5); - cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3); + cy = mpn_add_n (xp, xp, xp + 6, 3); + cy = sec_add_1 (xp + 3, xp + 3, 2, cy); + cy += mpn_add_n (xp, xp, tp, 5); + cy += mpn_add_n (xp + 2, xp + 2, xp + 6, 3); - cy = sec_add_1 (rp + 5, rp + 5, 1, cy); + cy = sec_add_1 (xp + 5, xp + 5, 1, cy); assert (cy <= 1); - cy = mpn_cnd_add_n (cy, rp, rp, p->B, ECC_LIMB_SIZE); - assert (cy == 0); + cy = mpn_cnd_add_n (cy, xp, xp, p->B, ECC_LIMB_SIZE); + assert (cy == 0); + mpn_copyi (rp, xp, ECC_LIMB_SIZE); } #else #define ecc_secp384r1_modp ecc_mod #endif - + const struct ecc_curve _nettle_secp_384r1 = { { diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c index 776f7ae0..ec875dbf 100644 --- a/ecc-secp521r1.c +++ b/ecc-secp521r1.c @@ -47,7 +47,7 @@ #if 
HAVE_NATIVE_ecc_secp521r1_modp #define ecc_secp521r1_modp _nettle_ecc_secp521r1_modp void -ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); +ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp); #else @@ -57,21 +57,21 @@ ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); /* Result may be *slightly* larger than 2^521 */ static void -ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t *xp) { /* FIXME: Should use mpn_addlsh_n_ip1 */ mp_limb_t hi; /* Reduce from 2*ECC_LIMB_SIZE to ECC_LIMB_SIZE + 1 */ - rp[ECC_LIMB_SIZE] - = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP); - hi = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, 1, BMODP); - hi = sec_add_1 (rp + 1, rp + 1, ECC_LIMB_SIZE - 1, hi); + xp[ECC_LIMB_SIZE] + = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP); + hi = mpn_addmul_1 (xp, xp + ECC_LIMB_SIZE, 1, BMODP); + hi = sec_add_1 (xp + 1, xp + 1, ECC_LIMB_SIZE - 1, hi); /* Combine hi with top bits, and add in. 
*/ - hi = (hi << BMODP_SHIFT) | (rp[ECC_LIMB_SIZE-1] >> B_SHIFT); - rp[ECC_LIMB_SIZE-1] = (rp[ECC_LIMB_SIZE-1] + hi = (hi << BMODP_SHIFT) | (xp[ECC_LIMB_SIZE-1] >> B_SHIFT); + rp[ECC_LIMB_SIZE-1] = (xp[ECC_LIMB_SIZE-1] & (((mp_limb_t) 1 << B_SHIFT)-1)) - + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, hi); + + sec_add_1 (rp, xp, ECC_LIMB_SIZE - 1, hi); } #endif diff --git a/eddsa-hash.c b/eddsa-hash.c index 3f21dac4..95e0dd70 100644 --- a/eddsa-hash.c +++ b/eddsa-hash.c @@ -74,5 +74,5 @@ _eddsa_hash (const struct ecc_modulo *m, hi = mpn_cnd_add_n (hi, rp + m->size, rp + m->size, m->B, m->size); assert (hi == 0); } - m->mod (m, rp); + m->mod (m, rp, rp); } diff --git a/examples/ecc-benchmark.c b/examples/ecc-benchmark.c index a529cf16..3ab269c7 100644 --- a/examples/ecc-benchmark.c +++ b/examples/ecc-benchmark.c @@ -139,7 +139,7 @@ bench_modp (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size); - ctx->ecc->p.mod (&ctx->ecc->p, ctx->rp); + ctx->ecc->p.mod (&ctx->ecc->p, ctx->rp, ctx->rp); } static void @@ -147,7 +147,7 @@ bench_reduce (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size); - ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp); + ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp, ctx->rp); } static void @@ -155,7 +155,7 @@ bench_modq (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size); - ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp); + ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp, ctx->rp); } static void diff --git a/testsuite/ecc-mod-test.c b/testsuite/ecc-mod-test.c index 41933b6f..8a074c7d 100644 --- a/testsuite/ecc-mod-test.c +++ b/testsuite/ecc-mod-test.c @@ -19,6 +19,15 @@ ref_mod (mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *mp, mp_size_t mn) #define MAX_SIZE (2*MAX_ECC_SIZE) #define COUNT 50000 +/* Destructively normalize tp, then compare */ +static int +mod_equal(const struct ecc_modulo *m, const mp_limb_t *ref, 
mp_limb_t *tp) +{ + if (mpn_cmp (tp, m->m, m->size) >= 0) + mpn_sub_n (tp, tp, m->m, m->size); + return mpn_cmp (ref, tp, m->size) == 0; +} + static void test_one(const char *name, const struct ecc_modulo *m, @@ -33,13 +42,10 @@ test_one(const char *name, ref_mod (ref, a, m->m, m->size); mpn_copyi (t, a, 2*m->size); - m->mod (m, t); - if (mpn_cmp (t, m->m, m->size) >= 0) - mpn_sub_n (t, t, m->m, m->size); - - if (mpn_cmp (t, ref, m->size)) + m->mod (m, t, t); + if (!mod_equal (m, ref, t)) { - fprintf (stderr, "m->mod %s failed: bit_size = %u\n", + fprintf (stderr, "m->mod %s failed: bit_size = %u, rp == xp\n", name, m->bit_size); fprintf (stderr, "a = "); @@ -52,16 +58,30 @@ test_one(const char *name, abort (); } + mpn_copyi (t, a, 2*m->size); + m->mod (m, t + m->size, t); + if (!mod_equal (m, ref, t + m->size)) + { + fprintf (stderr, "m->mod %s failed: bit_size = %u, rp == xp + size\n", + name, m->bit_size); + + fprintf (stderr, "a = "); + mpn_out_str (stderr, 16, a, 2*m->size); + fprintf (stderr, "\nt = "); + mpn_out_str (stderr, 16, t + m->size, m->size); + fprintf (stderr, " (bad)\nref = "); + mpn_out_str (stderr, 16, ref, m->size); + fprintf (stderr, "\n"); + abort (); + } + if (m->B_size < m->size) { mpn_copyi (t, a, 2*m->size); - ecc_mod (m, t); - if (mpn_cmp (t, m->m, m->size) >= 0) - mpn_sub_n (t, t, m->m, m->size); - - if (mpn_cmp (t, ref, m->size)) + ecc_mod (m, t, t); + if (!mod_equal (m, ref, t)) { - fprintf (stderr, "ecc_mod %s failed: bit_size = %u\n", + fprintf (stderr, "ecc_mod %s failed: bit_size = %u, rp == xp\n", name, m->bit_size); fprintf (stderr, "a = "); mpn_out_str (stderr, 16, a, 2*m->size); @@ -72,6 +92,22 @@ test_one(const char *name, fprintf (stderr, "\n"); abort (); } + + mpn_copyi (t, a, 2*m->size); + ecc_mod (m, t + m->size, t); + if (!mod_equal (m, ref, t + m->size)) + { + fprintf (stderr, "ecc_mod %s failed: bit_size = %u, rp == xp + size\n", + name, m->bit_size); + fprintf (stderr, "a = "); + mpn_out_str (stderr, 16, a, 
2*m->size); + fprintf (stderr, "\nt = "); + mpn_out_str (stderr, 16, t + m->size, m->size); + fprintf (stderr, " (bad)\nref = "); + mpn_out_str (stderr, 16, ref, m->size); + fprintf (stderr, "\n"); + abort (); + } } } diff --git a/testsuite/ecc-redc-test.c b/testsuite/ecc-redc-test.c index 2d165f43..2c18ea25 100644 --- a/testsuite/ecc-redc-test.c +++ b/testsuite/ecc-redc-test.c @@ -64,7 +64,7 @@ test_main (void) if (ecc->p.reduce != ecc->p.mod) { mpn_copyi (m, a, 2*ecc->p.size); - ecc->p.reduce (&ecc->p, m); + ecc->p.reduce (&ecc->p, m, m); if (mpn_cmp (m, ecc->p.m, ecc->p.size) >= 0) mpn_sub_n (m, m, ecc->p.m, ecc->p.size); @@ -86,9 +86,9 @@ test_main (void) { mpn_copyi (m, a, 2*ecc->p.size); if (ecc->p.m[0] == 1) - ecc_pm1_redc (&ecc->p, m); + ecc_pm1_redc (&ecc->p, m, m); else - ecc_pp1_redc (&ecc->p, m); + ecc_pp1_redc (&ecc->p, m, m); if (mpn_cmp (m, ecc->p.m, ecc->p.size) >= 0) mpn_sub_n (m, m, ecc->p.m, ecc->p.size); |