summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2020-11-01 10:22:48 +0100
committer Niels Möller <nisse@lysator.liu.se> 2020-11-01 10:22:48 +0100
commit 172448b05931e791ce3470b45afe8de4cdffaaad (patch)
tree cd7d238a5ba07568c71619a9bb7819e107cfe205
parent 98402b3b32b346a568c0d0d8e56a64f27116250c (diff)
download nettle-172448b05931e791ce3470b45afe8de4cdffaaad.tar.gz
Separate result area for ecc_mod_mul and ecc_mod_sqr.
* ecc-mod-arith.c (ecc_mod_mul, ecc_mod_sqr): Separate argument for scratch area, reducing required size of result area. Update all callers to naïvely keep using result in scratch area. (ecc_mod_pow_2k, ecc_mod_pow_2k_mul): Simplified, also reducing required size of result area.
-rw-r--r-- ChangeLog 6
-rw-r--r-- curve25519-eh-to-x.c 2
-rw-r--r-- curve448-eh-to-x.c 4
-rw-r--r-- ecc-add-eh.c 22
-rw-r--r-- ecc-add-ehh.c 24
-rw-r--r-- ecc-add-jja.c 22
-rw-r--r-- ecc-add-jjj.c 32
-rw-r--r-- ecc-add-th.c 22
-rw-r--r-- ecc-add-thh.c 24
-rw-r--r-- ecc-curve25519.c 54
-rw-r--r-- ecc-curve448.c 48
-rw-r--r-- ecc-dup-eh.c 14
-rw-r--r-- ecc-dup-jj.c 16
-rw-r--r-- ecc-dup-th.c 14
-rw-r--r-- ecc-ecdsa-sign.c 4
-rw-r--r-- ecc-ecdsa-verify.c 4
-rw-r--r-- ecc-eh-to-a.c 4
-rw-r--r-- ecc-gostdsa-sign.c 4
-rw-r--r-- ecc-gostdsa-verify.c 4
-rw-r--r-- ecc-internal.h 15
-rw-r--r-- ecc-j-to-a.c 8
-rw-r--r-- ecc-mod-arith.c 41
-rw-r--r-- ecc-mul-m.c 38
-rw-r--r-- eddsa-decompress.c 4
-rw-r--r-- eddsa-sign.c 2
-rw-r--r-- eddsa-verify.c 4
-rw-r--r-- gostdsa-vko.c 2
27 files changed, 214 insertions, 224 deletions
diff --git a/ChangeLog b/ChangeLog
index eeae4181..cf5b4ce2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2020-11-01 Niels Möller <nisse@lysator.liu.se>
+ * ecc-mod-arith.c (ecc_mod_mul, ecc_mod_sqr): Separate argument
+ for scratch area, reducing required size of result area. Update
+ all callers to naïvely keep using result in scratch area.
+ (ecc_mod_pow_2k, ecc_mod_pow_2k_mul): Simplified, also reducing
+ required size of result area.
+
* testsuite/testutils.c (test_ecc_point): Show curve bits on failure.
2020-10-31 Niels Möller <nisse@lysator.liu.se>
diff --git a/curve25519-eh-to-x.c b/curve25519-eh-to-x.c
index 1ce2dd83..08ad3d91 100644
--- a/curve25519-eh-to-x.c
+++ b/curve25519-eh-to-x.c
@@ -69,7 +69,7 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
ecc->p.invert (&ecc->p, t1, t0, t2 + ecc->p.size);
ecc_mod_add (&ecc->p, t0, wp, vp);
- ecc_mod_mul (&ecc->p, t2, t0, t1);
+ ecc_mod_mul (&ecc->p, t2, t0, t1, t2);
cy = mpn_sub_n (xp, t2, ecc->p.m, ecc->p.size);
cnd_copy (cy, xp, t2, ecc->p.size);
diff --git a/curve448-eh-to-x.c b/curve448-eh-to-x.c
index ffeb83c1..6e3367ee 100644
--- a/curve448-eh-to-x.c
+++ b/curve448-eh-to-x.c
@@ -61,8 +61,8 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, mp_limb_t *scratch)
*/
/* Needs a total of 9*size storage. */
ecc->p.invert (&ecc->p, t0, p, t1 + ecc->p.size);
- ecc_mod_mul (&ecc->p, t1, t0, vp);
- ecc_mod_mul (&ecc->p, t2, t1, t1);
+ ecc_mod_mul (&ecc->p, t1, t0, vp, t1);
+ ecc_mod_mul (&ecc->p, t2, t1, t1, t2);
cy = mpn_sub_n (xp, t2, ecc->p.m, ecc->p.size);
cnd_copy (cy, xp, t2, ecc->p.size);
diff --git a/ecc-add-eh.c b/ecc-add-eh.c
index 05faa752..20adcf38 100644
--- a/ecc-add-eh.c
+++ b/ecc-add-eh.c
@@ -78,30 +78,30 @@ ecc_add_eh (const struct ecc_curve *ecc,
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, x3, x1, y1);
ecc_mod_add (&ecc->p, y3, x2, y2);
- ecc_mod_mul (&ecc->p, T, x3, y3);
+ ecc_mod_mul (&ecc->p, T, x3, y3, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_sub (&ecc->p, C, D, C);
- ecc_mod_sqr (&ecc->p, B, z1);
+ ecc_mod_sqr (&ecc->p, B, z1, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, F, T);
- ecc_mod_mul (&ecc->p, x3, B, z1);
+ ecc_mod_mul (&ecc->p, B, F, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, z1, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, G, z1);
- ecc_mod_mul (&ecc->p, y3, B, C); /* Clobbers z1 in case r == p. */
+ ecc_mod_mul (&ecc->p, B, G, z1, B);
+ ecc_mod_mul (&ecc->p, y3, B, C, y3); /* Clobbers z1 in case r == p. */
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
diff --git a/ecc-add-ehh.c b/ecc-add-ehh.c
index 1c57a728..414ded83 100644
--- a/ecc-add-ehh.c
+++ b/ecc-add-ehh.c
@@ -80,32 +80,32 @@ ecc_add_ehh (const struct ecc_curve *ecc,
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, A, x1, y1);
ecc_mod_add (&ecc->p, B, x2, y2);
- ecc_mod_mul (&ecc->p, T, A, B);
+ ecc_mod_mul (&ecc->p, T, A, B, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_sub (&ecc->p, C, D, C);
- ecc_mod_mul (&ecc->p, A, z1, z2);
- ecc_mod_sqr (&ecc->p, B, A);
+ ecc_mod_mul (&ecc->p, A, z1, z2, A);
+ ecc_mod_sqr (&ecc->p, B, A, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, F, T);
- ecc_mod_mul (&ecc->p, x3, B, A);
+ ecc_mod_mul (&ecc->p, B, F, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, A, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, G, C);
- ecc_mod_mul (&ecc->p, y3, B, A);
+ ecc_mod_mul (&ecc->p, B, G, C, B);
+ ecc_mod_mul (&ecc->p, y3, B, A, y3);
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
diff --git a/ecc-add-jja.c b/ecc-add-jja.c
index 037711d3..a8f92fe6 100644
--- a/ecc-add-jja.c
+++ b/ecc-add-jja.c
@@ -85,41 +85,41 @@ ecc_add_jja (const struct ecc_curve *ecc,
#define y2 (q + ecc->p.size)
/* zz */
- ecc_mod_sqr (&ecc->p, zz, z1);
+ ecc_mod_sqr (&ecc->p, zz, z1, zz);
/* h*/
- ecc_mod_mul (&ecc->p, h, x2, zz);
+ ecc_mod_mul (&ecc->p, h, x2, zz, h);
ecc_mod_sub (&ecc->p, h, h, x1);
/* hh */
- ecc_mod_sqr (&ecc->p, hh, h);
+ ecc_mod_sqr (&ecc->p, hh, h, hh);
/* Do z^3 early, store at w. */
- ecc_mod_mul (&ecc->p, w, zz, z1);
+ ecc_mod_mul (&ecc->p, w, zz, z1, w);
/* z_3, use j area for scratch */
ecc_mod_add (&ecc->p, r + 2*ecc->p.size, p + 2*ecc->p.size, h);
- ecc_mod_sqr (&ecc->p, j, r + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, j, r + 2*ecc->p.size, j);
ecc_mod_sub (&ecc->p, j, j, zz);
ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, j, hh);
/* w */
- ecc_mod_mul (&ecc->p, j, y2, w);
+ ecc_mod_mul (&ecc->p, j, y2, w, j);
ecc_mod_sub (&ecc->p, w, j, y1);
ecc_mod_mul_1 (&ecc->p, w, w, 2);
/* i replaces hh, j */
ecc_mod_mul_1 (&ecc->p, hh, hh, 4);
- ecc_mod_mul (&ecc->p, j, hh, h);
+ ecc_mod_mul (&ecc->p, j, hh, h, j);
/* v */
- ecc_mod_mul (&ecc->p, v, x1, hh);
+ ecc_mod_mul (&ecc->p, v, x1, hh, v);
/* x_3, use (h, hh) as sqratch */
- ecc_mod_sqr (&ecc->p, h, w);
+ ecc_mod_sqr (&ecc->p, h, w, h);
ecc_mod_sub (&ecc->p, r, h, j);
ecc_mod_submul_1 (&ecc->p, r, v, 2);
/* y_3, use (h, hh) as sqratch */
- ecc_mod_mul (&ecc->p, h, y1, j); /* frees j */
+ ecc_mod_mul (&ecc->p, h, y1, j, h); /* frees j */
ecc_mod_sub (&ecc->p, r + ecc->p.size, v, r);
- ecc_mod_mul (&ecc->p, j, r + ecc->p.size, w);
+ ecc_mod_mul (&ecc->p, j, r + ecc->p.size, w, j);
ecc_mod_submul_1 (&ecc->p, j, h, 2);
mpn_copyi (r + ecc->p.size, j, ecc->p.size);
}
diff --git a/ecc-add-jjj.c b/ecc-add-jjj.c
index 54b2246a..5c416b81 100644
--- a/ecc-add-jjj.c
+++ b/ecc-add-jjj.c
@@ -74,25 +74,25 @@ ecc_add_jjj (const struct ecc_curve *ecc,
mp_limb_t *v = scratch + 6*ecc->p.size;
/* z1^2, z2^2, u1 = x1 x2^2, u2 = x2 z1^2 - u1 */
- ecc_mod_sqr (&ecc->p, z1z1, p + 2*ecc->p.size);
- ecc_mod_sqr (&ecc->p, z2z2, q + 2*ecc->p.size);
- ecc_mod_mul (&ecc->p, u1, p, z2z2);
- ecc_mod_mul (&ecc->p, u2, q, z1z1);
+ ecc_mod_sqr (&ecc->p, z1z1, p + 2*ecc->p.size, z1z1);
+ ecc_mod_sqr (&ecc->p, z2z2, q + 2*ecc->p.size, z2z2);
+ ecc_mod_mul (&ecc->p, u1, p, z2z2, u1);
+ ecc_mod_mul (&ecc->p, u2, q, z1z1, u2);
ecc_mod_sub (&ecc->p, u2, u2, u1); /* Store h in u2 */
/* z3, use i, j, v as scratch, result at i. */
ecc_mod_add (&ecc->p, i, p + 2*ecc->p.size, q + 2*ecc->p.size);
- ecc_mod_sqr (&ecc->p, v, i);
+ ecc_mod_sqr (&ecc->p, v, i, v);
ecc_mod_sub (&ecc->p, v, v, z1z1);
ecc_mod_sub (&ecc->p, v, v, z2z2);
- ecc_mod_mul (&ecc->p, i, v, u2);
+ ecc_mod_mul (&ecc->p, i, v, u2, i);
/* Delayed write, to support in-place operation. */
/* s1 = y1 z2^3, s2 = y2 z1^3, scratch at j and v */
- ecc_mod_mul (&ecc->p, j, z1z1, p + 2*ecc->p.size); /* z1^3 */
- ecc_mod_mul (&ecc->p, v, z2z2, q + 2*ecc->p.size); /* z2^3 */
- ecc_mod_mul (&ecc->p, s1, p + ecc->p.size, v);
- ecc_mod_mul (&ecc->p, v, j, q + ecc->p.size);
+ ecc_mod_mul (&ecc->p, j, z1z1, p + 2*ecc->p.size, j); /* z1^3 */
+ ecc_mod_mul (&ecc->p, v, z2z2, q + 2*ecc->p.size, v); /* z2^3 */
+ ecc_mod_mul (&ecc->p, s1, p + ecc->p.size, v, s1);
+ ecc_mod_mul (&ecc->p, v, j, q + ecc->p.size, v);
ecc_mod_sub (&ecc->p, s2, v, s1);
ecc_mod_mul_1 (&ecc->p, s2, s2, 2);
@@ -100,21 +100,21 @@ ecc_add_jjj (const struct ecc_curve *ecc,
mpn_copyi (r + 2*ecc->p.size, i, ecc->p.size);
/* i, j, v */
- ecc_mod_sqr (&ecc->p, i, u2);
+ ecc_mod_sqr (&ecc->p, i, u2, i);
ecc_mod_mul_1 (&ecc->p, i, i, 4);
- ecc_mod_mul (&ecc->p, j, u2, i);
- ecc_mod_mul (&ecc->p, v, u1, i);
+ ecc_mod_mul (&ecc->p, j, u2, i, j);
+ ecc_mod_mul (&ecc->p, v, u1, i, v);
/* now, u1, u2 and i are free for reuse .*/
/* x3, use u1, u2 as scratch */
- ecc_mod_sqr (&ecc->p, u1, s2);
+ ecc_mod_sqr (&ecc->p, u1, s2, u1);
ecc_mod_sub (&ecc->p, r, u1, j);
ecc_mod_submul_1 (&ecc->p, r, v, 2);
/* y3 */
- ecc_mod_mul (&ecc->p, u1, s1, j); /* Frees j */
+ ecc_mod_mul (&ecc->p, u1, s1, j, u1); /* Frees j */
ecc_mod_sub (&ecc->p, u2, v, r); /* Frees v */
- ecc_mod_mul (&ecc->p, i, s2, u2);
+ ecc_mod_mul (&ecc->p, i, s2, u2, i);
ecc_mod_submul_1 (&ecc->p, i, u1, 2);
mpn_copyi (r + ecc->p.size, i, ecc->p.size);
}
diff --git a/ecc-add-th.c b/ecc-add-th.c
index 1d61a32e..92028052 100644
--- a/ecc-add-th.c
+++ b/ecc-add-th.c
@@ -84,30 +84,30 @@ ecc_add_th (const struct ecc_curve *ecc,
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, x3, x1, y1);
ecc_mod_add (&ecc->p, y3, x2, y2);
- ecc_mod_mul (&ecc->p, T, x3, y3);
+ ecc_mod_mul (&ecc->p, T, x3, y3, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_add (&ecc->p, C, D, C);
- ecc_mod_sqr (&ecc->p, B, z1);
+ ecc_mod_sqr (&ecc->p, B, z1, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, G, T);
- ecc_mod_mul (&ecc->p, x3, B, z1);
+ ecc_mod_mul (&ecc->p, B, G, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, z1, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, F, z1);
- ecc_mod_mul (&ecc->p, y3, B, C); /* Clobbers z1 in case r == p. */
+ ecc_mod_mul (&ecc->p, B, F, z1, B);
+ ecc_mod_mul (&ecc->p, y3, B, C, y3); /* Clobbers z1 in case r == p. */
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
diff --git a/ecc-add-thh.c b/ecc-add-thh.c
index 59b33f38..80d05d7e 100644
--- a/ecc-add-thh.c
+++ b/ecc-add-thh.c
@@ -85,32 +85,32 @@ ecc_add_thh (const struct ecc_curve *ecc,
#define F D
#define G E
- ecc_mod_mul (&ecc->p, C, x1, x2);
- ecc_mod_mul (&ecc->p, D, y1, y2);
+ ecc_mod_mul (&ecc->p, C, x1, x2, C);
+ ecc_mod_mul (&ecc->p, D, y1, y2, D);
ecc_mod_add (&ecc->p, A, x1, y1);
ecc_mod_add (&ecc->p, B, x2, y2);
- ecc_mod_mul (&ecc->p, T, A, B);
+ ecc_mod_mul (&ecc->p, T, A, B, T);
ecc_mod_sub (&ecc->p, T, T, C);
ecc_mod_sub (&ecc->p, T, T, D);
- ecc_mod_mul (&ecc->p, x3, C, D);
- ecc_mod_mul (&ecc->p, E, x3, ecc->b);
+ ecc_mod_mul (&ecc->p, x3, C, D, x3);
+ ecc_mod_mul (&ecc->p, E, x3, ecc->b, E);
ecc_mod_add (&ecc->p, C, D, C);
- ecc_mod_mul (&ecc->p, A, z1, z2);
- ecc_mod_sqr (&ecc->p, B, A);
+ ecc_mod_mul (&ecc->p, A, z1, z2, A);
+ ecc_mod_sqr (&ecc->p, B, A, B);
ecc_mod_sub (&ecc->p, F, B, E);
ecc_mod_add (&ecc->p, G, B, E);
/* x3 */
- ecc_mod_mul (&ecc->p, B, G, T);
- ecc_mod_mul (&ecc->p, x3, B, A);
+ ecc_mod_mul (&ecc->p, B, G, T, B);
+ ecc_mod_mul (&ecc->p, x3, B, A, x3);
/* y3 */
- ecc_mod_mul (&ecc->p, B, F, C);
- ecc_mod_mul (&ecc->p, y3, B, A);
+ ecc_mod_mul (&ecc->p, B, F, C, B);
+ ecc_mod_mul (&ecc->p, y3, B, A, y3);
/* z3 */
- ecc_mod_mul (&ecc->p, B, F, G);
+ ecc_mod_mul (&ecc->p, B, F, G, B);
mpn_copyi (z3, B, ecc->p.size);
}
diff --git a/ecc-curve25519.c b/ecc-curve25519.c
index bd2e19d2..88a955bc 100644
--- a/ecc-curve25519.c
+++ b/ecc-curve25519.c
@@ -126,25 +126,25 @@ ecc_mod_pow_252m3 (const struct ecc_modulo *m,
*/
ecc_mod_pow_2kp1 (m, t0, ap, 1, t1); /* a^3 */
- ecc_mod_sqr (m, rp, t0); /* a^6 */
- ecc_mod_mul (m, a7, rp, ap); /* a^7 */
+ ecc_mod_sqr (m, rp, t0, rp); /* a^6 */
+ ecc_mod_mul (m, a7, rp, ap, a7); /* a^7 */
ecc_mod_pow_2kp1 (m, rp, a7, 3, t0); /* a^63 = a^{2^6-1} */
- ecc_mod_sqr (m, t0, rp); /* a^{2^7-2} */
- ecc_mod_mul (m, rp, t0, ap); /* a^{2^7-1} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^7-2} */
+ ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^7-1} */
ecc_mod_pow_2kp1 (m, t0, rp, 7, t1); /* a^{2^14-1}*/
ecc_mod_pow_2kp1 (m, rp, t0, 14, t1); /* a^{2^28-1} */
- ecc_mod_sqr (m, t0, rp); /* a^{2^29-2} */
- ecc_mod_sqr (m, t1, t0); /* a^{2^30-4} */
- ecc_mod_sqr (m, t0, t1); /* a^{2^31-8} */
- ecc_mod_mul (m, rp, t0, a7); /* a^{2^31-1} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^29-2} */
+ ecc_mod_sqr (m, t1, t0, t1); /* a^{2^30-4} */
+ ecc_mod_sqr (m, t0, t1, t0); /* a^{2^31-8} */
+ ecc_mod_mul (m, rp, t0, a7, rp); /* a^{2^31-1} */
ecc_mod_pow_2kp1 (m, t0, rp, 31, t1); /* a^{2^62-1} */
ecc_mod_pow_2kp1 (m, rp, t0, 62, t1); /* a^{2^124-1}*/
- ecc_mod_sqr (m, t0, rp); /* a^{2^125-2} */
- ecc_mod_mul (m, rp, t0, ap); /* a^{2^125-1} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^125-2} */
+ ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^125-1} */
ecc_mod_pow_2kp1 (m, t0, rp, 125, t1);/* a^{2^250-1} */
- ecc_mod_sqr (m, rp, t0); /* a^{2^251-2} */
- ecc_mod_sqr (m, t0, rp); /* a^{2^252-4} */
- ecc_mod_mul (m, rp, t0, ap); /* a^{2^252-3} */
+ ecc_mod_sqr (m, rp, t0, rp); /* a^{2^251-2} */
+ ecc_mod_sqr (m, t0, rp, t0); /* a^{2^252-4} */
+ ecc_mod_mul (m, rp, t0, ap, rp); /* a^{2^252-3} */
#undef t0
#undef t1
#undef a7
@@ -165,11 +165,11 @@ static void ecc_curve25519_inv (const struct ecc_modulo *p,
= 1 + 2 (1 + 4 (2^{252}-3))
*/
ecc_mod_pow_252m3 (p, rp, ap, t0);
- ecc_mod_sqr (p, t0, rp);
- ecc_mod_sqr (p, rp, t0);
- ecc_mod_mul (p, t0, ap, rp);
- ecc_mod_sqr (p, rp, t0);
- ecc_mod_mul (p, t0, ap, rp);
+ ecc_mod_sqr (p, t0, rp, t0);
+ ecc_mod_sqr (p, rp, t0, rp);
+ ecc_mod_mul (p, t0, ap, rp, t0);
+ ecc_mod_sqr (p, rp, t0, rp);
+ ecc_mod_mul (p, t0, ap, rp, t0);
mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */
#undef t0
}
@@ -232,23 +232,23 @@ ecc_curve25519_sqrt(const struct ecc_modulo *p, mp_limb_t *rp,
#define t0 (scratch + 2*ECC_LIMB_SIZE)
/* Live values */
- ecc_mod_sqr (p, v2, vp); /* v2 */
- ecc_mod_mul (p, uv, up, vp); /* uv, v2 */
- ecc_mod_mul (p, uv3, uv, v2); /* uv3, v2 */
- ecc_mod_sqr (p, v4, v2); /* uv3, v4 */
- ecc_mod_mul (p, uv7, uv3, v4); /* uv3, uv7 */
+ ecc_mod_sqr (p, v2, vp, v2); /* v2 */
+ ecc_mod_mul (p, uv, up, vp, uv); /* uv, v2 */
+ ecc_mod_mul (p, uv3, uv, v2, uv3); /* uv3, v2 */
+ ecc_mod_sqr (p, v4, v2, v4); /* uv3, v4 */
+ ecc_mod_mul (p, uv7, uv3, v4, uv7); /* uv3, uv7 */
ecc_mod_pow_252m3 (p, uv7p, uv7, scratch_out); /* uv3, uv7p */
- ecc_mod_mul (p, rp, uv7p, uv3); /* none */
+ ecc_mod_mul (p, rp, uv7p, uv3, rp); /* none */
/* Check sign. If square root exists, have v x^2 = ±u */
- ecc_mod_sqr (p, x2, rp);
- ecc_mod_mul (p, vx2, x2, vp);
+ ecc_mod_sqr (p, x2, rp, x2);
+ ecc_mod_mul (p, vx2, x2, vp, vx2);
ecc_mod_add (p, t0, vx2, up);
neg = ecc_curve25519_zero_p (p, t0);
ecc_mod_sub (p, t0, up, vx2);
pos = ecc_curve25519_zero_p (p, t0);
- ecc_mod_mul (p, t0, rp, ecc_sqrt_z);
+ ecc_mod_mul (p, t0, rp, ecc_sqrt_z, t0);
cnd_copy (neg, rp, t0, ECC_LIMB_SIZE);
return pos | neg;
diff --git a/ecc-curve448.c b/ecc-curve448.c
index 6b061606..634e6ae6 100644
--- a/ecc-curve448.c
+++ b/ecc-curve448.c
@@ -110,31 +110,31 @@ ecc_mod_pow_446m224m1 (const struct ecc_modulo *p,
#define t1 (scratch + 1*ECC_LIMB_SIZE)
#define t2 (scratch + 3*ECC_LIMB_SIZE)
- ecc_mod_sqr (p, rp, ap); /* a^2 */
- ecc_mod_mul (p, t0, ap, rp); /* a^3 */
- ecc_mod_sqr (p, rp, t0); /* a^6 */
- ecc_mod_mul (p, t0, ap, rp); /* a^{2^3-1} */
+ ecc_mod_sqr (p, rp, ap, rp); /* a^2 */
+ ecc_mod_mul (p, t0, ap, rp, t0); /* a^3 */
+ ecc_mod_sqr (p, rp, t0, rp); /* a^6 */
+ ecc_mod_mul (p, t0, ap, rp,t0); /* a^{2^3-1} */
ecc_mod_pow_2kp1 (p, t1, t0, 3, rp); /* a^{2^6-1} */
ecc_mod_pow_2k (p, rp, t1, 3, t2); /* a^{2^9-2^3} */
- ecc_mod_mul (p, t2, t0, rp); /* a^{2^9-1} */
+ ecc_mod_mul (p, t2, t0, rp, t2); /* a^{2^9-1} */
ecc_mod_pow_2kp1 (p, t0, t2, 9, rp); /* a^{2^18-1} */
- ecc_mod_sqr (p, t1, t0); /* a^{2^19-2} */
- ecc_mod_mul (p, rp, ap, t1); /* a^{2^19-1} */
+ ecc_mod_sqr (p, t1, t0, t1); /* a^{2^19-2} */
+ ecc_mod_mul (p, rp, ap, t1, rp); /* a^{2^19-1} */
ecc_mod_pow_2k (p, t1, rp, 18, t2); /* a^{2^37-2^18} */
- ecc_mod_mul (p, rp, t0, t1); /* a^{2^37-1} */
+ ecc_mod_mul (p, rp, t0, t1, rp); /* a^{2^37-1} */
mpn_copyi (t0, rp, p->size);
ecc_mod_pow_2kp1 (p, rp, t0, 37, t2); /* a^{2^74-1} */
ecc_mod_pow_2k (p, t1, rp, 37, t2); /* a^{2^111-2^37} */
- ecc_mod_mul (p, rp, t0, t1); /* a^{2^111-1} */
+ ecc_mod_mul (p, rp, t0, t1, rp); /* a^{2^111-1} */
ecc_mod_pow_2kp1 (p, t0, rp, 111, t2);/* a^{2^222-1} */
- ecc_mod_sqr (p, t1, t0); /* a^{2^223-2} */
- ecc_mod_mul (p, rp, ap, t1); /* a^{2^223-1} */
+ ecc_mod_sqr (p, t1, t0, t1); /* a^{2^223-2} */
+ ecc_mod_mul (p, rp, ap, t1, rp); /* a^{2^223-1} */
ecc_mod_pow_2k (p, t1, rp, 223, t2); /* a^{2^446-2^223} */
- ecc_mod_mul (p, rp, t0, t1); /* a^{2^446-2^222-1} */
+ ecc_mod_mul (p, rp, t0, t1, rp); /* a^{2^446-2^222-1} */
#undef t0
#undef t1
#undef t2
@@ -149,9 +149,9 @@ static void ecc_curve448_inv (const struct ecc_modulo *p,
#define t0 scratch
ecc_mod_pow_446m224m1 (p, rp, ap, scratch); /* a^{2^446-2^222-1} */
- ecc_mod_sqr (p, t0, rp); /* a^{2^447-2^223-2} */
- ecc_mod_sqr (p, rp, t0); /* a^{2^448-2^224-4} */
- ecc_mod_mul (p, t0, ap, rp); /* a^{2^448-2^224-3} */
+ ecc_mod_sqr (p, t0, rp, t0); /* a^{2^447-2^223-2} */
+ ecc_mod_sqr (p, rp, t0, rp); /* a^{2^448-2^224-4} */
+ ecc_mod_mul (p, t0, ap, rp, t0); /* a^{2^448-2^224-3} */
mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */
#undef t0
@@ -204,18 +204,18 @@ ecc_curve448_sqrt(const struct ecc_modulo *p, mp_limb_t *rp,
#define t0 (scratch + 2*ECC_LIMB_SIZE)
/* Live values */
- ecc_mod_sqr (p, u2, up); /* u2 */
- ecc_mod_mul (p, u3, u2, up); /* u3 */
- ecc_mod_mul (p, u3v, u3, vp); /* u3v */
- ecc_mod_mul (p, uv, up, vp); /* u3v, uv */
- ecc_mod_sqr (p, u2v2, uv); /* u3v, u2v2 */
- ecc_mod_mul (p, u5v3, u3v, u2v2); /* u3v, u5v3 */
+ ecc_mod_sqr (p, u2, up, u2); /* u2 */
+ ecc_mod_mul (p, u3, u2, up, u3); /* u3 */
+ ecc_mod_mul (p, u3v, u3, vp, u3v); /* u3v */
+ ecc_mod_mul (p, uv, up, vp, uv); /* u3v, uv */
+ ecc_mod_sqr (p, u2v2, uv, u2v2); /* u3v, u2v2 */
+ ecc_mod_mul (p, u5v3, u3v, u2v2, u5v3); /* u3v, u5v3 */
ecc_mod_pow_446m224m1 (p, u5v3p, u5v3, scratch_out); /* u3v, u5v3p */
- ecc_mod_mul (p, rp, u5v3p, u3v); /* none */
+ ecc_mod_mul (p, rp, u5v3p, u3v, rp); /* none */
/* If square root exists, have v x^2 = u */
- ecc_mod_sqr (p, x2, rp);
- ecc_mod_mul (p, vx2, x2, vp);
+ ecc_mod_sqr (p, x2, rp, x2);
+ ecc_mod_mul (p, vx2, x2, vp, vx2);
ecc_mod_sub (p, t0, vx2, up);
return ecc_curve448_zero_p (p, t0);
diff --git a/ecc-dup-eh.c b/ecc-dup-eh.c
index b36c5540..f9429866 100644
--- a/ecc-dup-eh.c
+++ b/ecc-dup-eh.c
@@ -65,14 +65,14 @@ ecc_dup_eh (const struct ecc_curve *ecc,
/* b */
ecc_mod_add (&ecc->p, e, p, p + ecc->p.size);
- ecc_mod_sqr (&ecc->p, b, e);
+ ecc_mod_sqr (&ecc->p, b, e, b);
/* c */
- ecc_mod_sqr (&ecc->p, c, p);
+ ecc_mod_sqr (&ecc->p, c, p, c);
/* d */
- ecc_mod_sqr (&ecc->p, d, p + ecc->p.size);
+ ecc_mod_sqr (&ecc->p, d, p + ecc->p.size, d);
/* h, can use r as scratch, even for in-place operation. */
- ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size, r);
/* e, */
ecc_mod_add (&ecc->p, e, c, d);
/* j */
@@ -81,11 +81,11 @@ ecc_dup_eh (const struct ecc_curve *ecc,
/* x' */
ecc_mod_sub (&ecc->p, b, b, e);
- ecc_mod_mul (&ecc->p, r, b, j);
+ ecc_mod_mul (&ecc->p, r, b, j, r);
/* y' */
ecc_mod_sub (&ecc->p, c, c, d); /* Redundant */
- ecc_mod_mul (&ecc->p, r + ecc->p.size, e, c);
+ ecc_mod_mul (&ecc->p, r + ecc->p.size, e, c, r + ecc->p.size);
/* z' */
- ecc_mod_mul (&ecc->p, b, e, j);
+ ecc_mod_mul (&ecc->p, b, e, j, b);
mpn_copyi (r + 2*ecc->p.size, b, ecc->p.size);
}
diff --git a/ecc-dup-jj.c b/ecc-dup-jj.c
index 2247e8fd..c338971b 100644
--- a/ecc-dup-jj.c
+++ b/ecc-dup-jj.c
@@ -72,39 +72,39 @@ ecc_dup_jj (const struct ecc_curve *ecc,
#define zp (p + 2*ecc->p.size)
/* delta */
- ecc_mod_sqr (&ecc->p, delta, zp);
+ ecc_mod_sqr (&ecc->p, delta, zp, delta);
/* gamma */
- ecc_mod_sqr (&ecc->p, gamma, yp);
+ ecc_mod_sqr (&ecc->p, gamma, yp, gamma);
/* z'. Can use beta area as scratch. */
ecc_mod_add (&ecc->p, r + 2*ecc->p.size, yp, zp);
- ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size, beta);
ecc_mod_sub (&ecc->p, beta, beta, gamma);
ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, beta, delta);
/* alpha. Can use beta area as scratch, and overwrite delta. */
ecc_mod_add (&ecc->p, sum, xp, delta);
ecc_mod_sub (&ecc->p, delta, xp, delta);
- ecc_mod_mul (&ecc->p, beta, sum, delta);
+ ecc_mod_mul (&ecc->p, beta, sum, delta, beta);
ecc_mod_mul_1 (&ecc->p, alpha, beta, 3);
/* beta */
- ecc_mod_mul (&ecc->p, beta, xp, gamma);
+ ecc_mod_mul (&ecc->p, beta, xp, gamma, beta);
/* Do gamma^2 and 4*beta early, to get them out of the way. We can
then use the old area at gamma as scratch. */
- ecc_mod_sqr (&ecc->p, g2, gamma);
+ ecc_mod_sqr (&ecc->p, g2, gamma, g2);
ecc_mod_mul_1 (&ecc->p, sum, beta, 4);
/* x' */
- ecc_mod_sqr (&ecc->p, gamma, alpha); /* Overwrites gamma and beta */
+ ecc_mod_sqr (&ecc->p, gamma, alpha, gamma); /* Overwrites gamma and beta */
ecc_mod_submul_1 (&ecc->p, gamma, sum, 2);
mpn_copyi (r, gamma, ecc->p.size);
/* y' */
ecc_mod_sub (&ecc->p, sum, sum, r);
- ecc_mod_mul (&ecc->p, gamma, sum, alpha);
+ ecc_mod_mul (&ecc->p, gamma, sum, alpha, gamma);
ecc_mod_submul_1 (&ecc->p, gamma, g2, 8);
mpn_copyi (r + ecc->p.size, gamma, ecc->p.size);
}
diff --git a/ecc-dup-th.c b/ecc-dup-th.c
index dd95b84a..c1582cd2 100644
--- a/ecc-dup-th.c
+++ b/ecc-dup-th.c
@@ -82,14 +82,14 @@ ecc_dup_th (const struct ecc_curve *ecc,
/* B */
ecc_mod_add (&ecc->p, F, p, p + ecc->p.size);
- ecc_mod_sqr (&ecc->p, B, F);
+ ecc_mod_sqr (&ecc->p, B, F, B);
/* C */
- ecc_mod_sqr (&ecc->p, C, p);
+ ecc_mod_sqr (&ecc->p, C, p, C);
/* D */
- ecc_mod_sqr (&ecc->p, D, p + ecc->p.size);
+ ecc_mod_sqr (&ecc->p, D, p + ecc->p.size, D);
/* Can use r as scratch, even for in-place operation. */
- ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size);
+ ecc_mod_sqr (&ecc->p, r, p + 2*ecc->p.size, r);
/* F, */
ecc_mod_sub (&ecc->p, F, D, C);
/* B - C - D */
@@ -100,10 +100,10 @@ ecc_dup_th (const struct ecc_curve *ecc,
ecc_mod_sub (&ecc->p, J, r, F);
/* x' */
- ecc_mod_mul (&ecc->p, r, B, J);
+ ecc_mod_mul (&ecc->p, r, B, J, r);
/* y' */
- ecc_mod_mul (&ecc->p, r + ecc->p.size, F, C);
+ ecc_mod_mul (&ecc->p, r + ecc->p.size, F, C, r + ecc->p.size);
/* z' */
- ecc_mod_mul (&ecc->p, B, F, J);
+ ecc_mod_mul (&ecc->p, B, F, J, B);
mpn_copyi (r + 2*ecc->p.size, B, ecc->p.size);
}
diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c
index d675bd9b..e79a636a 100644
--- a/ecc-ecdsa-sign.c
+++ b/ecc-ecdsa-sign.c
@@ -88,9 +88,9 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc,
/* Process hash digest */
ecc_hash (&ecc->q, hp, length, digest);
- ecc_mod_mul (&ecc->q, tp, zp, rp);
+ ecc_mod_mul (&ecc->q, tp, zp, rp, tp);
ecc_mod_add (&ecc->q, hp, hp, tp);
- ecc_mod_mul (&ecc->q, tp, hp, kinv);
+ ecc_mod_mul (&ecc->q, tp, hp, kinv, tp);
mpn_copyi (sp, tp, ecc->p.size);
#undef P
diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c
index c43bdadc..d88bf64c 100644
--- a/ecc-ecdsa-verify.c
+++ b/ecc-ecdsa-verify.c
@@ -102,10 +102,10 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
/* u1 = h / s, P1 = u1 * G */
ecc_hash (&ecc->q, hp, length, digest);
- ecc_mod_mul (&ecc->q, u1, hp, sinv);
+ ecc_mod_mul (&ecc->q, u1, hp, sinv, u1);
/* u2 = r / s, P2 = u2 * Y */
- ecc_mod_mul (&ecc->q, u2, rp, sinv);
+ ecc_mod_mul (&ecc->q, u2, rp, sinv, u2);
/* Total storage: 5*ecc->p.size + ecc->mul_itch */
ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size);
diff --git a/ecc-eh-to-a.c b/ecc-eh-to-a.c
index 869e8ad5..ce71e93f 100644
--- a/ecc-eh-to-a.c
+++ b/ecc-eh-to-a.c
@@ -61,11 +61,11 @@ ecc_eh_to_a (const struct ecc_curve *ecc,
/* Needs 2*size + scratch for the invert call. */
ecc->p.invert (&ecc->p, izp, zp, tp + ecc->p.size);
- ecc_mod_mul (&ecc->p, tp, xp, izp);
+ ecc_mod_mul (&ecc->p, tp, xp, izp, tp);
cy = mpn_sub_n (r, tp, ecc->p.m, ecc->p.size);
cnd_copy (cy, r, tp, ecc->p.size);
- ecc_mod_mul (&ecc->p, tp, yp, izp);
+ ecc_mod_mul (&ecc->p, tp, yp, izp, tp);
cy = mpn_sub_n (r + ecc->p.size, tp, ecc->p.m, ecc->p.size);
cnd_copy (cy, r + ecc->p.size, tp, ecc->p.size);
}
diff --git a/ecc-gostdsa-sign.c b/ecc-gostdsa-sign.c
index a12eb2af..351ce93e 100644
--- a/ecc-gostdsa-sign.c
+++ b/ecc-gostdsa-sign.c
@@ -84,8 +84,8 @@ ecc_gostdsa_sign (const struct ecc_curve *ecc,
if (mpn_zero_p (hp, ecc->p.size))
mpn_add_1 (hp, hp, ecc->p.size, 1);
- ecc_mod_mul (&ecc->q, tp, rp, zp);
- ecc_mod_mul (&ecc->q, t2p, kp, hp);
+ ecc_mod_mul (&ecc->q, tp, rp, zp, tp);
+ ecc_mod_mul (&ecc->q, t2p, kp, hp, t2p);
ecc_mod_add (&ecc->q, sp, tp, t2p);
/* Also reduce mod ecc->q. It should already be < 2*ecc->q,
diff --git a/ecc-gostdsa-verify.c b/ecc-gostdsa-verify.c
index 29b82c84..1c02d41c 100644
--- a/ecc-gostdsa-verify.c
+++ b/ecc-gostdsa-verify.c
@@ -102,10 +102,10 @@ ecc_gostdsa_verify (const struct ecc_curve *ecc,
ecc->q.invert (&ecc->q, vp, hp, vp + 2*ecc->p.size);
/* z1 = s / h, P1 = z1 * G */
- ecc_mod_mul (&ecc->q, z1, sp, vp);
+ ecc_mod_mul (&ecc->q, z1, sp, vp, z1);
/* z2 = - r / h, P2 = z2 * Y */
- ecc_mod_mul (&ecc->q, z2, rp, vp);
+ ecc_mod_mul (&ecc->q, z2, rp, vp, z2);
mpn_sub_n (z2, ecc->q.m, z2, ecc->p.size);
/* Total storage: 5*ecc->p.size + ecc->mul_itch */
diff --git a/ecc-internal.h b/ecc-internal.h
index b2ee8350..52bea1c9 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -255,23 +255,26 @@ void
ecc_mod_submul_1 (const struct ecc_modulo *m, mp_limb_t *rp,
const mp_limb_t *ap, mp_limb_t b);
-/* The mul and sqr functions need 2*m->size limbs at rp */
+/* The mul and sqr functions need 2*m->size limbs at tp. rp may overlap
+ ap or bp, and may equal tp or tp + m->size, but no other overlap
+ with tp is allowed. */
void
ecc_mod_mul (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap, const mp_limb_t *bp);
+ const mp_limb_t *ap, const mp_limb_t *bp, mp_limb_t *tp);
void
ecc_mod_sqr (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap);
+ const mp_limb_t *ap, mp_limb_t *tp);
-/* The pow functions needs 2*m->size limbs at both rp and tp. */
-/* R <-- X^{2^k} */
+/* R <-- X^{2^k} mod M. Needs 2*m->size limbs of scratch space, same
+ overlap requirements as mul and sqr above. */
void
ecc_mod_pow_2k (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
unsigned k, mp_limb_t *tp);
-/* R <-- X^{2^k} Y */
+/* R <-- X^{2^k} Y mod M. Similar requirements as ecc_mod_pow_2k, but
+ rp and yp can't overlap. */
void
ecc_mod_pow_2k_mul (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c
index 915c056b..882830c4 100644
--- a/ecc-j-to-a.c
+++ b/ecc-j-to-a.c
@@ -52,7 +52,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
mp_limb_t cy;
ecc->p.invert (&ecc->p, izp, p+2*ecc->p.size, izp + 2 * ecc->p.size);
- ecc_mod_sqr (&ecc->p, iz2p, izp);
+ ecc_mod_sqr (&ecc->p, iz2p, izp, iz2p);
if (ecc->use_redc)
{
@@ -63,7 +63,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
}
/* r_x <-- x / z^2 */
- ecc_mod_mul (&ecc->p, iz3p, iz2p, p);
+ ecc_mod_mul (&ecc->p, iz3p, iz2p, p, iz3p);
/* ecc_mod (and ecc_mod_mul) may return a value up to 2p - 1, so
do a conditional subtraction. */
cy = mpn_sub_n (r, iz3p, ecc->p.m, ecc->p.size);
@@ -82,8 +82,8 @@ ecc_j_to_a (const struct ecc_curve *ecc,
}
return;
}
- ecc_mod_mul (&ecc->p, iz3p, iz2p, izp);
- ecc_mod_mul (&ecc->p, tp, iz3p, p + ecc->p.size);
+ ecc_mod_mul (&ecc->p, iz3p, iz2p, izp, iz3p);
+ ecc_mod_mul (&ecc->p, tp, iz3p, p + ecc->p.size, tp);
/* And a similar subtraction. */
cy = mpn_sub_n (r + ecc->p.size, tp, ecc->p.m, ecc->p.size);
cnd_copy (cy, r + ecc->p.size, tp, ecc->p.size);
diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c
index 686b56bf..551f2c14 100644
--- a/ecc-mod-arith.c
+++ b/ecc-mod-arith.c
@@ -109,57 +109,38 @@ ecc_mod_submul_1 (const struct ecc_modulo *m, mp_limb_t *rp,
assert (hi == 0);
}
-/* NOTE: mul and sqr needs 2*m->size limbs at rp */
void
ecc_mod_mul (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap, const mp_limb_t *bp)
+ const mp_limb_t *ap, const mp_limb_t *bp, mp_limb_t *tp)
{
- mpn_mul_n (rp, ap, bp, m->size);
- m->reduce (m, rp, rp);
+ mpn_mul_n (tp, ap, bp, m->size);
+ m->reduce (m, rp, tp);
}
void
ecc_mod_sqr (const struct ecc_modulo *m, mp_limb_t *rp,
- const mp_limb_t *ap)
+ const mp_limb_t *ap, mp_limb_t *tp)
{
- mpn_sqr (rp, ap, m->size);
- m->reduce (m, rp, rp);
+ mpn_sqr (tp, ap, m->size);
+ m->reduce (m, rp, tp);
}
-/* Compute R <-- X^{2^k} mod M. Needs 2*ecc->size limbs at rp, and
- 2*ecc->size additional limbs of scratch space. No overlap
- allowed. */
void
ecc_mod_pow_2k (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
unsigned k, mp_limb_t *tp)
{
- if (k & 1)
- {
- ecc_mod_sqr (m, rp, xp);
- k--;
- }
- else
- {
- ecc_mod_sqr (m, tp, xp);
- ecc_mod_sqr (m, rp, tp);
- k -= 2;
- }
- while (k > 0)
- {
- ecc_mod_sqr (m, tp, rp);
- ecc_mod_sqr (m, rp, tp);
- k -= 2;
- }
+ ecc_mod_sqr (m, rp, xp, tp);
+ while (--k > 0)
+ ecc_mod_sqr (m, rp, rp, tp);
}
-/* Computes R <-- X^{2^k} * Y. Scratch requirements as ecc_mod_pow_2k. */
void
ecc_mod_pow_2k_mul (const struct ecc_modulo *m,
mp_limb_t *rp, const mp_limb_t *xp,
unsigned k, const mp_limb_t *yp,
mp_limb_t *tp)
{
- ecc_mod_pow_2k (m, tp, xp, k, rp);
- ecc_mod_mul (m, rp, tp, yp);
+ ecc_mod_pow_2k (m, rp, xp, k, tp);
+ ecc_mod_mul (m, rp, rp, yp, tp);
}
diff --git a/ecc-mul-m.c b/ecc-mul-m.c
index ce612360..2dfff6d1 100644
--- a/ecc-mul-m.c
+++ b/ecc-mul-m.c
@@ -76,12 +76,12 @@ ecc_mul_m (const struct ecc_modulo *m,
/* Get x3, z3 from doubling. Since most significant bit is forced to 1. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A);
- ecc_mod_sqr (m, BB, B);
- ecc_mod_mul (m, x3, AA, BB);
+ ecc_mod_sqr (m, AA, A, AA);
+ ecc_mod_sqr (m, BB, B, BB);
+ ecc_mod_mul (m, x3, AA, BB, x3);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_mul (m, z3, E, AA);
+ ecc_mod_mul (m, z3, E, AA, z3);
for (i = bit_high; i >= bit_low; i--)
{
@@ -94,23 +94,23 @@ ecc_mul_m (const struct ecc_modulo *m,
limbs. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A);
- ecc_mod_sqr (m, BB, B);
- ecc_mod_mul (m, x2, AA, BB); /* Last use of BB */
+ ecc_mod_sqr (m, AA, A, AA);
+ ecc_mod_sqr (m, BB, B, BB);
+ ecc_mod_mul (m, x2, AA, BB, x2); /* Last use of BB */
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
ecc_mod_add (m, C, x3, z3);
ecc_mod_sub (m, D, x3, z3);
- ecc_mod_mul (m, z2, E, AA); /* Last use of E and AA */
- ecc_mod_mul (m, DA, D, A); /* Last use of D, A. FIXME: could
- let CB overlap. */
- ecc_mod_mul (m, CB, C, B);
+ ecc_mod_mul (m, z2, E, AA, z2); /* Last use of E and AA */
+ ecc_mod_mul (m, DA, D, A, DA); /* Last use of D, A. FIXME: could
+ let CB overlap. */
+ ecc_mod_mul (m, CB, C, B, CB);
ecc_mod_add (m, C, DA, CB);
- ecc_mod_sqr (m, x3, C);
+ ecc_mod_sqr (m, x3, C, x3);
ecc_mod_sub (m, C, DA, CB);
- ecc_mod_sqr (m, DA, C);
- ecc_mod_mul (m, z3, DA, px);
+ ecc_mod_sqr (m, DA, C, DA);
+ ecc_mod_mul (m, z3, DA, px, z3);
/* FIXME: Could be combined with the loop's initial mpn_cnd_swap. */
mpn_cnd_swap (bit, x2, x3, 2*m->size);
@@ -120,16 +120,16 @@ ecc_mul_m (const struct ecc_modulo *m,
{
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A);
- ecc_mod_sqr (m, BB, B);
- ecc_mod_mul (m, x2, AA, BB);
+ ecc_mod_sqr (m, AA, A, AA);
+ ecc_mod_sqr (m, BB, B, BB);
+ ecc_mod_mul (m, x2, AA, BB, x2);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_mul (m, z2, E, AA);
+ ecc_mod_mul (m, z2, E, AA, z2);
}
assert (m->invert_itch <= 7 * m->size);
m->invert (m, x3, z2, z3 + m->size);
- ecc_mod_mul (m, z3, x2, x3);
+ ecc_mod_mul (m, z3, x2, x3, z3);
cy = mpn_sub_n (qx, z3, m->m, m->size);
cnd_copy (cy, qx, z3, m->size);
}
diff --git a/eddsa-decompress.c b/eddsa-decompress.c
index 8116084d..dc92daa0 100644
--- a/eddsa-decompress.c
+++ b/eddsa-decompress.c
@@ -90,8 +90,8 @@ _eddsa_decompress (const struct ecc_curve *ecc, mp_limb_t *p,
/* For a valid input, y < p, so subtraction should underflow. */
res &= mpn_sub_n (scratch, scratch, ecc->p.m, ecc->p.size);
- ecc_mod_sqr (&ecc->p, y2, yp);
- ecc_mod_mul (&ecc->p, vp, y2, ecc->b);
+ ecc_mod_sqr (&ecc->p, y2, yp, y2);
+ ecc_mod_mul (&ecc->p, vp, y2, ecc->b, vp);
ecc_mod_sub (&ecc->p, vp, vp, ecc->unit);
/* The sign is different between curve25519 and curve448. */
if (ecc->p.bit_size == 255)
diff --git a/eddsa-sign.c b/eddsa-sign.c
index f8bdf255..c1a23cd7 100644
--- a/eddsa-sign.c
+++ b/eddsa-sign.c
@@ -91,7 +91,7 @@ _eddsa_sign (const struct ecc_curve *ecc,
eddsa->digest (ctx, 2*nbytes, hash);
_eddsa_hash (&ecc->q, hp, 2*nbytes, hash);
- ecc_mod_mul (&ecc->q, sp, hp, k2);
+ ecc_mod_mul (&ecc->q, sp, hp, k2, sp);
ecc_mod_add (&ecc->q, sp, sp, rp); /* FIXME: Can be plain add */
if (ecc->p.bit_size == 255)
{
diff --git a/eddsa-verify.c b/eddsa-verify.c
index a0ffe0c4..de68f240 100644
--- a/eddsa-verify.c
+++ b/eddsa-verify.c
@@ -53,11 +53,11 @@ equal_h (const struct ecc_modulo *p,
#define t0 scratch
#define t1 (scratch + p->size)
- ecc_mod_mul (p, t0, x1, z2);
+ ecc_mod_mul (p, t0, x1, z2, t0);
if (mpn_cmp (t0, p->m, p->size) >= 0)
mpn_sub_n (t0, t0, p->m, p->size);
- ecc_mod_mul (p, t1, x2, z1);
+ ecc_mod_mul (p, t1, x2, z1, t1);
if (mpn_cmp (t1, p->m, p->size) >= 0)
mpn_sub_n (t1, t1, p->m, p->size);
diff --git a/gostdsa-vko.c b/gostdsa-vko.c
index 7bdcdfc3..a02d59a9 100644
--- a/gostdsa-vko.c
+++ b/gostdsa-vko.c
@@ -87,7 +87,7 @@ gostdsa_vko (const struct ecc_scalar *priv,
if (mpn_zero_p (UKM, size))
UKM[0] = 1;
- ecc_mod_mul (&ecc->q, TEMP, priv->p, UKM); /* TEMP = UKM * priv */
+ ecc_mod_mul (&ecc->q, TEMP, priv->p, UKM, TEMP); /* TEMP = UKM * priv */
ecc->mul (ecc, XYZ, TEMP, pub->p, scratch + 4*size); /* XYZ = UKM * priv * pub */
ecc->h_to_a (ecc, 0, TEMP, XYZ, scratch + 5*size); /* TEMP = XYZ */
mpn_get_base256_le (out, bsize, TEMP, size);