summary | refs | log | tree | commit | diff
path: root/ecc-add-jjj.c
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2020-11-08 11:17:20 +0100
committer Niels Möller <nisse@lysator.liu.se> 2020-11-08 11:17:20 +0100
commit 110efbf4b740746cb9067dab194d3e652e3e92a2 (patch)
tree 20e064f461e0a88970174eb5a4123874628d653b /ecc-add-jjj.c
parent fd0392de706b8093effef6c92daef309266c620d (diff)
download nettle-110efbf4b740746cb9067dab194d3e652e3e92a2.tar.gz
Reduce scratch need for ecc_add_jjj
Diffstat (limited to 'ecc-add-jjj.c')
-rw-r--r-- ecc-add-jjj.c 123
1 files changed, 69 insertions, 54 deletions
diff --git a/ecc-add-jjj.c b/ecc-add-jjj.c
index 5c416b81..a5a7e7a0 100644
--- a/ecc-add-jjj.c
+++ b/ecc-add-jjj.c
@@ -43,6 +43,17 @@ ecc_add_jjj (const struct ecc_curve *ecc,
mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q,
mp_limb_t *scratch)
{
+#define x1 p
+#define y1 (p + ecc->p.size)
+#define z1 (p + 2*ecc->p.size)
+
+#define x2 q
+#define y2 (q + ecc->p.size)
+#define z2 (q + 2*ecc->p.size)
+
+#define x3 r
+#define y3 (r + ecc->p.size)
+#define z3 (r + 2*ecc->p.size)
/* Formulas, from djb,
http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl:
@@ -63,58 +74,62 @@ ecc_add_jjj (const struct ecc_curve *ecc,
X3 = W^2-J-2*V sqr S1, W, J, V
Y3 = W*(V-X3)-2*S1*J mul, mul
*/
- mp_limb_t *z1z1 = scratch;
- mp_limb_t *z2z2 = scratch + ecc->p.size;
- mp_limb_t *u1 = scratch + 2*ecc->p.size;
- mp_limb_t *u2 = scratch + 3*ecc->p.size;
- mp_limb_t *s1 = scratch; /* overlap z1z1 */
- mp_limb_t *s2 = scratch + ecc->p.size; /* overlap z2z2 */
- mp_limb_t *i = scratch + 4*ecc->p.size;
- mp_limb_t *j = scratch + 5*ecc->p.size;
- mp_limb_t *v = scratch + 6*ecc->p.size;
-
- /* z1^2, z2^2, u1 = x1 x2^2, u2 = x2 z1^2 - u1 */
- ecc_mod_sqr (&ecc->p, z1z1, p + 2*ecc->p.size, z1z1);
- ecc_mod_sqr (&ecc->p, z2z2, q + 2*ecc->p.size, z2z2);
- ecc_mod_mul (&ecc->p, u1, p, z2z2, u1);
- ecc_mod_mul (&ecc->p, u2, q, z1z1, u2);
- ecc_mod_sub (&ecc->p, u2, u2, u1); /* Store h in u2 */
-
- /* z3, use i, j, v as scratch, result at i. */
- ecc_mod_add (&ecc->p, i, p + 2*ecc->p.size, q + 2*ecc->p.size);
- ecc_mod_sqr (&ecc->p, v, i, v);
- ecc_mod_sub (&ecc->p, v, v, z1z1);
- ecc_mod_sub (&ecc->p, v, v, z2z2);
- ecc_mod_mul (&ecc->p, i, v, u2, i);
- /* Delayed write, to support in-place operation. */
-
- /* s1 = y1 z2^3, s2 = y2 z1^3, scratch at j and v */
- ecc_mod_mul (&ecc->p, j, z1z1, p + 2*ecc->p.size, j); /* z1^3 */
- ecc_mod_mul (&ecc->p, v, z2z2, q + 2*ecc->p.size, v); /* z2^3 */
- ecc_mod_mul (&ecc->p, s1, p + ecc->p.size, v, s1);
- ecc_mod_mul (&ecc->p, v, j, q + ecc->p.size, v);
- ecc_mod_sub (&ecc->p, s2, v, s1);
- ecc_mod_mul_1 (&ecc->p, s2, s2, 2);
-
- /* Store z3 */
- mpn_copyi (r + 2*ecc->p.size, i, ecc->p.size);
-
- /* i, j, v */
- ecc_mod_sqr (&ecc->p, i, u2, i);
- ecc_mod_mul_1 (&ecc->p, i, i, 4);
- ecc_mod_mul (&ecc->p, j, u2, i, j);
- ecc_mod_mul (&ecc->p, v, u1, i, v);
-
- /* now, u1, u2 and i are free for reuse .*/
- /* x3, use u1, u2 as scratch */
- ecc_mod_sqr (&ecc->p, u1, s2, u1);
- ecc_mod_sub (&ecc->p, r, u1, j);
- ecc_mod_submul_1 (&ecc->p, r, v, 2);
-
- /* y3 */
- ecc_mod_mul (&ecc->p, u1, s1, j, u1); /* Frees j */
- ecc_mod_sub (&ecc->p, u2, v, r); /* Frees v */
- ecc_mod_mul (&ecc->p, i, s2, u2, i);
- ecc_mod_submul_1 (&ecc->p, i, u1, 2);
- mpn_copyi (r + ecc->p.size, i, ecc->p.size);
+
+#define h scratch
+#define z1z1 (scratch + ecc->p.size)
+#define z2z2 (scratch + 2*ecc->p.size)
+#define z1z2 (scratch + 3*ecc->p.size)
+
+#define w (scratch + ecc->p.size)
+#define i (scratch + 2*ecc->p.size)
+#define j h
+#define v i
+
+#define tp (scratch + 4*ecc->p.size)
+
+ ecc_mod_sqr (&ecc->p, z1z1, z1, tp); /* z1z1 */
+ ecc_mod_sqr (&ecc->p, z2z2, z2, tp); /* z1z1, z2z2 */
+ /* Store u1 at x3 */
+ ecc_mod_mul (&ecc->p, x3, x1, z2z2, tp); /* z1z1, z2z2 */
+ ecc_mod_mul (&ecc->p, h, x2, z1z1, tp); /* z1z1, z2z2, h */
+ ecc_mod_sub (&ecc->p, h, h, x3);
+
+ ecc_mod_add (&ecc->p, z1z2, z1, z2); /* z1z1, z2z2, z1z2, h */
+ ecc_mod_sqr (&ecc->p, z1z2, z1z2, tp);
+ ecc_mod_sub (&ecc->p, z1z2, z1z2, z1z1);
+ ecc_mod_sub (&ecc->p, z1z2, z1z2, z2z2);
+
+ /* z1^3, z2^3 */
+ ecc_mod_mul (&ecc->p, z1z1, z1z1, z1, tp);
+ ecc_mod_mul (&ecc->p, z2z2, z2z2, z2, tp);
+
+ /* z3 <-- h z1 z2 delayed until now, since that may clobber z1. */
+ ecc_mod_mul (&ecc->p, z3, z1z2, h, tp); /* z1z1, z2z2, h */
+ /* Store s1 at y3 */
+ ecc_mod_mul (&ecc->p, y3, z2z2, y1, tp); /* z1z1, h */
+ /* w = 2 (s2 - s1) */
+ ecc_mod_mul (&ecc->p, w, z1z1, y2, tp); /* h, w */
+ ecc_mod_sub (&ecc->p, w, w, y3);
+ ecc_mod_add (&ecc->p, w, w, w);
+
+ /* i = (2h)^2 */
+ ecc_mod_add (&ecc->p, i, h, h); /* h, w, i */
+ ecc_mod_sqr (&ecc->p, i, i, tp);
+
+ /* j and h can overlap */
+ ecc_mod_mul (&ecc->p, j, h, i, tp); /* j, w, i */
+
+ /* v and i can overlap */
+ ecc_mod_mul (&ecc->p, v, x3, i, tp); /* j, w, v */
+
+ /* x3 <-- w^2 - j - 2v */
+ ecc_mod_sqr (&ecc->p, x3, w, tp);
+ ecc_mod_sub (&ecc->p, x3, x3, j);
+ ecc_mod_submul_1 (&ecc->p, x3, v, 2);
+
+ /* y3 <-- w (v - x3) - 2 s1 j */
+ ecc_mod_mul (&ecc->p, j, j, y3, tp);
+ ecc_mod_sub (&ecc->p, v, v, x3);
+ ecc_mod_mul (&ecc->p, y3, v, w, tp);
+ ecc_mod_submul_1 (&ecc->p, y3, j, 2);
}