diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-11-08 11:17:20 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-11-08 11:17:20 +0100 |
commit | 110efbf4b740746cb9067dab194d3e652e3e92a2 (patch) | |
tree | 20e064f461e0a88970174eb5a4123874628d653b /ecc-add-jjj.c | |
parent | fd0392de706b8093effef6c92daef309266c620d (diff) | |
download | nettle-110efbf4b740746cb9067dab194d3e652e3e92a2.tar.gz |
Reduce scratch need for ecc_add_jjj
Diffstat (limited to 'ecc-add-jjj.c')
-rw-r--r-- | ecc-add-jjj.c | 123 |
1 file changed, 69 insertions, 54 deletions
diff --git a/ecc-add-jjj.c b/ecc-add-jjj.c index 5c416b81..a5a7e7a0 100644 --- a/ecc-add-jjj.c +++ b/ecc-add-jjj.c @@ -43,6 +43,17 @@ ecc_add_jjj (const struct ecc_curve *ecc, mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q, mp_limb_t *scratch) { +#define x1 p +#define y1 (p + ecc->p.size) +#define z1 (p + 2*ecc->p.size) + +#define x2 q +#define y2 (q + ecc->p.size) +#define z2 (q + 2*ecc->p.size) + +#define x3 r +#define y3 (r + ecc->p.size) +#define z3 (r + 2*ecc->p.size) /* Formulas, from djb, http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl: @@ -63,58 +74,62 @@ ecc_add_jjj (const struct ecc_curve *ecc, X3 = W^2-J-2*V sqr S1, W, J, V Y3 = W*(V-X3)-2*S1*J mul, mul */ - mp_limb_t *z1z1 = scratch; - mp_limb_t *z2z2 = scratch + ecc->p.size; - mp_limb_t *u1 = scratch + 2*ecc->p.size; - mp_limb_t *u2 = scratch + 3*ecc->p.size; - mp_limb_t *s1 = scratch; /* overlap z1z1 */ - mp_limb_t *s2 = scratch + ecc->p.size; /* overlap z2z2 */ - mp_limb_t *i = scratch + 4*ecc->p.size; - mp_limb_t *j = scratch + 5*ecc->p.size; - mp_limb_t *v = scratch + 6*ecc->p.size; - - /* z1^2, z2^2, u1 = x1 x2^2, u2 = x2 z1^2 - u1 */ - ecc_mod_sqr (&ecc->p, z1z1, p + 2*ecc->p.size, z1z1); - ecc_mod_sqr (&ecc->p, z2z2, q + 2*ecc->p.size, z2z2); - ecc_mod_mul (&ecc->p, u1, p, z2z2, u1); - ecc_mod_mul (&ecc->p, u2, q, z1z1, u2); - ecc_mod_sub (&ecc->p, u2, u2, u1); /* Store h in u2 */ - - /* z3, use i, j, v as scratch, result at i. */ - ecc_mod_add (&ecc->p, i, p + 2*ecc->p.size, q + 2*ecc->p.size); - ecc_mod_sqr (&ecc->p, v, i, v); - ecc_mod_sub (&ecc->p, v, v, z1z1); - ecc_mod_sub (&ecc->p, v, v, z2z2); - ecc_mod_mul (&ecc->p, i, v, u2, i); - /* Delayed write, to support in-place operation. 
*/ - - /* s1 = y1 z2^3, s2 = y2 z1^3, scratch at j and v */ - ecc_mod_mul (&ecc->p, j, z1z1, p + 2*ecc->p.size, j); /* z1^3 */ - ecc_mod_mul (&ecc->p, v, z2z2, q + 2*ecc->p.size, v); /* z2^3 */ - ecc_mod_mul (&ecc->p, s1, p + ecc->p.size, v, s1); - ecc_mod_mul (&ecc->p, v, j, q + ecc->p.size, v); - ecc_mod_sub (&ecc->p, s2, v, s1); - ecc_mod_mul_1 (&ecc->p, s2, s2, 2); - - /* Store z3 */ - mpn_copyi (r + 2*ecc->p.size, i, ecc->p.size); - - /* i, j, v */ - ecc_mod_sqr (&ecc->p, i, u2, i); - ecc_mod_mul_1 (&ecc->p, i, i, 4); - ecc_mod_mul (&ecc->p, j, u2, i, j); - ecc_mod_mul (&ecc->p, v, u1, i, v); - - /* now, u1, u2 and i are free for reuse .*/ - /* x3, use u1, u2 as scratch */ - ecc_mod_sqr (&ecc->p, u1, s2, u1); - ecc_mod_sub (&ecc->p, r, u1, j); - ecc_mod_submul_1 (&ecc->p, r, v, 2); - - /* y3 */ - ecc_mod_mul (&ecc->p, u1, s1, j, u1); /* Frees j */ - ecc_mod_sub (&ecc->p, u2, v, r); /* Frees v */ - ecc_mod_mul (&ecc->p, i, s2, u2, i); - ecc_mod_submul_1 (&ecc->p, i, u1, 2); - mpn_copyi (r + ecc->p.size, i, ecc->p.size); + +#define h scratch +#define z1z1 (scratch + ecc->p.size) +#define z2z2 (scratch + 2*ecc->p.size) +#define z1z2 (scratch + 3*ecc->p.size) + +#define w (scratch + ecc->p.size) +#define i (scratch + 2*ecc->p.size) +#define j h +#define v i + +#define tp (scratch + 4*ecc->p.size) + + ecc_mod_sqr (&ecc->p, z1z1, z1, tp); /* z1z1 */ + ecc_mod_sqr (&ecc->p, z2z2, z2, tp); /* z1z1, z2z2 */ + /* Store u1 at x3 */ + ecc_mod_mul (&ecc->p, x3, x1, z2z2, tp); /* z1z1, z2z2 */ + ecc_mod_mul (&ecc->p, h, x2, z1z1, tp); /* z1z1, z2z2, h */ + ecc_mod_sub (&ecc->p, h, h, x3); + + ecc_mod_add (&ecc->p, z1z2, z1, z2); /* z1z1, z2z2, z1z2, h */ + ecc_mod_sqr (&ecc->p, z1z2, z1z2, tp); + ecc_mod_sub (&ecc->p, z1z2, z1z2, z1z1); + ecc_mod_sub (&ecc->p, z1z2, z1z2, z2z2); + + /* z1^3, z2^3 */ + ecc_mod_mul (&ecc->p, z1z1, z1z1, z1, tp); + ecc_mod_mul (&ecc->p, z2z2, z2z2, z2, tp); + + /* z3 <-- h z1 z2 delayed until now, since that may clobber z1. 
*/ + ecc_mod_mul (&ecc->p, z3, z1z2, h, tp); /* z1z1, z2z2, h */ + /* Store s1 at y3 */ + ecc_mod_mul (&ecc->p, y3, z2z2, y1, tp); /* z1z1, h */ + /* w = 2 (s2 - s1) */ + ecc_mod_mul (&ecc->p, w, z1z1, y2, tp); /* h, w */ + ecc_mod_sub (&ecc->p, w, w, y3); + ecc_mod_add (&ecc->p, w, w, w); + + /* i = (2h)^2 */ + ecc_mod_add (&ecc->p, i, h, h); /* h, w, i */ + ecc_mod_sqr (&ecc->p, i, i, tp); + + /* j and h can overlap */ + ecc_mod_mul (&ecc->p, j, h, i, tp); /* j, w, i */ + + /* v and i can overlap */ + ecc_mod_mul (&ecc->p, v, x3, i, tp); /* j, w, v */ + + /* x3 <-- w^2 - j - 2v */ + ecc_mod_sqr (&ecc->p, x3, w, tp); + ecc_mod_sub (&ecc->p, x3, x3, j); + ecc_mod_submul_1 (&ecc->p, x3, v, 2); + + /* y3 <-- w (v - x3) - 2 s1 j */ + ecc_mod_mul (&ecc->p, j, j, y3, tp); + ecc_mod_sub (&ecc->p, v, v, x3); + ecc_mod_mul (&ecc->p, y3, v, w, tp); + ecc_mod_submul_1 (&ecc->p, y3, j, 2); } |