From 001f561974b823418c8353df770b3b1b5129cde0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?=
Date: Thu, 5 Nov 2020 20:37:11 +0100
Subject: Reduce scratch need for ecc_dup_jj

---
 ecc-dup-jj.c | 88 ++++++++++++++++++++++++++++--------------------------------
 1 file changed, 41 insertions(+), 47 deletions(-)

(limited to 'ecc-dup-jj.c')

diff --git a/ecc-dup-jj.c b/ecc-dup-jj.c
index c338971b..c793097a 100644
--- a/ecc-dup-jj.c
+++ b/ecc-dup-jj.c
@@ -47,6 +47,14 @@ ecc_dup_jj (const struct ecc_curve *ecc,
             mp_limb_t *r, const mp_limb_t *p,
             mp_limb_t *scratch)
 {
+#define x1 p
+#define y1 (p + ecc->p.size)
+#define z1 (p + 2*ecc->p.size)
+
+#define x2 r
+#define y2 (r + ecc->p.size)
+#define z2 (r + 2*ecc->p.size)
+
   /* Formulas (from djb,
      http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b):
 
@@ -60,51 +68,37 @@ ecc_dup_jj (const struct ecc_curve *ecc,
      y' = alpha*(4*beta-x')-8*gamma^2 mul, sqr
   */
 
-#define delta scratch
-#define gamma (scratch + ecc->p.size)
-#define beta (scratch + 2*ecc->p.size)
-#define g2 (scratch + 3*ecc->p.size)
-#define sum (scratch + 4*ecc->p.size)
-#define alpha scratch /* Overlap delta */
-
-#define xp p
-#define yp (p + ecc->p.size)
-#define zp (p + 2*ecc->p.size)
-
-  /* delta */
-  ecc_mod_sqr (&ecc->p, delta, zp, delta);
-
-  /* gamma */
-  ecc_mod_sqr (&ecc->p, gamma, yp, gamma);
-
-  /* z'. Can use beta area as scratch. */
-  ecc_mod_add (&ecc->p, r + 2*ecc->p.size, yp, zp);
-  ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size, beta);
-  ecc_mod_sub (&ecc->p, beta, beta, gamma);
-  ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, beta, delta);
-
-  /* alpha. Can use beta area as scratch, and overwrite delta. */
-  ecc_mod_add (&ecc->p, sum, xp, delta);
-  ecc_mod_sub (&ecc->p, delta, xp, delta);
-  ecc_mod_mul (&ecc->p, beta, sum, delta, beta);
-  ecc_mod_mul_1 (&ecc->p, alpha, beta, 3);
-
-  /* beta */
-  ecc_mod_mul (&ecc->p, beta, xp, gamma, beta);
-
-  /* Do gamma^2 and 4*beta early, to get them out of the way. We can
-     then use the old area at gamma as scratch. */
-  ecc_mod_sqr (&ecc->p, g2, gamma, g2);
-  ecc_mod_mul_1 (&ecc->p, sum, beta, 4);
-
-  /* x' */
-  ecc_mod_sqr (&ecc->p, gamma, alpha, gamma); /* Overwrites gamma and beta */
-  ecc_mod_submul_1 (&ecc->p, gamma, sum, 2);
-  mpn_copyi (r, gamma, ecc->p.size);
-
-  /* y' */
-  ecc_mod_sub (&ecc->p, sum, sum, r);
-  ecc_mod_mul (&ecc->p, gamma, sum, alpha, gamma);
-  ecc_mod_submul_1 (&ecc->p, gamma, g2, 8);
-  mpn_copyi (r + ecc->p.size, gamma, ecc->p.size);
+#define gamma scratch
+#define delta (scratch + ecc->p.size)
+#define alpha delta
+
+#define beta (scratch + 2*ecc->p.size)
+#define sum (scratch + 3*ecc->p.size)
+
+  ecc_mod_sqr (&ecc->p, gamma, y1, gamma); /* x, y, z, gamma */
+  ecc_mod_sqr (&ecc->p, delta, z1, delta); /* x, y, z, gamma, delta */
+
+  ecc_mod_add (&ecc->p, sum, z1, y1); /* x, gamma, delta, s */
+  ecc_mod_sqr (&ecc->p, sum, sum, y2); /* Can use y-z as scratch */
+  ecc_mod_sub (&ecc->p, z2, sum, delta); /* x, z, gamma, delta */
+  ecc_mod_sub (&ecc->p, z2, z2, gamma);
+
+  ecc_mod_mul (&ecc->p, beta, x1, gamma, beta); /* x, z, gamma, delta, beta */
+
+  ecc_mod_add (&ecc->p, sum, x1, delta); /* x, sum, z', gamma, delta, beta */
+  ecc_mod_sub (&ecc->p, delta, x1, delta); /* sum, z', gamma, delta, beta */
+  /* This multiplication peaks the storage need; can use x-y for scratch. */
+  ecc_mod_mul (&ecc->p, alpha, sum, delta, x2); /* z', gamma, alpha, beta */
+  ecc_mod_mul_1 (&ecc->p, alpha, alpha, 3);
+
+  ecc_mod_mul_1 (&ecc->p, y2, beta, 4);
+
+  /* From now on, can use beta as scratch. */
+  ecc_mod_sqr (&ecc->p, x2, alpha, beta); /* alpha^2 */
+  ecc_mod_submul_1 (&ecc->p, x2, y2, 2); /* alpha^2 - 8 beta */
+
+  ecc_mod_sub (&ecc->p, y2, y2, x2); /* 4 beta - x' */
+  ecc_mod_mul (&ecc->p, y2, y2, alpha, beta);
+  ecc_mod_sqr (&ecc->p, gamma, gamma, beta);
+  ecc_mod_submul_1 (&ecc->p, y2, gamma, 8);
 }
-- 
cgit v1.2.1