summary | refs | log | tree | commit | diff
path: root/ecc-dup-jj.c
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2020-11-05 20:37:11 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2020-11-05 20:37:11 +0100
commit: 001f561974b823418c8353df770b3b1b5129cde0 (patch)
tree: e997c65e3c2655fddec57b3918c3369e15c0f943 /ecc-dup-jj.c
parent: 3c9e49b1d923c6b6169b11fc38dd6a21a60eaab4 (diff)
download: nettle-001f561974b823418c8353df770b3b1b5129cde0.tar.gz
Reduce scratch need for ecc_dup_jj
Diffstat (limited to 'ecc-dup-jj.c')
-rw-r--r-- ecc-dup-jj.c 88
1 files changed, 41 insertions, 47 deletions
diff --git a/ecc-dup-jj.c b/ecc-dup-jj.c
index c338971b..c793097a 100644
--- a/ecc-dup-jj.c
+++ b/ecc-dup-jj.c
@@ -47,6 +47,14 @@ ecc_dup_jj (const struct ecc_curve *ecc,
mp_limb_t *r, const mp_limb_t *p,
mp_limb_t *scratch)
{
+#define x1 p
+#define y1 (p + ecc->p.size)
+#define z1 (p + 2*ecc->p.size)
+
+#define x2 r
+#define y2 (r + ecc->p.size)
+#define z2 (r + 2*ecc->p.size)
+
/* Formulas (from djb,
http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b):
@@ -60,51 +68,37 @@ ecc_dup_jj (const struct ecc_curve *ecc,
y' = alpha*(4*beta-x')-8*gamma^2 mul, sqr
*/
-#define delta scratch
-#define gamma (scratch + ecc->p.size)
-#define beta (scratch + 2*ecc->p.size)
-#define g2 (scratch + 3*ecc->p.size)
-#define sum (scratch + 4*ecc->p.size)
-#define alpha scratch /* Overlap delta */
-
-#define xp p
-#define yp (p + ecc->p.size)
-#define zp (p + 2*ecc->p.size)
-
- /* delta */
- ecc_mod_sqr (&ecc->p, delta, zp, delta);
-
- /* gamma */
- ecc_mod_sqr (&ecc->p, gamma, yp, gamma);
-
- /* z'. Can use beta area as scratch. */
- ecc_mod_add (&ecc->p, r + 2*ecc->p.size, yp, zp);
- ecc_mod_sqr (&ecc->p, beta, r + 2*ecc->p.size, beta);
- ecc_mod_sub (&ecc->p, beta, beta, gamma);
- ecc_mod_sub (&ecc->p, r + 2*ecc->p.size, beta, delta);
-
- /* alpha. Can use beta area as scratch, and overwrite delta. */
- ecc_mod_add (&ecc->p, sum, xp, delta);
- ecc_mod_sub (&ecc->p, delta, xp, delta);
- ecc_mod_mul (&ecc->p, beta, sum, delta, beta);
- ecc_mod_mul_1 (&ecc->p, alpha, beta, 3);
-
- /* beta */
- ecc_mod_mul (&ecc->p, beta, xp, gamma, beta);
-
- /* Do gamma^2 and 4*beta early, to get them out of the way. We can
- then use the old area at gamma as scratch. */
- ecc_mod_sqr (&ecc->p, g2, gamma, g2);
- ecc_mod_mul_1 (&ecc->p, sum, beta, 4);
-
- /* x' */
- ecc_mod_sqr (&ecc->p, gamma, alpha, gamma); /* Overwrites gamma and beta */
- ecc_mod_submul_1 (&ecc->p, gamma, sum, 2);
- mpn_copyi (r, gamma, ecc->p.size);
-
- /* y' */
- ecc_mod_sub (&ecc->p, sum, sum, r);
- ecc_mod_mul (&ecc->p, gamma, sum, alpha, gamma);
- ecc_mod_submul_1 (&ecc->p, gamma, g2, 8);
- mpn_copyi (r + ecc->p.size, gamma, ecc->p.size);
+#define gamma scratch
+#define delta (scratch + ecc->p.size)
+#define alpha delta
+
+#define beta (scratch + 2*ecc->p.size)
+#define sum (scratch + 3*ecc->p.size)
+
+ ecc_mod_sqr (&ecc->p, gamma, y1, gamma); /* x, y, z, gamma */
+ ecc_mod_sqr (&ecc->p, delta, z1, delta); /* x, y, z, gamma, delta */
+
+ ecc_mod_add (&ecc->p, sum, z1, y1); /* x, gamma, delta, s */
+ ecc_mod_sqr (&ecc->p, sum, sum, y2); /* Can use y-z as scratch */
+ ecc_mod_sub (&ecc->p, z2, sum, delta); /* x, z, gamma, delta */
+ ecc_mod_sub (&ecc->p, z2, z2, gamma);
+
+ ecc_mod_mul (&ecc->p, beta, x1, gamma, beta); /* x, z, gamma, delta, beta */
+
+ ecc_mod_add (&ecc->p, sum, x1, delta); /* x, sum, z', gamma, delta, beta */
+ ecc_mod_sub (&ecc->p, delta, x1, delta); /* sum, z', gamma, delta, beta */
+ /* This multiplication peaks the storage need; can use x-y for scratch. */
+ ecc_mod_mul (&ecc->p, alpha, sum, delta, x2); /* z', gamma, alpha, beta */
+ ecc_mod_mul_1 (&ecc->p, alpha, alpha, 3);
+
+ ecc_mod_mul_1 (&ecc->p, y2, beta, 4);
+
+ /* From now on, can use beta as scratch. */
+ ecc_mod_sqr (&ecc->p, x2, alpha, beta); /* alpha^2 */
+ ecc_mod_submul_1 (&ecc->p, x2, y2, 2); /* alpha^2 - 8 beta */
+
+ ecc_mod_sub (&ecc->p, y2, y2, x2); /* 4 beta - x' */
+ ecc_mod_mul (&ecc->p, y2, y2, alpha, beta);
+ ecc_mod_sqr (&ecc->p, gamma, gamma, beta);
+ ecc_mod_submul_1 (&ecc->p, y2, gamma, 8);
}