summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2020-11-08 21:48:10 +0100
committerNiels Möller <nisse@lysator.liu.se>2020-11-08 21:55:27 +0100
commit6cc9861dfbd509065788fdbe56486bd80bac02db (patch)
tree6eeb18c88206e7b108f7e47c11c1d6e6e0b9ae43
parent92f657b3d6038be6d23c2dfab05e40bbcda79ebb (diff)
downloadnettle-6cc9861dfbd509065788fdbe56486bd80bac02db.tar.gz
Reduce scratch need for ecc_mul_m
-rw-r--r--ChangeLog2
-rw-r--r--ecc-internal.h2
-rw-r--r--ecc-mul-m.c111
3 files changed, 75 insertions, 40 deletions
diff --git a/ChangeLog b/ChangeLog
index a7acd45d..cc06e5a3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,10 @@
2020-11-08 Niels Möller <nisse@lysator.liu.se>
+ * ecc-mul-m.c (ecc_mul_m): Reduce scratch need.
* ecc-add-jja.c (ecc_add_jja): Reduce scratch need.
* ecc-add-jjj.c (ecc_add_jjj): Reduce scratch need.
* ecc-internal.h (ECC_ADD_JJA_ITCH, ECC_ADD_JJJ_ITCH): Now 5*size.
+ (ECC_MUL_M_ITCH): New 8*size.
2020-11-06 Niels Möller <nisse@lysator.liu.se>
diff --git a/ecc-internal.h b/ecc-internal.h
index 81c1c39a..39166f85 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -467,7 +467,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
#define ECC_MUL_A_EH_ITCH(size) \
(((3 << ECC_MUL_A_EH_WBITS) + 10) * (size))
#endif
-#define ECC_MUL_M_ITCH(size) (11*(size))
+#define ECC_MUL_M_ITCH(size) (8*(size))
#define ECC_ECDSA_SIGN_ITCH(size) (12*(size))
#define ECC_GOSTDSA_SIGN_ITCH(size) (12*(size))
#define ECC_MOD_RANDOM_ITCH(size) (size)
diff --git a/ecc-mul-m.c b/ecc-mul-m.c
index 2dfff6d1..820258ca 100644
--- a/ecc-mul-m.c
+++ b/ecc-mul-m.c
@@ -50,23 +50,49 @@ ecc_mul_m (const struct ecc_modulo *m,
unsigned i;
mp_limb_t cy;
- /* FIXME: Could save some more scratch space, e.g., by letting BB
- overlap C, D, and CB overlap A, D. And possibly reusing some of
- x2, z2, x3, z3. */
#define x2 (scratch)
#define z2 (scratch + m->size)
#define x3 (scratch + 2*m->size)
#define z3 (scratch + 3*m->size)
-#define A (scratch + 4*m->size)
-#define B (scratch + 5*m->size)
-#define C (scratch + 6*m->size)
-#define D (scratch + 7*m->size)
-#define AA (scratch + 8*m->size)
-#define BB (scratch +9*m->size)
-#define E (scratch + 9*m->size) /* Overlap BB */
-#define DA (scratch + 8*m->size) /* Overlap AA */
-#define CB (scratch + 9*m->size) /* Overlap BB */
+ /* Formulas from RFC 7748:
+
+ A = x_2 + z_2
+ AA = A^2
+ B = x_2 - z_2
+ BB = B^2
+ E = AA - BB
+ C = x_3 + z_3
+ D = x_3 - z_3
+ DA = D * A
+ CB = C * B
+ x_3 = (DA + CB)^2
+ z_3 = x_1 * (DA - CB)^2
+ x_2 = AA * BB
+ z_2 = E * (AA + a24 * E)
+
+ For pure doubling, we use:
+
+ A = x_2 + z_2
+ AA = A^2
+ B = x_2 - z_2
+ BB = B^2
+ E = AA - BB
+ x3 = AA * BB
+ z3 = E * (AA + a24 * E)
+ */
+
+#define A (scratch + 4*m->size)
+#define AA A
+#define D (scratch + 5*m->size)
+#define DA D
+
+#define tp (scratch + 6*m->size)
+
+ /* For the doubling formulas. */
+#define B D
+#define BB D
+#define E D
/* Initialize, x2 = px, z2 = 1 */
mpn_copyi (x2, px, m->size);
@@ -76,12 +102,12 @@ ecc_mul_m (const struct ecc_modulo *m,
/* Get x3, z3 from doubling. Since most significant bit is forced to 1. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A, AA);
- ecc_mod_sqr (m, BB, B, BB);
- ecc_mod_mul (m, x3, AA, BB, x3);
+ ecc_mod_sqr (m, AA, A, tp);
+ ecc_mod_sqr (m, BB, B, tp);
+ ecc_mod_mul (m, x3, AA, BB, tp);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_mul (m, z3, E, AA, z3);
+ ecc_mod_mul (m, z3, E, AA, tp);
for (i = bit_high; i >= bit_low; i--)
{
@@ -89,28 +115,35 @@ ecc_mul_m (const struct ecc_modulo *m,
mpn_cnd_swap (bit, x2, x3, 2*m->size);
- /* Formulas from RFC 7748. We compute new coordinates in
- memory-address order, since mul and sqr clobbers higher
- limbs. */
ecc_mod_add (m, A, x2, z2);
- ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A, AA);
- ecc_mod_sqr (m, BB, B, BB);
- ecc_mod_mul (m, x2, AA, BB, x2); /* Last use of BB */
- ecc_mod_sub (m, E, AA, BB);
- ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_add (m, C, x3, z3);
ecc_mod_sub (m, D, x3, z3);
- ecc_mod_mul (m, z2, E, AA, z2); /* Last use of E and AA */
- ecc_mod_mul (m, DA, D, A, DA); /* Last use of D, A. FIXME: could
- let CB overlap. */
- ecc_mod_mul (m, CB, C, B, CB);
+ ecc_mod_mul (m, DA, D, A, tp);
+ ecc_mod_sqr (m, AA, A, tp);
+
+ /* Store B, BB and E at z2 */
+ ecc_mod_sub (m, z2, x2, z2); /* B */
+ /* Store C and CB at z3 */
+ ecc_mod_add (m, z3, x3, z3); /* C */
+ ecc_mod_mul (m, z3, z3, z2, tp); /* CB */
+ ecc_mod_sqr (m, z2, z2, tp); /* BB */
+
+ /* Finish x2 */
+ ecc_mod_mul (m, x2, AA, z2, tp);
+
+ ecc_mod_sub (m, z2, AA, z2); /* E */
+
+ /* Finish z2 */
+ ecc_mod_addmul_1 (m, AA, z2, a24);
+ ecc_mod_mul (m, z2, z2, AA, tp);
+
+ /* Finish x3 */
+ ecc_mod_add (m, x3, DA, z3);
+ ecc_mod_sqr (m, x3, x3, tp);
- ecc_mod_add (m, C, DA, CB);
- ecc_mod_sqr (m, x3, C, x3);
- ecc_mod_sub (m, C, DA, CB);
- ecc_mod_sqr (m, DA, C, DA);
- ecc_mod_mul (m, z3, DA, px, z3);
+ /* Finish z3 */
+ ecc_mod_sub (m, z3, DA, z3); /* DA - CB */
+ ecc_mod_sqr (m, z3, z3, tp);
+ ecc_mod_mul (m, z3, z3, px, tp);
/* FIXME: Could be combined with the loop's initial mpn_cnd_swap. */
mpn_cnd_swap (bit, x2, x3, 2*m->size);
@@ -120,12 +153,12 @@ ecc_mul_m (const struct ecc_modulo *m,
{
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
- ecc_mod_sqr (m, AA, A, AA);
- ecc_mod_sqr (m, BB, B, BB);
- ecc_mod_mul (m, x2, AA, BB, x2);
+ ecc_mod_sqr (m, AA, A, tp);
+ ecc_mod_sqr (m, BB, B, tp);
+ ecc_mod_mul (m, x2, AA, BB, tp);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
- ecc_mod_mul (m, z2, E, AA, z2);
+ ecc_mod_mul (m, z2, E, AA, tp);
}
assert (m->invert_itch <= 7 * m->size);
m->invert (m, x3, z2, z3 + m->size);