diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-11-08 21:48:10 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-11-08 21:55:27 +0100 |
commit | 6cc9861dfbd509065788fdbe56486bd80bac02db (patch) | |
tree | 6eeb18c88206e7b108f7e47c11c1d6e6e0b9ae43 | |
parent | 92f657b3d6038be6d23c2dfab05e40bbcda79ebb (diff) | |
download | nettle-6cc9861dfbd509065788fdbe56486bd80bac02db.tar.gz |
Reduce scratch need for ecc_mul_m
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | ecc-internal.h | 2 | ||||
-rw-r--r-- | ecc-mul-m.c | 111 |
3 files changed, 75 insertions, 40 deletions
@@ -1,8 +1,10 @@ 2020-11-08 Niels Möller <nisse@lysator.liu.se> + * ecc-mul-m.c (ecc_mul_m): Reduce scratch need. * ecc-add-jja.c (ecc_add_jja): Reduce scratch need. * ecc-add-jjj.c (ecc_add_jjj): Reduce scratch need. * ecc-internal.h (ECC_ADD_JJA_ITCH, ECC_ADD_JJJ_ITCH): Now 5*size. + (ECC_MUL_M_ITCH): New 8*size. 2020-11-06 Niels Möller <nisse@lysator.liu.se> diff --git a/ecc-internal.h b/ecc-internal.h index 81c1c39a..39166f85 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -467,7 +467,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, #define ECC_MUL_A_EH_ITCH(size) \ (((3 << ECC_MUL_A_EH_WBITS) + 10) * (size)) #endif -#define ECC_MUL_M_ITCH(size) (11*(size)) +#define ECC_MUL_M_ITCH(size) (8*(size)) #define ECC_ECDSA_SIGN_ITCH(size) (12*(size)) #define ECC_GOSTDSA_SIGN_ITCH(size) (12*(size)) #define ECC_MOD_RANDOM_ITCH(size) (size) diff --git a/ecc-mul-m.c b/ecc-mul-m.c index 2dfff6d1..820258ca 100644 --- a/ecc-mul-m.c +++ b/ecc-mul-m.c @@ -50,23 +50,49 @@ ecc_mul_m (const struct ecc_modulo *m, unsigned i; mp_limb_t cy; - /* FIXME: Could save some more scratch space, e.g., by letting BB - overlap C, D, and CB overlap A, D. And possibly reusing some of - x2, z2, x3, z3. */ #define x2 (scratch) #define z2 (scratch + m->size) #define x3 (scratch + 2*m->size) #define z3 (scratch + 3*m->size) -#define A (scratch + 4*m->size) -#define B (scratch + 5*m->size) -#define C (scratch + 6*m->size) -#define D (scratch + 7*m->size) -#define AA (scratch + 8*m->size) -#define BB (scratch +9*m->size) -#define E (scratch + 9*m->size) /* Overlap BB */ -#define DA (scratch + 8*m->size) /* Overlap AA */ -#define CB (scratch + 9*m->size) /* Overlap BB */ + /* Formulas from RFC 7748: + + A = x_2 + z_2 + AA = A^2 + B = x_2 - z_2 + BB = B^2 + E = AA - BB + C = x_3 + z_3 + D = x_3 - z_3 + DA = D * A + CB = C * B + x_3 = (DA + CB)^2 + z_3 = x_1 * (DA - CB)^2 + x_2 = AA * BB + z_2 = E * (AA + a24 * E) + + For pure doubling, we use: + + A = x_2 + z_2 + AA = A^2 + B = x_2 - z_2 + BB = B^2 + E = AA - BB + x3 = AA * BB + z3 = E * (AA + a24 * E) + */ + +#define A (scratch + 4*m->size) +#define AA A +#define D (scratch + 5*m->size) +#define DA D + +#define tp (scratch + 6*m->size) + + /* For the doubling formulas. */ +#define B D +#define BB D +#define E D /* Initialize, x2 = px, z2 = 1 */ mpn_copyi (x2, px, m->size); @@ -76,12 +102,12 @@ ecc_mul_m (const struct ecc_modulo *m, /* Get x3, z3 from doubling. Since most significant bit is forced to 1. */ ecc_mod_add (m, A, x2, z2); ecc_mod_sub (m, B, x2, z2); - ecc_mod_sqr (m, AA, A, AA); - ecc_mod_sqr (m, BB, B, BB); - ecc_mod_mul (m, x3, AA, BB, x3); + ecc_mod_sqr (m, AA, A, tp); + ecc_mod_sqr (m, BB, B, tp); + ecc_mod_mul (m, x3, AA, BB, tp); ecc_mod_sub (m, E, AA, BB); ecc_mod_addmul_1 (m, AA, E, a24); - ecc_mod_mul (m, z3, E, AA, z3); + ecc_mod_mul (m, z3, E, AA, tp); for (i = bit_high; i >= bit_low; i--) { @@ -89,28 +115,35 @@ ecc_mul_m (const struct ecc_modulo *m, mpn_cnd_swap (bit, x2, x3, 2*m->size); - /* Formulas from RFC 7748. We compute new coordinates in - memory-address order, since mul and sqr clobbers higher - limbs. */ ecc_mod_add (m, A, x2, z2); - ecc_mod_sub (m, B, x2, z2); - ecc_mod_sqr (m, AA, A, AA); - ecc_mod_sqr (m, BB, B, BB); - ecc_mod_mul (m, x2, AA, BB, x2); /* Last use of BB */ - ecc_mod_sub (m, E, AA, BB); - ecc_mod_addmul_1 (m, AA, E, a24); - ecc_mod_add (m, C, x3, z3); ecc_mod_sub (m, D, x3, z3); - ecc_mod_mul (m, z2, E, AA, z2); /* Last use of E and AA */ - ecc_mod_mul (m, DA, D, A, DA); /* Last use of D, A. FIXME: could - let CB overlap. */ - ecc_mod_mul (m, CB, C, B, CB); + ecc_mod_mul (m, DA, D, A, tp); + ecc_mod_sqr (m, AA, A, tp); + + /* Store B, BB and E at z2 */ + ecc_mod_sub (m, z2, x2, z2); /* B */ + /* Store C and CB at z3 */ + ecc_mod_add (m, z3, x3, z3); /* C */ + ecc_mod_mul (m, z3, z3, z2, tp); /* CB */ + ecc_mod_sqr (m, z2, z2, tp); /* BB */ + + /* Finish x2 */ + ecc_mod_mul (m, x2, AA, z2, tp); + + ecc_mod_sub (m, z2, AA, z2); /* E */ + + /* Finish z2 */ + ecc_mod_addmul_1 (m, AA, z2, a24); + ecc_mod_mul (m, z2, z2, AA, tp); + + /* Finish x3 */ + ecc_mod_add (m, x3, DA, z3); + ecc_mod_sqr (m, x3, x3, tp); - ecc_mod_add (m, C, DA, CB); - ecc_mod_sqr (m, x3, C, x3); - ecc_mod_sub (m, C, DA, CB); - ecc_mod_sqr (m, DA, C, DA); - ecc_mod_mul (m, z3, DA, px, z3); + /* Finish z3 */ + ecc_mod_sub (m, z3, DA, z3); /* DA - CB */ + ecc_mod_sqr (m, z3, z3, tp); + ecc_mod_mul (m, z3, z3, px, tp); /* FIXME: Could be combined with the loop's initial mpn_cnd_swap. */ mpn_cnd_swap (bit, x2, x3, 2*m->size); @@ -120,12 +153,12 @@ ecc_mul_m (const struct ecc_modulo *m, { ecc_mod_add (m, A, x2, z2); ecc_mod_sub (m, B, x2, z2); - ecc_mod_sqr (m, AA, A, AA); - ecc_mod_sqr (m, BB, B, BB); - ecc_mod_mul (m, x2, AA, BB, x2); + ecc_mod_sqr (m, AA, A, tp); + ecc_mod_sqr (m, BB, B, tp); + ecc_mod_mul (m, x2, AA, BB, tp); ecc_mod_sub (m, E, AA, BB); ecc_mod_addmul_1 (m, AA, E, a24); - ecc_mod_mul (m, z2, E, AA, z2); + ecc_mod_mul (m, z2, E, AA, tp); } assert (m->invert_itch <= 7 * m->size); m->invert (m, x3, z2, z3 + m->size); |