summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2020-10-20 22:20:02 +0200
committer Niels Möller <nisse@lysator.liu.se> 2020-11-11 20:56:46 +0100
commit 219fcda017035d0e2af0af35b5a65115d092c1c2 (patch)
tree 968f354028b52fd59366e55af85d7f6ab9d80474
parent 8b3f84f8c47e3f425d5a1d322cea3557cefc5c89 (diff)
download nettle-219fcda017035d0e2af0af35b5a65115d092c1c2.tar.gz
Optimize modular inversion for secp224r1 and secp256r1
* ecc-secp224r1.c (ecc_secp224r1_inv): New function, modular inverse using powering.
(_nettle_secp_224r1): Analogous updates. Increases signing performance roughly 17% on x86_64.

* ecc-secp256r1.c (ecc_secp256r1_inv): New function, modular inverse using powering.
(_nettle_secp_256r1): Analogous updates. Increases signing performance roughly 6% on x86_64.
-rw-r--r-- ChangeLog 12
-rw-r--r-- ecc-secp224r1.c 57
-rw-r--r-- ecc-secp256r1.c 58
3 files changed, 121 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f71d045..aae9374e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2020-10-20 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-secp256r1.c (ecc_secp256r1_inv): New function, modular
+ inverse using powering.
+ (_nettle_secp_256r1): Analogous updates. Increases signing
+ performance roughly 6% on x86_64.
+
+ * ecc-secp224r1.c (ecc_secp224r1_inv): New function, modular
+ inverse using powering.
+ (_nettle_secp_224r1): Analogous updates. Increases signing
+ performance roughly 17% on x86_64.
+
2020-10-19 Niels Möller <nisse@lysator.liu.se>
* ecc-secp521r1.c (ecc_secp521r1_inv): New function, modular
diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c
index 30a9b5a9..4b0fd9ba 100644
--- a/ecc-secp224r1.c
+++ b/ecc-secp224r1.c
@@ -62,6 +62,57 @@ ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp);
# error Configuration error
#endif
+#define ECC_SECP224R1_INV_ITCH (4*ECC_LIMB_SIZE)
+
+static void
+ecc_secp224r1_inv (const struct ecc_modulo *p,
+ mp_limb_t *rp, const mp_limb_t *ap,
+ mp_limb_t *scratch)
+{
+#define a7 scratch
+#define t0 (scratch + 1*ECC_LIMB_SIZE)
+#define a31m1 t0 /* shares storage with t0 */
+#define a96m1 a7 /* reuses a7's storage; written only after the last read of a7 */
+#define tp (scratch + 2*ECC_LIMB_SIZE)
+
+ /* Inversion by powering (rp = ap^{p-2}). Addition chain for p - 2 = 2^{224} - 2^{96} - 1
+
+ 7 = 1 + 2 (2+1) 2 S + 2 M
+ 2^{31} - 1 = 1 + 2 (2^{15} + 1)(1 + 2 (2^7 + 1) (1 + 2 (2^3+1) * 7))
+ 28 S + 6 M
+ 2^{34} - 1 = 2^3 (2^{31} - 1) + 7 3 S + M
+ 2^{65} - 1 = 2^{31}(2^{34} - 1) + 2^{31} - 1 31 S + M
+ 2^{96} - 1 = 2^{31}(2^{65} - 1) + 2^{31} - 1 31 S + M
+ 2^{127} - 1 = 2^{31}(2^{96} - 1) + 2^{31} - 1 31 S + M
+
+ 2^{224} - 2^{96} - 1 97 S + M
+ = 2^{97}(2^{127} - 1) + 2^{96} - 1
+
+ This addition chain needs 223 squarings and 13 multiplies.
+ */
+ ecc_mod_sqr (p, rp, ap, tp); /* a^2 */
+ ecc_mod_mul (p, rp, rp, ap, tp); /* a^3 */
+ ecc_mod_sqr (p, rp, rp, tp); /* a^6 */
+ ecc_mod_mul (p, a7, rp, ap, tp); /* a^{2^3-1} a7 */
+
+ ecc_mod_pow_2kp1 (p, rp, a7, 3, tp); /* a^{2^6 - 1} */
+ ecc_mod_sqr (p, rp, rp, tp); /* a^{2^7 - 2} */
+ ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^7 - 1} */
+ ecc_mod_pow_2kp1 (p, t0, rp, 7, tp); /* a^{2^{14} - 1} */
+ ecc_mod_sqr (p, rp, t0, tp); /* a^{2^{15} - 2} */
+ ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^{15} - 1} */
+ ecc_mod_pow_2kp1 (p, t0, rp, 15, tp); /* a^{2^{30} - 1} */
+ ecc_mod_sqr (p, rp, t0, tp); /* a^{2^{31} - 2} */
+ ecc_mod_mul (p, a31m1, rp, ap, tp); /* a^{2^{31} - 1} a7, a31m1 */
+
+ ecc_mod_pow_2k_mul (p, rp, a31m1, 3, a7, tp); /* a^{2^{34} - 1} a31m1 */
+ ecc_mod_pow_2k_mul (p, rp, rp, 31, a31m1, tp); /* a^{2^{65} - 1} a31m1 */
+ ecc_mod_pow_2k_mul (p, a96m1, rp, 31, a31m1, tp); /* a^{2^{96} - 1} a31m1, a96m1 */
+ ecc_mod_pow_2k_mul (p, rp, a96m1, 31, a31m1, tp); /* a^{2^{127} - 1} a96m1 */
+ ecc_mod_pow_2k_mul (p, rp, rp, 97, a96m1, tp); /* a^{2^{224} - 2^{96} - 1} */
+}
+
+
const struct ecc_curve _nettle_secp_224r1 =
{
{
@@ -69,7 +120,7 @@ const struct ecc_curve _nettle_secp_224r1 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
-ECC_REDC_SIZE,
- ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
+ ECC_SECP224R1_INV_ITCH,
0,
ecc_p,
@@ -80,7 +131,7 @@ const struct ecc_curve _nettle_secp_224r1 =
ecc_secp224r1_modp,
USE_REDC ? ecc_secp224r1_redc : ecc_secp224r1_modp,
- USE_REDC ? ecc_mod_inv_redc : ecc_mod_inv,
+ ecc_secp224r1_inv,
NULL,
},
{
@@ -112,7 +163,7 @@ const struct ecc_curve _nettle_secp_224r1 =
ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
- ECC_J_TO_A_ITCH (ECC_LIMB_SIZE),
+ 2*ECC_LIMB_SIZE + ECC_SECP224R1_INV_ITCH,
ecc_add_jja,
ecc_add_jjj,
diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c
index e7ac62c4..d813da5d 100644
--- a/ecc-secp256r1.c
+++ b/ecc-secp256r1.c
@@ -243,6 +243,58 @@ ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
#error Unsupported parameters
#endif
+#define ECC_SECP256R1_INV_ITCH (4*ECC_LIMB_SIZE)
+
+static void
+ecc_secp256r1_inv (const struct ecc_modulo *p,
+ mp_limb_t *rp, const mp_limb_t *ap,
+ mp_limb_t *scratch)
+{
+#define a5m1 scratch
+#define t0 (scratch + ECC_LIMB_SIZE)
+#define a15m1 t0 /* shares storage with t0 */
+#define a32m1 a5m1 /* reuses a5m1's storage; written only after the last read of a5m1 */
+#define tp (scratch + 2*ECC_LIMB_SIZE)
+/* Inversion by powering (rp = ap^{p-2}).
+ Addition chain for p - 2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3
+
+ 2^5 - 1 = 1 + 2 (2^4 - 1) = 1 + 2 (2^2+1)(2 + 1) 4 S + 3 M
+ 2^{15} - 1 = (2^5 - 1) (1 + 2^5 (1 + 2^5)) 10 S + 2 M
+ 2^{16} - 1 = 1 + 2 (2^{15} - 1) S + M
+ 2^{32} - 1 = (2^{16} + 1) (2^{16} - 1) 16 S + M
+ 2^{64} - 2^{32} + 1 = 2^{32} (2^{32} - 1) + 1 32 S + M
+ 2^{192} - 2^{160} + 2^{128} + 2^{32} - 1
+ = 2^{128} (2^{64} - 2^{32} + 1) + 2^{32} - 1 128 S + M
+ 2^{224} - 2^{192} + 2^{160} + 2^{64} - 1
+ = 2^{32} (...) + 2^{32} - 1 32 S + M
+ 2^{239} - 2^{207} + 2^{175} + 2^{79} - 1
+ = 2^{15} (...) + 2^{15} - 1 15 S + M
+ 2^{254} - 2^{222} + 2^{190} + 2^{94} - 1
+ = 2^{15} (...) + 2^{15} - 1 15 S + M
+ p - 2 = 2^2 (...) + 1 2 S + M
+ ---------------
+ 255 S + 13 M
+ */
+ ecc_mod_sqr (p, rp, ap, tp); /* a^2 */
+ ecc_mod_mul (p, rp, rp, ap, tp); /* a^3 */
+ ecc_mod_pow_2kp1 (p, t0, rp, 2, tp); /* a^{2^4 - 1} */
+ ecc_mod_sqr (p, rp, t0, tp); /* a^{2^5 - 2} */
+ ecc_mod_mul (p, a5m1, rp, ap, tp); /* a^{2^5 - 1}, a5m1 */
+
+ ecc_mod_pow_2kp1 (p, rp, a5m1, 5, tp); /* a^{2^{10} - 1}, a5m1 */
+ ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, tp); /* a^{2^{15} - 1}, a5m1 a15m1 */
+ ecc_mod_sqr (p, rp, a15m1, tp); /* a^{2^{16} - 2}, a15m1 */
+ ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^{16} - 1}, a15m1 */
+ ecc_mod_pow_2kp1 (p, a32m1, rp, 16, tp); /* a^{2^{32} - 1}, a15m1, a32m1 */
+
+ ecc_mod_pow_2k_mul (p, rp, a32m1, 32, ap, tp);/* a^{2^{64} - 2^{32} + 1} */
+ ecc_mod_pow_2k_mul (p, rp, rp, 128, a32m1, tp); /* a^{2^{192} - 2^{160} + 2^{128} + 2^{32} - 1} */
+ ecc_mod_pow_2k_mul (p, rp, rp, 32, a32m1, tp);/* a^{2^{224} - 2^{192} + 2^{160} + 2^{64} - 1} */
+ ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{239} - 2^{207} + 2^{175} + 2^{79} - 1} */
+ ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{254} - 2^{222} + 2^{190} + 2^{94} - 1} */
+ ecc_mod_pow_2k_mul (p, rp, rp, 2, ap, tp); /* a^{2^{256} - 2^{224} + 2^{192} + 2^{96} - 3} */
+}
+
const struct ecc_curve _nettle_secp_256r1 =
{
{
@@ -250,7 +302,7 @@ const struct ecc_curve _nettle_secp_256r1 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
ECC_REDC_SIZE,
- ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
+ ECC_SECP256R1_INV_ITCH,
0,
ecc_p,
@@ -261,7 +313,7 @@ const struct ecc_curve _nettle_secp_256r1 =
ecc_secp256r1_modp,
USE_REDC ? ecc_secp256r1_redc : ecc_secp256r1_modp,
- USE_REDC ? ecc_mod_inv_redc : ecc_mod_inv,
+ ecc_secp256r1_inv,
NULL,
},
{
@@ -293,7 +345,7 @@ const struct ecc_curve _nettle_secp_256r1 =
ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
- ECC_J_TO_A_ITCH (ECC_LIMB_SIZE),
+ 2*ECC_LIMB_SIZE + ECC_SECP256R1_INV_ITCH,
ecc_add_jja,
ecc_add_jjj,