| author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2021-06-28 21:01:32 +0300 |
|---|---|---|
| committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2021-06-30 17:57:30 +0300 |
| commit | fc92c609dfdbcf59a09ca3aaf53a1c1b8408c351 (patch) | |
| tree | 71e4c149f795f1750dec9b9b73836c404b8bcbdb /mpi | |
| parent | 6dfab8cfb94ccb485a15b13df3c499cbb06fddf2 (diff) | |
| download | libgcrypt-fc92c609dfdbcf59a09ca3aaf53a1c1b8408c351.tar.gz | |
ec-nist: fix 'mod p' carry adjustment and output masking
* mpi/ec-inline.h (MASK_AND64, LIMB_OR64): New.
[__x86_64__]: Use the "rme" operand constraint instead of "g" so that
large 32-bit constants, which cannot be encoded as sign-extended
immediates, are not passed as immediate operands (see the sketch
after the log message below).
* mpi/ec-nist.c (_gcry_mpi_ec_nist192_mod, _gcry_mpi_ec_nist224_mod)
(_gcry_mpi_ec_nist256_mod, _gcry_mpi_ec_nist384_mod): At end,
derive the output masks by checking whether 's[]' itself is negative,
instead of checking the result of the last addition; use the 'p_mult'
table entry for P instead of 'ctx->p'.
(_gcry_mpi_ec_nist256_mod): Handle the corner case where 2*P needs to
be added after the carry-based subtraction (see the selection sketch
after the diff below).
* tests/t-mpi-point.c (check_ec_mul_reduction): New.
(main): Call 'check_ec_mul_reduction'.
--
GnuPG-bug-id: T5510
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
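
The constraint change is the core of the x86-64 fix: an x86-64 add/adc
instruction only accepts an immediate that fits a sign-extended 32-bit
field, so a constant with bit 31 set (as in the 'p_mult' limb values)
must not be emitted as an immediate operand, yet the generic "g"
constraint allowed exactly that. Below is a minimal standalone sketch of
the corrected constraint; it is illustrative only (the function name and
constant are not from the commit):

```c
/* Illustrative sketch: "rme" = register, memory, or 32-bit
 * sign-extended immediate.  Build with: gcc -O2 sketch.c */
#include <stdint.h>
#include <stdio.h>

static uint64_t
add_with_rme (uint64_t a)
{
  /* A p_mult-style limb constant; it cannot be encoded as a
   * sign-extended 32-bit immediate. */
  const uint64_t c = 0xfffffffd00000000ULL;
  uint64_t r;

  /* With "rme", GCC must place 'c' in a register or in memory.
   * The old "g" constraint also allowed an immediate operand,
   * which 'addq' cannot encode for this value. */
  __asm__ ("addq %2, %0"
           : "=r" (r)
           : "0" (a), "rme" (c)
           : "cc");
  return r;
}

int
main (void)
{
  printf ("%016llx\n", (unsigned long long) add_with_rme (1));
  return 0;
}
```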
Diffstat (limited to 'mpi')

| -rw-r--r-- | mpi/ec-inline.h | 66 |
| -rw-r--r-- | mpi/ec-nist.c | 86 |

2 files changed, 99 insertions, 53 deletions
```diff
diff --git a/mpi/ec-inline.h b/mpi/ec-inline.h
index 25c3b40d..a07826e3 100644
--- a/mpi/ec-inline.h
+++ b/mpi/ec-inline.h
@@ -40,6 +40,8 @@
 #define HI32_LIMB64(v) (u32)((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 32))
 #define LO32_LIMB64(v) ((u32)(v))
 #define LIMB64_C(hi, lo) (((mpi_limb_t)(u32)(hi) << 32) | (u32)(lo))
+#define MASK_AND64(mask, val) ((mask) & (val))
+#define LIMB_OR64(val1, val2) ((val1) | (val2))
 #define STORE64_COND(x, pos, mask1, val1, mask2, val2) \
   ((x)[(pos)] = ((mask1) & (val1)) | ((mask2) & (val2)))
@@ -73,9 +75,9 @@ LIMB64_HILO(u32 hi, u32 lo)
        : "0" ((mpi_limb_t)(B2)), \
          "1" ((mpi_limb_t)(B1)), \
          "2" ((mpi_limb_t)(B0)), \
-        "g" ((mpi_limb_t)(C2)), \
-        "g" ((mpi_limb_t)(C1)), \
-        "g" ((mpi_limb_t)(C0)) \
+        "rme" ((mpi_limb_t)(C2)), \
+        "rme" ((mpi_limb_t)(C1)), \
+        "rme" ((mpi_limb_t)(C0)) \
        : "cc")

 #define SUB3_LIMB64(A3, A2, A1, A0, B2, B1, B0, C2, C1, C0) \
@@ -88,9 +90,9 @@ LIMB64_HILO(u32 hi, u32 lo)
        : "0" ((mpi_limb_t)(B2)), \
          "1" ((mpi_limb_t)(B1)), \
          "2" ((mpi_limb_t)(B0)), \
-        "g" ((mpi_limb_t)(C2)), \
-        "g" ((mpi_limb_t)(C1)), \
-        "g" ((mpi_limb_t)(C0)) \
+        "rme" ((mpi_limb_t)(C2)), \
+        "rme" ((mpi_limb_t)(C1)), \
+        "rme" ((mpi_limb_t)(C0)) \
        : "cc")

 #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
@@ -106,10 +108,10 @@ LIMB64_HILO(u32 hi, u32 lo)
          "1" ((mpi_limb_t)(B2)), \
          "2" ((mpi_limb_t)(B1)), \
          "3" ((mpi_limb_t)(B0)), \
-        "g" ((mpi_limb_t)(C3)), \
-        "g" ((mpi_limb_t)(C2)), \
-        "g" ((mpi_limb_t)(C1)), \
-        "g" ((mpi_limb_t)(C0)) \
+        "rme" ((mpi_limb_t)(C3)), \
+        "rme" ((mpi_limb_t)(C2)), \
+        "rme" ((mpi_limb_t)(C1)), \
+        "rme" ((mpi_limb_t)(C0)) \
        : "cc")

 #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
@@ -125,10 +127,10 @@ LIMB64_HILO(u32 hi, u32 lo)
          "1" ((mpi_limb_t)(B2)), \
          "2" ((mpi_limb_t)(B1)), \
          "3" ((mpi_limb_t)(B0)), \
-        "g" ((mpi_limb_t)(C3)), \
-        "g" ((mpi_limb_t)(C2)), \
-        "g" ((mpi_limb_t)(C1)), \
-        "g" ((mpi_limb_t)(C0)) \
+        "rme" ((mpi_limb_t)(C3)), \
+        "rme" ((mpi_limb_t)(C2)), \
+        "rme" ((mpi_limb_t)(C1)), \
+        "rme" ((mpi_limb_t)(C0)) \
        : "cc")

 #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
@@ -148,11 +150,11 @@ LIMB64_HILO(u32 hi, u32 lo)
          "2" ((mpi_limb_t)(B2)), \
          "3" ((mpi_limb_t)(B1)), \
          "4" ((mpi_limb_t)(B0)), \
-        "g" ((mpi_limb_t)(C4)), \
-        "g" ((mpi_limb_t)(C3)), \
-        "g" ((mpi_limb_t)(C2)), \
-        "g" ((mpi_limb_t)(C1)), \
-        "g" ((mpi_limb_t)(C0)) \
+        "rme" ((mpi_limb_t)(C4)), \
+        "rme" ((mpi_limb_t)(C3)), \
+        "rme" ((mpi_limb_t)(C2)), \
+        "rme" ((mpi_limb_t)(C1)), \
+        "rme" ((mpi_limb_t)(C0)) \
        : "cc")

 #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
@@ -172,11 +174,11 @@ LIMB64_HILO(u32 hi, u32 lo)
          "2" ((mpi_limb_t)(B2)), \
          "3" ((mpi_limb_t)(B1)), \
          "4" ((mpi_limb_t)(B0)), \
-        "g" ((mpi_limb_t)(C4)), \
-        "g" ((mpi_limb_t)(C3)), \
-        "g" ((mpi_limb_t)(C2)), \
-        "g" ((mpi_limb_t)(C1)), \
-        "g" ((mpi_limb_t)(C0)) \
+        "rme" ((mpi_limb_t)(C4)), \
+        "rme" ((mpi_limb_t)(C3)), \
+        "rme" ((mpi_limb_t)(C2)), \
+        "rme" ((mpi_limb_t)(C1)), \
+        "rme" ((mpi_limb_t)(C0)) \
        : "cc")

 #endif /* __x86_64__ */
@@ -593,6 +595,22 @@ STORE64(mpi_ptr_t x, unsigned int pos, mpi_limb64_t v)
   x[pos * 2 + 1] = v.hi;
 }

+static inline mpi_limb64_t
+MASK_AND64(mpi_limb_t mask, mpi_limb64_t val)
+{
+  val.lo &= mask;
+  val.hi &= mask;
+  return val;
+}
+
+static inline mpi_limb64_t
+LIMB_OR64(mpi_limb64_t val1, mpi_limb64_t val2)
+{
+  val1.lo |= val2.lo;
+  val1.hi |= val2.hi;
+  return val1;
+}
+
 static inline void
 STORE64_COND(mpi_ptr_t x, unsigned int pos, mpi_limb_t mask1, mpi_limb64_t val1,
              mpi_limb_t mask2, mpi_limb64_t val2)
diff --git a/mpi/ec-nist.c b/mpi/ec-nist.c
index 955d2b7c..69b05a6d 100644
--- a/mpi/ec-nist.c
+++ b/mpi/ec-nist.c
@@ -94,12 +94,12 @@ _gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx)
   };
   const mpi_limb64_t zero = LIMB_TO64(0);
   mpi_ptr_t wp;
-  mpi_ptr_t pp;
   mpi_size_t wsize = 192 / BITS_PER_MPI_LIMB64;
   mpi_limb64_t s[wsize + 1];
   mpi_limb64_t o[wsize + 1];
   mpi_limb_t mask1;
   mpi_limb_t mask2;
+  mpi_limb_t s_is_negative;
   int carry;

   MPN_NORMALIZE (w->d, w->nlimbs);
@@ -109,7 +109,6 @@ _gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx)
   RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64);
   RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);

-  pp = ctx->p->d;
   wp = w->d;

   prefetch (p_mult, sizeof(p_mult));
@@ -143,9 +142,13 @@ _gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx)
   ADD4_LIMB64 (o[3], o[2], o[1], o[0],
                s[3], s[2], s[1], s[0],
-               zero, LOAD64(pp, 2), LOAD64(pp, 1), LOAD64(pp, 0));
-  mask1 = vzero - (LO32_LIMB64(o[3]) >> 31);
-  mask2 = (LO32_LIMB64(o[3]) >> 31) - vone;
+               zero,
+               p_mult[0][2], p_mult[0][1], p_mult[0][0]);
+
+  s_is_negative = LO32_LIMB64(s[3]) >> 31;
+
+  mask2 = vzero - s_is_negative;
+  mask1 = s_is_negative - vone;

   STORE64_COND(wp, 0, mask2, o[0], mask1, s[0]);
   STORE64_COND(wp, 1, mask2, o[1], mask1, s[1]);
@@ -183,13 +186,13 @@ _gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx)
   };
   const mpi_limb64_t zero = LIMB_TO64(0);
   mpi_ptr_t wp;
-  mpi_ptr_t pp;
   mpi_size_t wsize = (224 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64;
   mpi_size_t psize = ctx->p->nlimbs;
   mpi_limb64_t s[wsize];
   mpi_limb64_t d[wsize];
   mpi_limb_t mask1;
   mpi_limb_t mask2;
+  mpi_limb_t s_is_negative;
   int carry;

   MPN_NORMALIZE (w->d, w->nlimbs);
@@ -200,7 +203,6 @@ _gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx)
   RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
   ctx->p->nlimbs = psize;

-  pp = ctx->p->d;
   wp = w->d;

   prefetch (p_mult, sizeof(p_mult));
@@ -263,10 +265,13 @@ _gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx)
   ADD4_LIMB64 (d[3], d[2], d[1], d[0],
                s[3], s[2], s[1], s[0],
-               LOAD64(pp, 3), LOAD64(pp, 2), LOAD64(pp, 1), LOAD64(pp, 0));
+               p_mult[0 + 2][3], p_mult[0 + 2][2],
+               p_mult[0 + 2][1], p_mult[0 + 2][0]);

-  mask1 = vzero - (HI32_LIMB64(d[3]) >> 31);
-  mask2 = (HI32_LIMB64(d[3]) >> 31) - vone;
+  s_is_negative = (HI32_LIMB64(s[3]) >> 31);
+
+  mask2 = vzero - s_is_negative;
+  mask1 = s_is_negative - vone;

   STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]);
   STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]);
@@ -280,7 +285,7 @@
 void
 _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
 {
-  static const mpi_limb64_t p_mult[11][5] =
+  static const mpi_limb64_t p_mult[12][5] =
   {
     { /* P * -3 */
       LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U),
@@ -340,14 +345,17 @@ _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
   };
   const mpi_limb64_t zero = LIMB_TO64(0);
   mpi_ptr_t wp;
-  mpi_ptr_t pp;
   mpi_size_t wsize = (256 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64;
   mpi_size_t psize = ctx->p->nlimbs;
   mpi_limb64_t s[wsize + 1];
   mpi_limb64_t t[wsize + 1];
   mpi_limb64_t d[wsize + 1];
+  mpi_limb64_t e[wsize + 1];
   mpi_limb_t mask1;
   mpi_limb_t mask2;
+  mpi_limb_t mask3;
+  mpi_limb_t s_is_negative;
+  mpi_limb_t d_is_negative;
   int carry;

   MPN_NORMALIZE (w->d, w->nlimbs);
@@ -358,7 +366,6 @@ _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
   RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
   ctx->p->nlimbs = psize;

-  pp = ctx->p->d;
   wp = w->d;

   prefetch (p_mult, sizeof(p_mult));
@@ -465,7 +472,7 @@ _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
   /* mod p:
    *  's[4]' holds carry value (-4..6). Subtract (carry + 1) * p. Result
-   *  will be with in range -p...p. Handle result being negative with
+   *  will be with in range -2*p...p. Handle result being negative with
    *  addition and conditional store. */

   carry = LO32_LIMB64(s[4]);
@@ -476,18 +483,39 @@ _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
                p_mult[carry + 4][2], p_mult[carry + 4][1],
                p_mult[carry + 4][0]);

+  /* Add 1*P */
   ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0],
                s[4], s[3], s[2], s[1], s[0],
                zero,
-               LOAD64(pp, 3), LOAD64(pp, 2), LOAD64(pp, 1), LOAD64(pp, 0));
-
-  mask1 = vzero - (LO32_LIMB64(d[4]) >> 31);
-  mask2 = (LO32_LIMB64(d[4]) >> 31) - vone;
+               p_mult[0 + 4][3], p_mult[0 + 4][2],
+               p_mult[0 + 4][1], p_mult[0 + 4][0]);

-  STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]);
-  STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]);
-  STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]);
-  STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]);
+  /* Add 2*P */
+  ADD5_LIMB64 (e[4], e[3], e[2], e[1], e[0],
+               s[4], s[3], s[2], s[1], s[0],
+               zero,
+               p_mult[1 + 4][3], p_mult[1 + 4][2],
+               p_mult[1 + 4][1], p_mult[1 + 4][0]);
+
+  s_is_negative = LO32_LIMB64(s[4]) >> 31;
+  d_is_negative = LO32_LIMB64(d[4]) >> 31;
+  mask3 = vzero - d_is_negative;
+  mask2 = (vzero - s_is_negative) & ~mask3;
+  mask1 = (s_is_negative - vone) & ~mask3;
+
+  s[0] = LIMB_OR64(MASK_AND64(mask2, d[0]), MASK_AND64(mask1, s[0]));
+  s[1] = LIMB_OR64(MASK_AND64(mask2, d[1]), MASK_AND64(mask1, s[1]));
+  s[2] = LIMB_OR64(MASK_AND64(mask2, d[2]), MASK_AND64(mask1, s[2]));
+  s[3] = LIMB_OR64(MASK_AND64(mask2, d[3]), MASK_AND64(mask1, s[3]));
+  s[0] = LIMB_OR64(MASK_AND64(mask3, e[0]), s[0]);
+  s[1] = LIMB_OR64(MASK_AND64(mask3, e[1]), s[1]);
+  s[2] = LIMB_OR64(MASK_AND64(mask3, e[2]), s[2]);
+  s[3] = LIMB_OR64(MASK_AND64(mask3, e[3]), s[3]);
+
+  STORE64(wp, 0, s[0]);
+  STORE64(wp, 1, s[1]);
+  STORE64(wp, 2, s[2]);
+  STORE64(wp, 3, s[3]);

   w->nlimbs = wsize * LIMBS_PER_LIMB64;
   MPN_NORMALIZE (wp, w->nlimbs);
@@ -567,7 +595,6 @@ _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx)
   };
   const mpi_limb64_t zero = LIMB_TO64(0);
   mpi_ptr_t wp;
-  mpi_ptr_t pp;
   mpi_size_t wsize = (384 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64;
   mpi_size_t psize = ctx->p->nlimbs;
 #if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN)
@@ -579,6 +606,7 @@ _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx)
   mpi_limb64_t x[wsize + 1];
   mpi_limb_t mask1;
   mpi_limb_t mask2;
+  mpi_limb_t s_is_negative;
   int carry;

   MPN_NORMALIZE (w->d, w->nlimbs);
@@ -589,7 +617,6 @@ _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx)
   RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
   ctx->p->nlimbs = psize;

-  pp = ctx->p->d;
   wp = w->d;

   prefetch (p_mult, sizeof(p_mult));
@@ -738,12 +765,13 @@ _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx)
   ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0],
                s[6], s[5], s[4], s[3], s[2], s[1], s[0],
                zero,
-               LOAD64(pp, 5), LOAD64(pp, 4),
-               LOAD64(pp, 3), LOAD64(pp, 2),
-               LOAD64(pp, 1), LOAD64(pp, 0));
+               p_mult[0 + 3][5], p_mult[0 + 3][4],
+               p_mult[0 + 3][3], p_mult[0 + 3][2],
+               p_mult[0 + 3][1], p_mult[0 + 3][0]);

-  mask1 = vzero - (LO32_LIMB64(d[6]) >> 31);
-  mask2 = (LO32_LIMB64(d[6]) >> 31) - vone;
+  s_is_negative = LO32_LIMB64(s[6]) >> 31;
+  mask2 = vzero - s_is_negative;
+  mask1 = s_is_negative - vone;

   STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]);
   STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]);
```
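
For P-256, the carry-based subtraction can leave 's' anywhere in
-2*P...P, so a single conditional addition of P is no longer enough; the
patch computes both s + 1*P and s + 2*P and picks one of the three
candidates with constant-time masks. The following standalone sketch
reproduces just that selection logic, with plain uint64_t words standing
in for the mpi_limb64_t limbs (the function 'select3' is hypothetical,
not part of the commit):

```c
/* Constant-time three-way select, mirroring the mask3/mask2/mask1
 * logic added to _gcry_mpi_ec_nist256_mod. */
#include <stdint.h>
#include <stdio.h>

static uint64_t
select3 (uint64_t s, uint64_t d, uint64_t e,
         uint64_t s_is_negative, uint64_t d_is_negative)
{
  /* Exactly one mask is all-ones; no branches, so the choice does
   * not leak through timing. */
  uint64_t mask3 = 0 - d_is_negative;             /* pick e = s + 2*P */
  uint64_t mask2 = (0 - s_is_negative) & ~mask3;  /* pick d = s + 1*P */
  uint64_t mask1 = (s_is_negative - 1) & ~mask3;  /* keep s           */

  return (mask3 & e) | (mask2 & d) | (mask1 & s);
}

int
main (void)
{
  /* s negative but s + P non-negative: the middle candidate wins. */
  printf ("%llx\n",
          (unsigned long long) select3 (0x1111, 0x2222, 0x3333, 1, 0));
  return 0;
}
```

Note that the patched code stores the selected limbs unconditionally
with STORE64, replacing the earlier two-way STORE64_COND, which could
not express a third candidate.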