diff options
author | Marius Schilder <mschilder@google.com> | 2016-11-02 16:26:51 -0700 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2016-11-03 14:44:27 -0700 |
commit | b8c1ce6700ba9e467f4b6a42cac5ceb3aacdd1db (patch) | |
tree | 4353ff487a81b0a4cadb080199d1da61ca41bd15 | |
parent | a5dcd95432911955ac07dec25739ff9520c6d60d (diff) | |
download | chrome-ec-b8c1ce6700ba9e467f4b6a42cac5ceb3aacdd1db.tar.gz |
bn_div and faster modular inverse.
We previously used binary extended Euclid. That does not perform well
when inverting a small public exponent.
We also abused that routine to perform the division of n by one of its
factors. It really did not perform well there either.
This CL introduces a classic Knuth long division and a normal extended
Euclid based on that.
This drops the execution time of the common inversions into the single
msec range (vs. multiple seconds before).
TEST=tcg_tests pass the usual 381/391; test/tpm_test/bn_test passes.
BUG=chrome-os-partner:57422
BRANCH=none
Change-Id: Ic9b4aecd0356fcab3e823dbd60c5b228a87447d3
Reviewed-on: https://chromium-review.googlesource.com/406940
Commit-Ready: Marius Schilder <mschilder@chromium.org>
Tested-by: Marius Schilder <mschilder@chromium.org>
Reviewed-by: Marius Schilder <mschilder@chromium.org>
Reviewed-by: Bill Richardson <wfrichar@chromium.org>
-rw-r--r-- | chip/g/dcrypto/bn.c | 435 | ||||
-rw-r--r-- | chip/g/dcrypto/dcrypto.h | 3 | ||||
-rw-r--r-- | chip/g/dcrypto/rsa.c | 11 |
3 files changed, 338 insertions, 111 deletions
diff --git a/chip/g/dcrypto/bn.c b/chip/g/dcrypto/bn.c index c876b5d868..17d1bae72e 100644 --- a/chip/g/dcrypto/bn.c +++ b/chip/g/dcrypto/bn.c @@ -241,7 +241,7 @@ static void bn_rshift(struct LITE_BIGNUM *r, uint32_t carry, uint32_t neg) /* Montgomery c[] += a * b[] / R % N. */ /* TODO(ngm): constant time. */ static void bn_mont_mul_add(struct LITE_BIGNUM *c, const uint32_t a, - const struct LITE_BIGNUM *b, const uint32_t nprime, + const struct LITE_BIGNUM *b, const uint32_t nprime, const struct LITE_BIGNUM *N) { uint32_t A, B, d0; @@ -341,13 +341,9 @@ void bn_mont_modexp(struct LITE_BIGNUM *output, const struct LITE_BIGNUM *input, struct LITE_BIGNUM aR; #ifndef CR50_NO_BN_ASM - if (bn_bits(N) == 2048 || bn_bits(N) == 1024) { - /* TODO(ngm): add hardware support for standard key sizes. */ + if ((bn_bits(N) & 255) == 0) { + /* Use hardware support for standard key sizes. */ bn_mont_modexp_asm(output, input, exp, N); - /* Final reduce. */ - /* TODO(ngm): constant time. */ - if (bn_sub(output, N)) - bn_add(output, N); return; } #endif @@ -398,7 +394,7 @@ void bn_mont_modexp(struct LITE_BIGNUM *output, const struct LITE_BIGNUM *input, /* c[] += a * b[] */ static uint32_t bn_mul_add(struct LITE_BIGNUM *c, uint32_t a, - const struct LITE_BIGNUM *b, uint32_t offset) + const struct LITE_BIGNUM *b, uint32_t offset) { int i; uint64_t carry = 0; @@ -429,118 +425,345 @@ void DCRYPTO_bn_mul(struct LITE_BIGNUM *c, const struct LITE_BIGNUM *a, BN_DIGIT(c, i + b->dmax - 1) = carry; } -#define bn_is_even(b) !bn_is_bit_set((b), 0) -#define bn_is_odd(b) bn_is_bit_set((b), 0) - -static int bn_is_zero(const struct LITE_BIGNUM *a) -{ - int i, result = 0; - - for (i = 0; i < a->dmax; ++i) - result |= BN_DIGIT(a, i); - return !result; -} - -/* d = (e ^ -1) mod MOD */ -/* TODO(ngm): this method is used in place of division to calculate - * q = N/p, i.e. q = p^-1 mod (N-1). The buffer e may be - * resized to uint32_t once division is implemented. 
*/ -int bn_modinv_vartime(struct LITE_BIGNUM *d, const struct LITE_BIGNUM *e, - const struct LITE_BIGNUM *MOD) -{ - /* Buffers for B, D, and U must be as large as e. */ - uint32_t A_buf[RSA_MAX_WORDS + 1]; - uint32_t B_buf[RSA_MAX_WORDS + 1]; - uint32_t C_buf[RSA_MAX_WORDS + 1]; - uint32_t D_buf[RSA_MAX_WORDS + 1]; - uint32_t U_buf[RSA_MAX_WORDS]; - uint32_t V_buf[RSA_MAX_WORDS]; - int a_neg = 0; - int b_neg = 0; - int c_neg = 0; - int d_neg = 0; - int carry1; - int carry2; - int i = 0; +/* c[] = a[] * b[] */ +static void bn_mul_ex(struct LITE_BIGNUM *c, + const struct LITE_BIGNUM *a, int a_len, + const struct LITE_BIGNUM *b) +{ + int i; + uint32_t carry = 0; - struct LITE_BIGNUM A; - struct LITE_BIGNUM B; - struct LITE_BIGNUM C; - struct LITE_BIGNUM D; - struct LITE_BIGNUM U; - struct LITE_BIGNUM V; + memset(c->d, 0, bn_size(c)); + for (i = 0; i < a_len; i++) { + BN_DIGIT(c, i + b->dmax - 1) = carry; + carry = bn_mul_add(c, BN_DIGIT(a, i), b, i); + } - if (bn_size(e) > sizeof(U_buf)) + BN_DIGIT(c, i + b->dmax - 1) = carry; +} + +static int bn_div_word_ex(struct LITE_BIGNUM *q, + struct LITE_BIGNUM *r, + const struct LITE_BIGNUM *u, int m, + uint32_t div) +{ + uint32_t rem = 0; + int i; + + for (i = m - 1; i >= 0; --i) { + uint64_t tmp = ((uint64_t)rem << 32) + BN_DIGIT(u, i); + uint32_t qd = tmp / div; + + BN_DIGIT(q, i) = qd; + rem = tmp - (uint64_t)qd * div; + } + + if (r != NULL) + BN_DIGIT(r, 0) = rem; + + return 1; +} + +/* + * Knuth's long division. + * + * Returns 0 on error. + * |u| >= |v| + * v[n-1] must not be 0 + * r gets |v| digits written to. + * q gets |u| - |v| + 1 digits written to. 
+ */ +static int bn_div_ex(struct LITE_BIGNUM *q, + struct LITE_BIGNUM *r, + const struct LITE_BIGNUM *u, int m, + const struct LITE_BIGNUM *v, int n) +{ + uint32_t vtop; + int s, i, j; + uint32_t vn[RSA_MAX_WORDS]; /* Normalized v */ + uint32_t un[RSA_MAX_WORDS + 1]; /* Normalized u */ + + if (m < n || n <= 0) return 0; - if (bn_is_even(e) && bn_is_even(MOD)) + vtop = BN_DIGIT(v, n - 1); + + if (vtop == 0) return 0; - bn_init(&A, A_buf, bn_size(MOD) + sizeof(uint32_t)); - bn_init(&B, B_buf, bn_size(MOD) + sizeof(uint32_t)); - bn_init(&C, C_buf, bn_size(MOD) + sizeof(uint32_t)); - bn_init(&D, D_buf, bn_size(MOD) + sizeof(uint32_t)); - bn_init(&U, U_buf, bn_size(MOD)); - bn_init(&V, V_buf, bn_size(MOD)); - - BN_DIGIT(&A, 0) = 1; - BN_DIGIT(&D, 0) = 1; - memcpy(U_buf, e->d, bn_size(e)); - memcpy(V_buf, MOD->d, bn_size(MOD)); - - /* Binary extended GCD, as per Handbook of Applied - * Cryptography, 14.61. */ - for (i = 0;; i++) { - carry1 = 0; - carry2 = 0; - if (bn_is_even(&U)) { - bn_rshift(&U, 0, 0); - if (bn_is_odd(&A) || bn_is_odd(&B)) { - carry1 = bn_signed_add(&A, &a_neg, MOD, 0); - carry2 = bn_signed_sub(&B, &b_neg, e, 0); + if (n == 1) + return bn_div_word_ex(q, r, u, m, vtop); + + /* Compute shift factor to make v have high bit set */ + s = 0; + while ((vtop & 0x80000000) == 0) { + s = s + 1; + vtop = vtop << 1; + } + + /* Normalize u and v into un and vn. 
+ * Note un always gains a leading digit + */ + if (s != 0) { + for (i = n - 1; i > 0; i--) + vn[i] = (BN_DIGIT(v, i) << s) | + (BN_DIGIT(v, i - 1) >> (32 - s)); + vn[0] = BN_DIGIT(v, 0) << s; + + un[m] = BN_DIGIT(u, m - 1) >> (32 - s); + for (i = m - 1; i > 0; i--) + un[i] = (BN_DIGIT(u, i) << s) | + (BN_DIGIT(u, i - 1) >> (32 - s)); + un[0] = BN_DIGIT(u, 0) << s; + } else { + for (i = 0; i < n; ++i) + vn[i] = BN_DIGIT(v, i); + for (i = 0; i < m; ++i) + un[i] = BN_DIGIT(u, i); + un[m] = 0; + } + + /* Main loop, reducing un digit by digit */ + for (j = m - n; j >= 0; j--) { + uint32_t qd; + int64_t t, k; + + /* Estimate quotient digit */ + if (un[j + n] == vn[n - 1]) { + /* Maxed out */ + qd = 0xFFFFFFFF; + } else { + /* Fine tune estimate */ + uint64_t rhat = ((uint64_t)un[j + n] << 32) + + un[j + n - 1]; + + qd = rhat / vn[n - 1]; + rhat = rhat - (uint64_t)qd * vn[n - 1]; + while ((rhat >> 32) == 0 && + (uint64_t)qd * vn[n - 2] > + (rhat << 32) + un[j + n - 2]) { + qd = qd - 1; + rhat = rhat + vn[n - 1]; } - bn_rshift(&A, carry1, a_neg); - bn_rshift(&B, carry2, b_neg); - } else if (bn_is_even(&V)) { - bn_rshift(&V, 0, 0); - if (bn_is_odd(&C) || bn_is_odd(&D)) { - carry1 = bn_signed_add(&C, &c_neg, MOD, 0); - carry2 = bn_signed_sub(&D, &d_neg, e, 0); + } + + /* Multiply and subtract */ + k = 0; + for (i = 0; i < n; i++) { + uint64_t p = (uint64_t)qd * vn[i]; + + t = un[i + j] - k - (p & 0xFFFFFFFF); + un[i + j] = t; + k = (p >> 32) - (t >> 32); + } + t = un[j + n] - k; + un[j + n] = t; + + /* If borrowed, add one back and adjust estimate */ + if (t < 0) { + qd = qd - 1; + for (i = 0; i < n; i++) { + t = (uint64_t)un[i + j] + vn[i] + k; + un[i + j] = t; + k = t >> 32; } - bn_rshift(&C, carry1, c_neg); - bn_rshift(&D, carry2, d_neg); - } else { /* U, V both odd. */ - if (bn_gte(&U, &V)) { - assert(!bn_sub(&U, &V)); - bn_signed_sub(&A, &a_neg, &C, c_neg); - bn_signed_sub(&B, &b_neg, &D, d_neg); - if (bn_is_zero(&U)) - break; /* done. 
*/ + un[j + n] = un[j + n] + k; + } + + BN_DIGIT(q, j) = qd; + } + + if (r != NULL) { + /* Denormalize un into r */ + if (s != 0) { + for (i = 0; i < n - 1; i++) + BN_DIGIT(r, i) = (un[i] >> s) | + (un[i + 1] << (32 - s)); + BN_DIGIT(r, n - 1) = un[n - 1] >> s; + } else { + for (i = 0; i < n; i++) + BN_DIGIT(r, i) = un[i]; + } + } + + return 1; +} + +static void bn_set_bn(struct LITE_BIGNUM *d, const struct LITE_BIGNUM *src, + size_t n) +{ + size_t i = 0; + + for (; i < n && i < d->dmax; ++i) + BN_DIGIT(d, i) = BN_DIGIT(src, i); + for (; i < d->dmax; ++i) + BN_DIGIT(d, i) = 0; +} + +static size_t bn_digits(const struct LITE_BIGNUM *a) +{ + size_t n = a->dmax - 1; + + while (BN_DIGIT(a, n) == 0 && n) + --n; + return n + 1; +} + +int DCRYPTO_bn_div(struct LITE_BIGNUM *quotient, + struct LITE_BIGNUM *remainder, + const struct LITE_BIGNUM *src, + const struct LITE_BIGNUM *divisor) +{ + int src_len = bn_digits(src); + int div_len = bn_digits(divisor); + int i, result; + + if (src_len < div_len) + return 0; + + result = bn_div_ex(quotient, remainder, + src, src_len, + divisor, div_len); + + if (!result) + return 0; + + /* 0-pad the destinations. */ + for (i = src_len - div_len + 1; i < quotient->dmax; ++i) + BN_DIGIT(quotient, i) = 0; + if (remainder) { + for (i = div_len; i < remainder->dmax; ++i) + BN_DIGIT(remainder, i) = 0; + } + + return result; +} + +/* + * Extended Euclid modular inverse. 
+ * + * https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm + * #Computing_multiplicative_inverses_in_modular_structures: + + * function inverse(a, n) + * t := 0; newt := 1; + * r := n; newr := a; + * while newr ≠ 0 + * quotient := r div newr + * (t, newt) := (newt, t - quotient * newt) + * (r, newr) := (newr, r - quotient * newr) + * if r > 1 then return "a is not invertible" + * if t < 0 then t := t + n + * return t + */ +int bn_modinv_vartime(struct LITE_BIGNUM *dst, const struct LITE_BIGNUM *src, + const struct LITE_BIGNUM *mod) +{ + uint32_t R_buf[RSA_MAX_WORDS]; + uint32_t nR_buf[RSA_MAX_WORDS]; + uint32_t Q_buf[RSA_MAX_WORDS]; + + uint32_t nT_buf[RSA_MAX_WORDS + 1]; /* Can go negative, hence +1 */ + uint32_t T_buf[RSA_MAX_WORDS + 1]; /* Can go negative */ + uint32_t tmp_buf[2 * RSA_MAX_WORDS + 1]; /* needs to hold Q*nT */ + + struct LITE_BIGNUM R; + struct LITE_BIGNUM nR; + struct LITE_BIGNUM Q; + struct LITE_BIGNUM T; + struct LITE_BIGNUM nT; + struct LITE_BIGNUM tmp; + + struct LITE_BIGNUM *pT = &T; + struct LITE_BIGNUM *pnT = &nT; + struct LITE_BIGNUM *pR = &R; + struct LITE_BIGNUM *pnR = &nR; + struct LITE_BIGNUM *bnswap; + + int t_neg = 0; + int nt_neg = 0; + int iswap; + + size_t r_len, nr_len; + + bn_init(&R, R_buf, bn_size(mod)); + bn_init(&nR, nR_buf, bn_size(mod)); + bn_init(&Q, Q_buf, bn_size(mod)); + bn_init(&T, T_buf, bn_size(mod) + sizeof(uint32_t)); + bn_init(&nT, nT_buf, bn_size(mod) + sizeof(uint32_t)); + bn_init(&tmp, tmp_buf, bn_size(mod) + sizeof(uint32_t)); + + r_len = bn_digits(mod); + nr_len = bn_digits(src); + + BN_DIGIT(&nT, 0) = 1; /* T = 0, nT = 1 */ + bn_set_bn(&R, mod, r_len); /* R = n */ + bn_set_bn(&nR, src, nr_len); /* nR = input */ + + /* Trim nR */ + while (nr_len && BN_DIGIT(&nR, nr_len - 1) == 0) + --nr_len; + + while (nr_len) { + size_t q_len = r_len - nr_len + 1; + + /* (r, nr) = (nr, r % nr), q = r / nr */ + if (!bn_div_ex(&Q, pR, pR, r_len, pnR, nr_len)) + return 0; + + /* swap R and nR */ + r_len = nr_len; + 
bnswap = pR; pR = pnR; pnR = bnswap; + + /* trim nR and Q */ + while (nr_len && BN_DIGIT(pnR, nr_len - 1) == 0) + --nr_len; + while (q_len && BN_DIGIT(&Q, q_len - 1) == 0) + --q_len; + + Q.dmax = q_len; + + /* compute t - q*nt */ + if (q_len == 1 && BN_DIGIT(&Q, 0) <= 2) { + /* Doing few direct subs is faster than mul + sub */ + uint32_t n = BN_DIGIT(&Q, 0); + + while (n--) + bn_signed_sub(pT, &t_neg, pnT, nt_neg); + } else { + /* Call bn_mul_ex with smallest operand first */ + if (nt_neg) { + /* Negative numbers use all digits, + * thus pnT is large + */ + bn_mul_ex(&tmp, &Q, q_len, pnT); } else { - assert(!bn_sub(&V, &U)); - bn_signed_sub(&C, &c_neg, &A, a_neg); - bn_signed_sub(&D, &d_neg, &B, b_neg); + int nt_len = bn_digits(pnT); + + if (q_len < nt_len) + bn_mul_ex(&tmp, &Q, q_len, pnT); + else + bn_mul_ex(&tmp, pnT, nt_len, &Q); } + bn_signed_sub(pT, &t_neg, &tmp, nt_neg); } - if ((i + 1) % 1000 == 0) - /* TODO(ngm): Poke the watchdog (only - * necessary for q = N/p). Remove once - * division is implemented. */ - watchdog_reload(); - } - BN_DIGIT(&V, 0) ^= 0x01; - if (bn_is_zero(&V)) { - while (c_neg) - bn_signed_add(&C, &c_neg, MOD, 0); - while (bn_gte(&C, MOD)) - bn_sub(&C, MOD); + /* swap T and nT */ + bnswap = pT; pT = pnT; pnT = bnswap; + iswap = t_neg; t_neg = nt_neg; nt_neg = iswap; + } - memcpy(d->d, C.d, bn_size(d)); - return 1; - } else { - return 0; /* Inverse not found. 
*/ + if (r_len != 1 || BN_DIGIT(pR, 0) != 1) { + /* gcd not 1; no direct inverse */ + return 0; } + + if (t_neg) + bn_signed_add(pT, &t_neg, mod, 0); + + bn_set_bn(dst, pT, bn_digits(pT)); + + return 1; } #define NUM_PRIMES 4095 diff --git a/chip/g/dcrypto/dcrypto.h b/chip/g/dcrypto/dcrypto.h index aaa4b9d0cf..4f4c2df5ff 100644 --- a/chip/g/dcrypto/dcrypto.h +++ b/chip/g/dcrypto/dcrypto.h @@ -169,6 +169,9 @@ int DCRYPTO_bn_generate_prime(struct LITE_BIGNUM *p); void DCRYPTO_bn_wrap(struct LITE_BIGNUM *b, void *buf, size_t len); void DCRYPTO_bn_mul(struct LITE_BIGNUM *c, const struct LITE_BIGNUM *a, const struct LITE_BIGNUM *b); +int DCRYPTO_bn_div(struct LITE_BIGNUM *quotient, struct LITE_BIGNUM *remainder, + const struct LITE_BIGNUM *input, + const struct LITE_BIGNUM *divisor); /* * X509. diff --git a/chip/g/dcrypto/rsa.c b/chip/g/dcrypto/rsa.c index 359565d118..ace9961169 100644 --- a/chip/g/dcrypto/rsa.c +++ b/chip/g/dcrypto/rsa.c @@ -635,7 +635,7 @@ int DCRYPTO_rsa_key_compute(struct LITE_BIGNUM *N, struct LITE_BIGNUM *d, { uint32_t ONE_buf = 1; uint32_t phi_buf[RSA_MAX_WORDS]; - uint32_t q_buf[RSA_MAX_WORDS / 2]; + uint32_t q_buf[RSA_MAX_WORDS / 2 + 1]; struct LITE_BIGNUM ONE; struct LITE_BIGNUM e; @@ -648,14 +648,15 @@ int DCRYPTO_rsa_key_compute(struct LITE_BIGNUM *N, struct LITE_BIGNUM *d, /* q not provided, calculate it. */ memcpy(phi_buf, N->d, bn_size(N)); bn_init(&q_local, q_buf, bn_size(p)); - bn_sub(&phi, &ONE); - if (!bn_modinv_vartime(&q_local, p, &phi)) + q = &q_local; + + if (!DCRYPTO_bn_div(q, NULL, &phi, p)) return 0; + /* Check that p * q == N */ - DCRYPTO_bn_mul(&phi, p, &q_local); + DCRYPTO_bn_mul(&phi, p, q); if (!bn_eq(N, &phi)) return 0; - q = &q_local; } else { DCRYPTO_bn_mul(N, p, q); memcpy(phi_buf, N->d, bn_size(N)); |