summaryrefslogtreecommitdiff
path: root/chip
diff options
context:
space:
mode:
authorMarius Schilder <mschilder@google.com>2016-11-02 16:26:51 -0700
committerchrome-bot <chrome-bot@chromium.org>2016-11-03 14:44:27 -0700
commitb8c1ce6700ba9e467f4b6a42cac5ceb3aacdd1db (patch)
tree4353ff487a81b0a4cadb080199d1da61ca41bd15 /chip
parenta5dcd95432911955ac07dec25739ff9520c6d60d (diff)
downloadchrome-ec-b8c1ce6700ba9e467f4b6a42cac5ceb3aacdd1db.tar.gz
bn_div and faster modular inverse.
We previously used binary extended Euclid. That does not perform well when inverting a small public exponent. We also abused that routine to perform the division of n by one of its factors. Really did not perform well there either. This CL introduces a classic Knuth long division and a normal extended Euclid based on that. This drops the execution time of the common inversions into the single msec range (vs. multiple seconds before..) TEST=tcg_tests pass the usual 381/391; test/tpm_test/bn_test passes. BUG=chrome-os-partner:57422 BRANCH=none Change-Id: Ic9b4aecd0356fcab3e823dbd60c5b228a87447d3 Reviewed-on: https://chromium-review.googlesource.com/406940 Commit-Ready: Marius Schilder <mschilder@chromium.org> Tested-by: Marius Schilder <mschilder@chromium.org> Reviewed-by: Marius Schilder <mschilder@chromium.org> Reviewed-by: Bill Richardson <wfrichar@chromium.org>
Diffstat (limited to 'chip')
-rw-r--r--chip/g/dcrypto/bn.c435
-rw-r--r--chip/g/dcrypto/dcrypto.h3
-rw-r--r--chip/g/dcrypto/rsa.c11
3 files changed, 338 insertions, 111 deletions
diff --git a/chip/g/dcrypto/bn.c b/chip/g/dcrypto/bn.c
index c876b5d868..17d1bae72e 100644
--- a/chip/g/dcrypto/bn.c
+++ b/chip/g/dcrypto/bn.c
@@ -241,7 +241,7 @@ static void bn_rshift(struct LITE_BIGNUM *r, uint32_t carry, uint32_t neg)
/* Montgomery c[] += a * b[] / R % N. */
/* TODO(ngm): constant time. */
static void bn_mont_mul_add(struct LITE_BIGNUM *c, const uint32_t a,
- const struct LITE_BIGNUM *b, const uint32_t nprime,
+ const struct LITE_BIGNUM *b, const uint32_t nprime,
const struct LITE_BIGNUM *N)
{
uint32_t A, B, d0;
@@ -341,13 +341,9 @@ void bn_mont_modexp(struct LITE_BIGNUM *output, const struct LITE_BIGNUM *input,
struct LITE_BIGNUM aR;
#ifndef CR50_NO_BN_ASM
- if (bn_bits(N) == 2048 || bn_bits(N) == 1024) {
- /* TODO(ngm): add hardware support for standard key sizes. */
+ if ((bn_bits(N) & 255) == 0) {
+ /* Use hardware support for standard key sizes. */
bn_mont_modexp_asm(output, input, exp, N);
- /* Final reduce. */
- /* TODO(ngm): constant time. */
- if (bn_sub(output, N))
- bn_add(output, N);
return;
}
#endif
@@ -398,7 +394,7 @@ void bn_mont_modexp(struct LITE_BIGNUM *output, const struct LITE_BIGNUM *input,
/* c[] += a * b[] */
static uint32_t bn_mul_add(struct LITE_BIGNUM *c, uint32_t a,
- const struct LITE_BIGNUM *b, uint32_t offset)
+ const struct LITE_BIGNUM *b, uint32_t offset)
{
int i;
uint64_t carry = 0;
@@ -429,118 +425,345 @@ void DCRYPTO_bn_mul(struct LITE_BIGNUM *c, const struct LITE_BIGNUM *a,
BN_DIGIT(c, i + b->dmax - 1) = carry;
}
-#define bn_is_even(b) !bn_is_bit_set((b), 0)
-#define bn_is_odd(b) bn_is_bit_set((b), 0)
-
-static int bn_is_zero(const struct LITE_BIGNUM *a)
-{
- int i, result = 0;
-
- for (i = 0; i < a->dmax; ++i)
- result |= BN_DIGIT(a, i);
- return !result;
-}
-
-/* d = (e ^ -1) mod MOD */
-/* TODO(ngm): this method is used in place of division to calculate
- * q = N/p, i.e. q = p^-1 mod (N-1). The buffer e may be
- * resized to uint32_t once division is implemented. */
-int bn_modinv_vartime(struct LITE_BIGNUM *d, const struct LITE_BIGNUM *e,
- const struct LITE_BIGNUM *MOD)
-{
- /* Buffers for B, D, and U must be as large as e. */
- uint32_t A_buf[RSA_MAX_WORDS + 1];
- uint32_t B_buf[RSA_MAX_WORDS + 1];
- uint32_t C_buf[RSA_MAX_WORDS + 1];
- uint32_t D_buf[RSA_MAX_WORDS + 1];
- uint32_t U_buf[RSA_MAX_WORDS];
- uint32_t V_buf[RSA_MAX_WORDS];
- int a_neg = 0;
- int b_neg = 0;
- int c_neg = 0;
- int d_neg = 0;
- int carry1;
- int carry2;
- int i = 0;
+/* c[] = a[] * b[] */
+static void bn_mul_ex(struct LITE_BIGNUM *c,
+ const struct LITE_BIGNUM *a, int a_len,
+ const struct LITE_BIGNUM *b)
+{
+ int i;
+ uint32_t carry = 0;
- struct LITE_BIGNUM A;
- struct LITE_BIGNUM B;
- struct LITE_BIGNUM C;
- struct LITE_BIGNUM D;
- struct LITE_BIGNUM U;
- struct LITE_BIGNUM V;
+ memset(c->d, 0, bn_size(c));
+ for (i = 0; i < a_len; i++) {
+ BN_DIGIT(c, i + b->dmax - 1) = carry;
+ carry = bn_mul_add(c, BN_DIGIT(a, i), b, i);
+ }
- if (bn_size(e) > sizeof(U_buf))
+ BN_DIGIT(c, i + b->dmax - 1) = carry;
+}
+
+static int bn_div_word_ex(struct LITE_BIGNUM *q,
+ struct LITE_BIGNUM *r,
+ const struct LITE_BIGNUM *u, int m,
+ uint32_t div)
+{
+ uint32_t rem = 0;
+ int i;
+
+ for (i = m - 1; i >= 0; --i) {
+ uint64_t tmp = ((uint64_t)rem << 32) + BN_DIGIT(u, i);
+ uint32_t qd = tmp / div;
+
+ BN_DIGIT(q, i) = qd;
+ rem = tmp - (uint64_t)qd * div;
+ }
+
+ if (r != NULL)
+ BN_DIGIT(r, 0) = rem;
+
+ return 1;
+}
+
+/*
+ * Knuth's long division.
+ *
+ * Returns 0 on error.
+ * |u| >= |v|
+ * v[n-1] must not be 0
+ * r gets |v| digits written to.
+ * q gets |u| - |v| + 1 digits written to.
+ */
+static int bn_div_ex(struct LITE_BIGNUM *q,
+ struct LITE_BIGNUM *r,
+ const struct LITE_BIGNUM *u, int m,
+ const struct LITE_BIGNUM *v, int n)
+{
+ uint32_t vtop;
+ int s, i, j;
+ uint32_t vn[RSA_MAX_WORDS]; /* Normalized v */
+ uint32_t un[RSA_MAX_WORDS + 1]; /* Normalized u */
+
+ if (m < n || n <= 0)
return 0;
- if (bn_is_even(e) && bn_is_even(MOD))
+ vtop = BN_DIGIT(v, n - 1);
+
+ if (vtop == 0)
return 0;
- bn_init(&A, A_buf, bn_size(MOD) + sizeof(uint32_t));
- bn_init(&B, B_buf, bn_size(MOD) + sizeof(uint32_t));
- bn_init(&C, C_buf, bn_size(MOD) + sizeof(uint32_t));
- bn_init(&D, D_buf, bn_size(MOD) + sizeof(uint32_t));
- bn_init(&U, U_buf, bn_size(MOD));
- bn_init(&V, V_buf, bn_size(MOD));
-
- BN_DIGIT(&A, 0) = 1;
- BN_DIGIT(&D, 0) = 1;
- memcpy(U_buf, e->d, bn_size(e));
- memcpy(V_buf, MOD->d, bn_size(MOD));
-
- /* Binary extended GCD, as per Handbook of Applied
- * Cryptography, 14.61. */
- for (i = 0;; i++) {
- carry1 = 0;
- carry2 = 0;
- if (bn_is_even(&U)) {
- bn_rshift(&U, 0, 0);
- if (bn_is_odd(&A) || bn_is_odd(&B)) {
- carry1 = bn_signed_add(&A, &a_neg, MOD, 0);
- carry2 = bn_signed_sub(&B, &b_neg, e, 0);
+ if (n == 1)
+ return bn_div_word_ex(q, r, u, m, vtop);
+
+ /* Compute shift factor to make v have high bit set */
+ s = 0;
+ while ((vtop & 0x80000000) == 0) {
+ s = s + 1;
+ vtop = vtop << 1;
+ }
+
+ /* Normalize u and v into un and vn.
+ * Note un always gains a leading digit
+ */
+ if (s != 0) {
+ for (i = n - 1; i > 0; i--)
+ vn[i] = (BN_DIGIT(v, i) << s) |
+ (BN_DIGIT(v, i - 1) >> (32 - s));
+ vn[0] = BN_DIGIT(v, 0) << s;
+
+ un[m] = BN_DIGIT(u, m - 1) >> (32 - s);
+ for (i = m - 1; i > 0; i--)
+ un[i] = (BN_DIGIT(u, i) << s) |
+ (BN_DIGIT(u, i - 1) >> (32 - s));
+ un[0] = BN_DIGIT(u, 0) << s;
+ } else {
+ for (i = 0; i < n; ++i)
+ vn[i] = BN_DIGIT(v, i);
+ for (i = 0; i < m; ++i)
+ un[i] = BN_DIGIT(u, i);
+ un[m] = 0;
+ }
+
+ /* Main loop, reducing un digit by digit */
+ for (j = m - n; j >= 0; j--) {
+ uint32_t qd;
+ int64_t t, k;
+
+ /* Estimate quotient digit */
+ if (un[j + n] == vn[n - 1]) {
+ /* Maxed out */
+ qd = 0xFFFFFFFF;
+ } else {
+ /* Fine tune estimate */
+ uint64_t rhat = ((uint64_t)un[j + n] << 32) +
+ un[j + n - 1];
+
+ qd = rhat / vn[n - 1];
+ rhat = rhat - (uint64_t)qd * vn[n - 1];
+ while ((rhat >> 32) == 0 &&
+ (uint64_t)qd * vn[n - 2] >
+ (rhat << 32) + un[j + n - 2]) {
+ qd = qd - 1;
+ rhat = rhat + vn[n - 1];
}
- bn_rshift(&A, carry1, a_neg);
- bn_rshift(&B, carry2, b_neg);
- } else if (bn_is_even(&V)) {
- bn_rshift(&V, 0, 0);
- if (bn_is_odd(&C) || bn_is_odd(&D)) {
- carry1 = bn_signed_add(&C, &c_neg, MOD, 0);
- carry2 = bn_signed_sub(&D, &d_neg, e, 0);
+ }
+
+ /* Multiply and subtract */
+ k = 0;
+ for (i = 0; i < n; i++) {
+ uint64_t p = (uint64_t)qd * vn[i];
+
+ t = un[i + j] - k - (p & 0xFFFFFFFF);
+ un[i + j] = t;
+ k = (p >> 32) - (t >> 32);
+ }
+ t = un[j + n] - k;
+ un[j + n] = t;
+
+ /* If borrowed, add one back and adjust estimate */
+ if (t < 0) {
+ qd = qd - 1;
+ for (i = 0; i < n; i++) {
+ t = (uint64_t)un[i + j] + vn[i] + k;
+ un[i + j] = t;
+ k = t >> 32;
}
- bn_rshift(&C, carry1, c_neg);
- bn_rshift(&D, carry2, d_neg);
- } else { /* U, V both odd. */
- if (bn_gte(&U, &V)) {
- assert(!bn_sub(&U, &V));
- bn_signed_sub(&A, &a_neg, &C, c_neg);
- bn_signed_sub(&B, &b_neg, &D, d_neg);
- if (bn_is_zero(&U))
- break; /* done. */
+ un[j + n] = un[j + n] + k;
+ }
+
+ BN_DIGIT(q, j) = qd;
+ }
+
+ if (r != NULL) {
+ /* Denormalize un into r */
+ if (s != 0) {
+ for (i = 0; i < n - 1; i++)
+ BN_DIGIT(r, i) = (un[i] >> s) |
+ (un[i + 1] << (32 - s));
+ BN_DIGIT(r, n - 1) = un[n - 1] >> s;
+ } else {
+ for (i = 0; i < n; i++)
+ BN_DIGIT(r, i) = un[i];
+ }
+ }
+
+ return 1;
+}
+
+static void bn_set_bn(struct LITE_BIGNUM *d, const struct LITE_BIGNUM *src,
+ size_t n)
+{
+ size_t i = 0;
+
+ for (; i < n && i < d->dmax; ++i)
+ BN_DIGIT(d, i) = BN_DIGIT(src, i);
+ for (; i < d->dmax; ++i)
+ BN_DIGIT(d, i) = 0;
+}
+
+static size_t bn_digits(const struct LITE_BIGNUM *a)
+{
+ size_t n = a->dmax - 1;
+
+ while (BN_DIGIT(a, n) == 0 && n)
+ --n;
+ return n + 1;
+}
+
+int DCRYPTO_bn_div(struct LITE_BIGNUM *quotient,
+ struct LITE_BIGNUM *remainder,
+ const struct LITE_BIGNUM *src,
+ const struct LITE_BIGNUM *divisor)
+{
+ int src_len = bn_digits(src);
+ int div_len = bn_digits(divisor);
+ int i, result;
+
+ if (src_len < div_len)
+ return 0;
+
+ result = bn_div_ex(quotient, remainder,
+ src, src_len,
+ divisor, div_len);
+
+ if (!result)
+ return 0;
+
+ /* 0-pad the destinations. */
+ for (i = src_len - div_len + 1; i < quotient->dmax; ++i)
+ BN_DIGIT(quotient, i) = 0;
+ if (remainder) {
+ for (i = div_len; i < remainder->dmax; ++i)
+ BN_DIGIT(remainder, i) = 0;
+ }
+
+ return result;
+}
+
+/*
+ * Extended Euclid modular inverse.
+ *
+ * https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm
+ * #Computing_multiplicative_inverses_in_modular_structures:
+
+ * function inverse(a, n)
+ * t := 0; newt := 1;
+ * r := n; newr := a;
+ * while newr ≠ 0
+ * quotient := r div newr
+ * (t, newt) := (newt, t - quotient * newt)
+ * (r, newr) := (newr, r - quotient * newr)
+ * if r > 1 then return "a is not invertible"
+ * if t < 0 then t := t + n
+ * return t
+ */
+int bn_modinv_vartime(struct LITE_BIGNUM *dst, const struct LITE_BIGNUM *src,
+ const struct LITE_BIGNUM *mod)
+{
+ uint32_t R_buf[RSA_MAX_WORDS];
+ uint32_t nR_buf[RSA_MAX_WORDS];
+ uint32_t Q_buf[RSA_MAX_WORDS];
+
+ uint32_t nT_buf[RSA_MAX_WORDS + 1]; /* Can go negative, hence +1 */
+ uint32_t T_buf[RSA_MAX_WORDS + 1]; /* Can go negative */
+ uint32_t tmp_buf[2 * RSA_MAX_WORDS + 1]; /* needs to hold Q*nT */
+
+ struct LITE_BIGNUM R;
+ struct LITE_BIGNUM nR;
+ struct LITE_BIGNUM Q;
+ struct LITE_BIGNUM T;
+ struct LITE_BIGNUM nT;
+ struct LITE_BIGNUM tmp;
+
+ struct LITE_BIGNUM *pT = &T;
+ struct LITE_BIGNUM *pnT = &nT;
+ struct LITE_BIGNUM *pR = &R;
+ struct LITE_BIGNUM *pnR = &nR;
+ struct LITE_BIGNUM *bnswap;
+
+ int t_neg = 0;
+ int nt_neg = 0;
+ int iswap;
+
+ size_t r_len, nr_len;
+
+ bn_init(&R, R_buf, bn_size(mod));
+ bn_init(&nR, nR_buf, bn_size(mod));
+ bn_init(&Q, Q_buf, bn_size(mod));
+ bn_init(&T, T_buf, bn_size(mod) + sizeof(uint32_t));
+ bn_init(&nT, nT_buf, bn_size(mod) + sizeof(uint32_t));
+ bn_init(&tmp, tmp_buf, bn_size(mod) + sizeof(uint32_t));
+
+ r_len = bn_digits(mod);
+ nr_len = bn_digits(src);
+
+ BN_DIGIT(&nT, 0) = 1; /* T = 0, nT = 1 */
+ bn_set_bn(&R, mod, r_len); /* R = n */
+ bn_set_bn(&nR, src, nr_len); /* nR = input */
+
+ /* Trim nR */
+ while (nr_len && BN_DIGIT(&nR, nr_len - 1) == 0)
+ --nr_len;
+
+ while (nr_len) {
+ size_t q_len = r_len - nr_len + 1;
+
+ /* (r, nr) = (nr, r % nr), q = r / nr */
+ if (!bn_div_ex(&Q, pR, pR, r_len, pnR, nr_len))
+ return 0;
+
+ /* swap R and nR */
+ r_len = nr_len;
+ bnswap = pR; pR = pnR; pnR = bnswap;
+
+ /* trim nR and Q */
+ while (nr_len && BN_DIGIT(pnR, nr_len - 1) == 0)
+ --nr_len;
+ while (q_len && BN_DIGIT(&Q, q_len - 1) == 0)
+ --q_len;
+
+ Q.dmax = q_len;
+
+ /* compute t - q*nt */
+ if (q_len == 1 && BN_DIGIT(&Q, 0) <= 2) {
+ /* Doing few direct subs is faster than mul + sub */
+ uint32_t n = BN_DIGIT(&Q, 0);
+
+ while (n--)
+ bn_signed_sub(pT, &t_neg, pnT, nt_neg);
+ } else {
+ /* Call bn_mul_ex with smallest operand first */
+ if (nt_neg) {
+ /* Negative numbers use all digits,
+ * thus pnT is large
+ */
+ bn_mul_ex(&tmp, &Q, q_len, pnT);
} else {
- assert(!bn_sub(&V, &U));
- bn_signed_sub(&C, &c_neg, &A, a_neg);
- bn_signed_sub(&D, &d_neg, &B, b_neg);
+ int nt_len = bn_digits(pnT);
+
+ if (q_len < nt_len)
+ bn_mul_ex(&tmp, &Q, q_len, pnT);
+ else
+ bn_mul_ex(&tmp, pnT, nt_len, &Q);
}
+ bn_signed_sub(pT, &t_neg, &tmp, nt_neg);
}
- if ((i + 1) % 1000 == 0)
- /* TODO(ngm): Poke the watchdog (only
- * necessary for q = N/p). Remove once
- * division is implemented. */
- watchdog_reload();
- }
- BN_DIGIT(&V, 0) ^= 0x01;
- if (bn_is_zero(&V)) {
- while (c_neg)
- bn_signed_add(&C, &c_neg, MOD, 0);
- while (bn_gte(&C, MOD))
- bn_sub(&C, MOD);
+ /* swap T and nT */
+ bnswap = pT; pT = pnT; pnT = bnswap;
+ iswap = t_neg; t_neg = nt_neg; nt_neg = iswap;
+ }
- memcpy(d->d, C.d, bn_size(d));
- return 1;
- } else {
- return 0; /* Inverse not found. */
+ if (r_len != 1 || BN_DIGIT(pR, 0) != 1) {
+ /* gcd not 1; no direct inverse */
+ return 0;
}
+
+ if (t_neg)
+ bn_signed_add(pT, &t_neg, mod, 0);
+
+ bn_set_bn(dst, pT, bn_digits(pT));
+
+ return 1;
}
#define NUM_PRIMES 4095
diff --git a/chip/g/dcrypto/dcrypto.h b/chip/g/dcrypto/dcrypto.h
index aaa4b9d0cf..4f4c2df5ff 100644
--- a/chip/g/dcrypto/dcrypto.h
+++ b/chip/g/dcrypto/dcrypto.h
@@ -169,6 +169,9 @@ int DCRYPTO_bn_generate_prime(struct LITE_BIGNUM *p);
void DCRYPTO_bn_wrap(struct LITE_BIGNUM *b, void *buf, size_t len);
void DCRYPTO_bn_mul(struct LITE_BIGNUM *c, const struct LITE_BIGNUM *a,
const struct LITE_BIGNUM *b);
+int DCRYPTO_bn_div(struct LITE_BIGNUM *quotient, struct LITE_BIGNUM *remainder,
+ const struct LITE_BIGNUM *input,
+ const struct LITE_BIGNUM *divisor);
/*
* X509.
diff --git a/chip/g/dcrypto/rsa.c b/chip/g/dcrypto/rsa.c
index 359565d118..ace9961169 100644
--- a/chip/g/dcrypto/rsa.c
+++ b/chip/g/dcrypto/rsa.c
@@ -635,7 +635,7 @@ int DCRYPTO_rsa_key_compute(struct LITE_BIGNUM *N, struct LITE_BIGNUM *d,
{
uint32_t ONE_buf = 1;
uint32_t phi_buf[RSA_MAX_WORDS];
- uint32_t q_buf[RSA_MAX_WORDS / 2];
+ uint32_t q_buf[RSA_MAX_WORDS / 2 + 1];
struct LITE_BIGNUM ONE;
struct LITE_BIGNUM e;
@@ -648,14 +648,15 @@ int DCRYPTO_rsa_key_compute(struct LITE_BIGNUM *N, struct LITE_BIGNUM *d,
/* q not provided, calculate it. */
memcpy(phi_buf, N->d, bn_size(N));
bn_init(&q_local, q_buf, bn_size(p));
- bn_sub(&phi, &ONE);
- if (!bn_modinv_vartime(&q_local, p, &phi))
+ q = &q_local;
+
+ if (!DCRYPTO_bn_div(q, NULL, &phi, p))
return 0;
+
/* Check that p * q == N */
- DCRYPTO_bn_mul(&phi, p, &q_local);
+ DCRYPTO_bn_mul(&phi, p, q);
if (!bn_eq(N, &phi))
return 0;
- q = &q_local;
} else {
DCRYPTO_bn_mul(N, p, q);
memcpy(phi_buf, N->d, bn_size(N));