diff options
-rw-r--r-- | common/rsa.c | 6 | ||||
-rw-r--r-- | core/cortex-m0/mula.S | 37 | ||||
-rw-r--r-- | include/util.h | 16 | ||||
-rw-r--r-- | test/utils.c | 10 |
4 files changed, 60 insertions, 9 deletions
diff --git a/common/rsa.c b/common/rsa.c index 3d8518e9ef..8b166e9dfb 100644 --- a/common/rsa.c +++ b/common/rsa.c @@ -56,8 +56,8 @@ static void mont_mul_add(const struct rsa_public_key *key, uint32_t i; for (i = 1; i < RSANUMWORDS; ++i) { - A = (A >> 32) + mula32(a, b[i], c[i]); - B = (B >> 32) + mula32(d0, key->n[i], A); + A = mulaa32(a, b[i], c[i], A >> 32); + B = mulaa32(d0, key->n[i], A, B >> 32); c[i - 1] = (uint32_t)B; } @@ -82,7 +82,7 @@ static void mont_mul_add_0(const struct rsa_public_key *key, uint32_t i; for (i = 1; i < RSANUMWORDS; ++i) { - B = (B >> 32) + mula32(d0, key->n[i], c[i]); + B = mulaa32(d0, key->n[i], c[i], B >> 32); c[i - 1] = (uint32_t)B; } diff --git a/core/cortex-m0/mula.S b/core/cortex-m0/mula.S index fc0a6f3ee0..02e617c328 100644 --- a/core/cortex-m0/mula.S +++ b/core/cortex-m0/mula.S @@ -2,7 +2,7 @@ * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. * - * Cortex-M0 multiply-accumulate functions + * Cortex-M0 multiply-accumulate[-accumulate] functions */ .syntax unified @@ -44,3 +44,38 @@ mula32: pop {r4, r5} bx lr +@ uint64_t mulaa32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +@ +@ Multiply a (r0) and b (r1), add c (r2), add d (r3) and return the product in +@ r1:r0 + .thumb_func + .section .text.mulaa32 + .global mulaa32 +mulaa32: + + push {r4, r5, r6} + uxth r5, r0 /* r5 = a.lo16 */ + uxth r6, r1 /* r6 = b.lo16 */ + uxth r4, r2 /* r4 = c.lo16 */ + muls r5, r6 /* r5 = a.lo16 * b.lo16 */ + adds r5, r4 /* r5 = a.lo16 * b.lo16 + c.lo16 == r.lo32 */ + lsrs r4, r0, 16 /* r4 = a.hi16 */ + lsrs r2, r2, 16 /* r2 = c.hi16 */ + muls r6, r4 /* r6 = a.hi16 * b.lo16 */ + adds r6, r2 /* r6 = a.hi16 * b.lo16 + c.hi16 == r.mid32.1 */ + uxth r2, r0 /* r2 = a.lo16 */ + lsrs r1, r1, 16 /* r1 = b.hi16 */ + muls r2, r1 /* r2 = a.lo16 * b.hi16 == r.mid32.2 */ + muls r1, r4 /* r1 = b.hi16 * a.hi16 == r.hi32 */ + movs r4, 0 /* r4 = 0 */ + adds r6, r2 /* r6 = (r.mid32.1 + r.mid32.2).lo32 == r.mid.lo32 */ + adcs r4, r4 /* r4 = (r.mid32.1 + r.mid32.2).hi32 == r.mid.hi32 */ + lsls r0, r6, 16 /* r0 = r.mid.lo32.lo16 << 16 == r.mid.inpos.lo32 */ + lsrs r6, r6, 16 /* r6 = r.mid.lo32.hi16 >> 16 */ + lsls r4, r4, 16 /* r4 = r.mid.hi.lo16 << 16 */ + adds r0, r3 /* r0 = r.mid.inposition.lo32 + d */ + adcs r4, r6 /* r4 = r6 + r4 + carry = r.mid.inpos.hi32 */ + adds r0, r5 /* r0 = r.lo32 */ + adcs r1, r4 /* r1 = r.hi32 */ + pop {r4, r5, r6} + bx lr diff --git a/include/util.h b/include/util.h index 2670d4e5ad..c8ae09db93 100644 --- a/include/util.h +++ b/include/util.h @@ -184,10 +184,11 @@ int parse_offset_size(int argc, char **argv, int shift, #ifdef CONFIG_ASSEMBLY_MULA32 /* - * Compute (a*b)+c, where a, b, c are 32-bit integers, and the result is - * 64-bit long. + * Compute (a*b)+c[+d], where a, b, c[, d] are 32-bit integers, and the result + * is 64-bit long. */ uint64_t mula32(uint32_t a, uint32_t b, uint32_t c); +uint64_t mulaa32(uint32_t a, uint32_t b, uint32_t c, uint32_t d); #else static inline uint64_t mula32(uint32_t a, uint32_t b, uint32_t c) { @@ -198,6 +199,17 @@ static inline uint64_t mula32(uint32_t a, uint32_t b, uint32_t c) return ret; } + +static inline uint64_t mulaa32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + uint64_t ret = a; + + ret *= b; + ret += c; + ret += d; + + return ret; +} #endif #endif /* __CROS_EC_UTIL_H */ diff --git a/test/utils.c b/test/utils.c index 7acf238c14..08a3511d77 100644 --- a/test/utils.c +++ b/test/utils.c @@ -360,6 +360,7 @@ static int test_cond_t(void) static int test_mula32(void) { uint64_t r = 0x0; + uint64_t r2 = 0x0; uint32_t b = 0x1; uint32_t c = 0x1; uint32_t i; @@ -368,15 +369,18 @@ static int test_mula32(void) t0 = get_time(); for (i = 0; i < 5000000; i++) { r = mula32(b, c, r + (r >> 32)); + r2 = mulaa32(b, c, r2 >> 32, r2); b = (b << 13) ^ (b >> 2) ^ i; c = (c << 16) ^ (c >> 7) ^ i; watchdog_reload(); } t1 = get_time(); - ccprintf("After %d iterations, r=%08x%08x (time: %d)\n", i, - (uint32_t)(r >> 32), (uint32_t)r, t1.le.lo-t0.le.lo); - TEST_ASSERT(r == 0x9df59b9fb0ab9d96L); + ccprintf("After %d iterations, r=%08x%08x, r2=%08x%08x (time: %d)\n", + i, (uint32_t)(r >> 32), (uint32_t)r, + (uint32_t)(r2 >> 32), (uint32_t)r2, t1.le.lo-t0.le.lo); + TEST_ASSERT(r == 0x9df59b9fb0ab9d96L); + TEST_ASSERT(r2 == 0x9df59b9fb0beabd6L); /* well okay then */ return EC_SUCCESS; |