diff options
author | Nicolas Boichat <drinkcat@google.com> | 2017-06-22 10:34:39 +0800 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2017-07-03 21:44:34 -0700 |
commit | edc668ea6cca1d4f3e53c211406d6a8bf5fe6364 (patch) | |
tree | 6ee9dcc2fd49d6d34da4c108093ee322212823ce /core | |
parent | 136a80e1138633c2f2ac249c15078b587af9c7ec (diff) | |
download | chrome-ec-edc668ea6cca1d4f3e53c211406d6a8bf5fe6364.tar.gz |
core/cortex-m0/curve25519: Integrate fast curve25519 implementation
- Move generic implementation to curve25519-generic.o
- Always use optimized version on cortex-m0.
- Rename .s files to .S, remove unnecessary lines in assembly files.
- Rename crypto_scalarmult_curve25519 to x25519_scalar_mult to match
the signature provided by the generic implementation.
- Replace some handcoded memcpy with function calls
- Remove unnecessary "volatile" specifications in the code.
BRANCH=none
BUG=b:62813194
TEST=To test old implementation only:
- Increase CONFIG_RO_SIZE to 60kb
- Increase console stack size to 2048
make BOARD=hammer PROJECT=x25519 TEST_BUILD=y
./util/flash_ec --board=hammer --image=build/hammer/x25519.bin
EC console: runtest, taskinfo
=> Used to take ~4'17" to run (X25519 duration 256347 us).
1496/2048 stack size usage in CONSOLE task
=> Now takes ~1'25" to run (X25519 duration 84520 us)
732/2048 stack size usage in CONSOLE task
TEST=In test/x25519.c, uncomment #define TEST_X25519_1M_ITERATIONS
make BOARD=hammer PROJECT=x25519 TEST_BUILD=y
./util/flash_ec --board=hammer --image=build/hammer/x25519.bin
EC console: runtest, wait ~23 hours, test passes.
TEST=- Define CONFIG_CURVE25519_CORTEXM0 (next patch)
make newsizes
build/hammer/RW/ec.RW.flat shrank by 1888 bytes: (52208 to 50320)
Change-Id: Icce38d3c32f431a85ac0f951cf34456b490dc665
Reviewed-on: https://chromium-review.googlesource.com/540962
Commit-Ready: Nicolas Boichat <drinkcat@chromium.org>
Tested-by: Nicolas Boichat <drinkcat@chromium.org>
Reviewed-by: Nicolas Boichat <drinkcat@chromium.org>
Diffstat (limited to 'core')
-rw-r--r-- | core/cortex-m0/build.mk | 9 | ||||
-rw-r--r-- | core/cortex-m0/curve25519/mpy121666.S (renamed from core/cortex-m0/curve25519/cortex_m0_mpy121666.s) | 20 | ||||
-rw-r--r-- | core/cortex-m0/curve25519/mul.S (renamed from core/cortex-m0/curve25519/mul.s) | 0 | ||||
-rw-r--r-- | core/cortex-m0/curve25519/reduce25519.S (renamed from core/cortex-m0/curve25519/cortex_m0_reduce25519.s) | 13 | ||||
-rw-r--r-- | core/cortex-m0/curve25519/scalarmult.c | 59 | ||||
-rw-r--r-- | core/cortex-m0/curve25519/sqr.S (renamed from core/cortex-m0/curve25519/sqr.s) | 132 |
6 files changed, 25 insertions, 208 deletions
diff --git a/core/cortex-m0/build.mk b/core/cortex-m0/build.mk index 5f2bc32eb5..f8f33aa2a3 100644 --- a/core/cortex-m0/build.mk +++ b/core/cortex-m0/build.mk @@ -21,4 +21,13 @@ endif core-y=cpu.o init.o thumb_case.o div.o lmul.o ldivmod.o uldivmod.o core-$(CONFIG_COMMON_PANIC_OUTPUT)+=panic.o core-$(CONFIG_COMMON_RUNTIME)+=switch.o task.o + +dirs-y += core/$(CORE)/curve25519 + +core-$(CONFIG_CURVE25519)+=curve25519/mpy121666.o +core-$(CONFIG_CURVE25519)+=curve25519/reduce25519.o +core-$(CONFIG_CURVE25519)+=curve25519/mul.o +core-$(CONFIG_CURVE25519)+=curve25519/scalarmult.o +core-$(CONFIG_CURVE25519)+=curve25519/sqr.o + core-$(CONFIG_WATCHDOG)+=watchdog.o diff --git a/core/cortex-m0/curve25519/cortex_m0_mpy121666.s b/core/cortex-m0/curve25519/mpy121666.S index 8e74dd0265..d2a467459b 100644 --- a/core/cortex-m0/curve25519/cortex_m0_mpy121666.s +++ b/core/cortex-m0/curve25519/mpy121666.S @@ -11,21 +11,7 @@ // Not yet tested on target hardware. - .cpu cortex-m0 - .fpu softvfp - .eabi_attribute 20, 1 - .eabi_attribute 21, 1 - .eabi_attribute 23, 3 - .eabi_attribute 24, 1 - .eabi_attribute 25, 1 - .eabi_attribute 26, 1 - .eabi_attribute 30, 2 - .eabi_attribute 34, 0 - .eabi_attribute 18, 4 .code 16 - - .file "cortex_m0_reduce25519.s" - .text .align 2 @@ -36,7 +22,7 @@ fe25519_mpyWith121666_asm: push {r4,r5,r6,r7,r14} - ldr r7,__label_for_immediate_56130 + ldr r7,=56130 ldr r2,[r1,#28] lsl r5,r2,#16 lsr r6,r2,#16 @@ -191,9 +177,5 @@ fe25519_mpyWith121666_asm: str r6,[r0,#28] pop {r4,r5,r6,r7,r15} - .align 2 -__label_for_immediate_56130: - .word 56130 - .size fe25519_mpyWith121666_asm, .-fe25519_mpyWith121666_asm diff --git a/core/cortex-m0/curve25519/mul.s b/core/cortex-m0/curve25519/mul.S index 366713a7a3..366713a7a3 100644 --- a/core/cortex-m0/curve25519/mul.s +++ b/core/cortex-m0/curve25519/mul.S diff --git a/core/cortex-m0/curve25519/cortex_m0_reduce25519.s b/core/cortex-m0/curve25519/reduce25519.S index 8984752a38..9a3c29a0f6 100644 --- 
a/core/cortex-m0/curve25519/cortex_m0_reduce25519.s +++ b/core/cortex-m0/curve25519/reduce25519.S @@ -8,21 +8,8 @@ // Generated and tested with C++ functions in the test subdirectory and on the target. // - .cpu cortex-m0 - .fpu softvfp - .eabi_attribute 20, 1 - .eabi_attribute 21, 1 - .eabi_attribute 23, 3 - .eabi_attribute 24, 1 - .eabi_attribute 25, 1 - .eabi_attribute 26, 1 - .eabi_attribute 30, 2 - .eabi_attribute 34, 0 - .eabi_attribute 18, 4 .code 16 - .file "cortex_m0_reduce25519.s" - .text .align 2 diff --git a/core/cortex-m0/curve25519/scalarmult.c b/core/cortex-m0/curve25519/scalarmult.c index d4e8c06c3e..f7370d1f43 100644 --- a/core/cortex-m0/curve25519/scalarmult.c +++ b/core/cortex-m0/curve25519/scalarmult.c @@ -57,12 +57,14 @@ Creative Commons CC0 1.0 Universal public domain dedication ============================================================================*/ -#include <inttypes.h> +#include "curve25519.h" +#include "util.h" // comment out this line if implementing conditional swaps by data moves //#define DH_SWAP_BY_POINTERS // Define the symbol to 0 in order to only use ladder steps +#define DH_REPLACE_LAST_THREE_LADDERSTEPS_WITH_DOUBLINGS 0 //#define DH_REPLACE_LAST_THREE_LADDERSTEPS_WITH_DOUBLINGS 1 typedef uint8_t uint8; @@ -142,26 +144,17 @@ fe25519_cpy( const fe25519* source ) { - uint32 ctr; - - for (ctr = 0; ctr < 8; ctr++) - { - dest->as_uint32[ctr] = source->as_uint32[ctr]; - } + memcpy(dest, source, 32); } static void fe25519_unpack( - volatile fe25519* out, + fe25519* out, const unsigned char in[32] ) { - uint8 ctr; + memcpy(out, in, 32); - for (ctr = 0; ctr < 32; ctr++) - { - out->as_uint8[ctr] = in[ctr]; - } out->as_uint8[31] &= 0x7f; // make sure that the last bit is cleared. 
} @@ -257,7 +250,7 @@ fe25519_square( static void fe25519_reduceCompletely( - volatile fe25519* inout + fe25519* inout ) { uint32 numberOfTimesToSubstractPrime; @@ -307,17 +300,12 @@ fe25519_reduceCompletely( static void fe25519_pack( unsigned char out[32], - volatile fe25519* in + fe25519* in ) { - uint8 ctr; - fe25519_reduceCompletely(in); - for (ctr = 0; ctr < 32; ctr++) - { - out[ctr] = in->as_uint8[ctr]; - } + memcpy(out, in, 32); } // Note, that r and x are allowed to overlap! @@ -511,7 +499,7 @@ typedef struct _ST_curve25519ladderstepWorkingState fe25519 xq; fe25519 zq; - volatile UN_256bitValue s; + UN_256bitValue s; int nextScalarBitToProcess; uint8 previousProcessedBit; @@ -649,11 +637,11 @@ curve25519_doublePointP (ST_curve25519ladderstepWorkingState* pState) #endif // #ifdef DH_REPLACE_LAST_THREE_LADDERSTEPS_WITH_DOUBLINGS -int -crypto_scalarmult_curve25519( - unsigned char* r, - const unsigned char* s, - const unsigned char* p +void +x25519_scalar_mult( + uint8_t r[32], + const uint8_t s[32], + const uint8_t p[32] ) { ST_curve25519ladderstepWorkingState state; @@ -741,21 +729,4 @@ crypto_scalarmult_curve25519( fe25519_pack (r, &state.xp); #endif - - return 0; -} - -int -crypto_scalarmult_curve25519_base( - unsigned char* q, - const unsigned char* n -) -{ - static const uint8 base[32] = - { - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - - return crypto_scalarmult_curve25519(q, n, base); } diff --git a/core/cortex-m0/curve25519/sqr.s b/core/cortex-m0/curve25519/sqr.S index 9666a1643c..b62121adb7 100644 --- a/core/cortex-m0/curve25519/sqr.s +++ b/core/cortex-m0/curve25519/sqr.S @@ -17,17 +17,9 @@ square256_asm: push {r1,r4,r5,r6,r7,r14} .syntax unified mov r3,r8 - .syntax divided - .syntax unified mov r4,r9 - .syntax divided - .syntax unified mov r5,r10 - .syntax divided - .syntax unified mov r6,r11 - .syntax divided - .syntax unified mov r7,r12 .syntax divided push {r3,r4,r5,r6,r7} @@ -41,8 +33,6 
@@ square256_asm: // clobbers all registers except for r14 .syntax unified mov r0,r4 - .syntax divided - .syntax unified mov r1,r5 .syntax divided sub r0,r6 @@ -54,11 +44,7 @@ square256_asm: sbc r1,r2 .syntax unified mov r8,r0 - .syntax divided - .syntax unified mov r9,r1 - .syntax divided - .syntax unified mov r10,r6 .syntax divided // START: sqr 64 Refined Karatsuba @@ -144,17 +130,9 @@ square256_asm: // Leaves r6 zero. .syntax unified mov r6,r10 - .syntax divided - .syntax unified mov r10,r0 - .syntax divided - .syntax unified mov r11,r1 - .syntax divided - .syntax unified mov r12,r2 - .syntax divided - .syntax unified mov r1,r3 .syntax divided // START: sqr 64 Refined Karatsuba @@ -247,17 +225,9 @@ square256_asm: adc r5,r6 .syntax unified mov r12,r2 - .syntax divided - .syntax unified mov r2,r8 - .syntax divided - .syntax unified mov r8,r3 - .syntax divided - .syntax unified mov r3,r9 - .syntax divided - .syntax unified mov r9,r4 .syntax divided // START: sqr 64 Refined Karatsuba @@ -343,19 +313,13 @@ square256_asm: // Returns r4 as zero. .syntax unified mov r2,r12 - .syntax divided - .syntax unified mov r3,r8 - .syntax divided - .syntax unified mov r4,r9 .syntax divided sub r2,r6 sbc r3,r7 .syntax unified mov r6,r4 - .syntax divided - .syntax unified mov r7,r5 .syntax divided sbc r4,r0 @@ -401,8 +365,6 @@ square256_asm: // clobbers all registers except for r14 .syntax unified mov r0,r4 - .syntax divided - .syntax unified mov r1,r5 .syntax divided sub r0,r6 @@ -414,11 +376,7 @@ square256_asm: sbc r1,r2 .syntax unified mov r8,r0 - .syntax divided - .syntax unified mov r9,r1 - .syntax divided - .syntax unified mov r10,r6 .syntax divided // START: sqr 64 Refined Karatsuba @@ -504,17 +462,9 @@ square256_asm: // Leaves r6 zero. 
.syntax unified mov r6,r10 - .syntax divided - .syntax unified mov r10,r0 - .syntax divided - .syntax unified mov r11,r1 - .syntax divided - .syntax unified mov r12,r2 - .syntax divided - .syntax unified mov r1,r3 .syntax divided // START: sqr 64 Refined Karatsuba @@ -607,17 +557,9 @@ square256_asm: adc r5,r6 .syntax unified mov r12,r2 - .syntax divided - .syntax unified mov r2,r8 - .syntax divided - .syntax unified mov r8,r3 - .syntax divided - .syntax unified mov r3,r9 - .syntax divided - .syntax unified mov r9,r4 .syntax divided // START: sqr 64 Refined Karatsuba @@ -703,19 +645,13 @@ square256_asm: // Returns r4 as zero. .syntax unified mov r2,r12 - .syntax divided - .syntax unified mov r3,r8 - .syntax divided - .syntax unified mov r4,r9 .syntax divided sub r2,r6 sbc r3,r7 .syntax unified mov r6,r4 - .syntax divided - .syntax unified mov r7,r5 .syntax divided sbc r4,r0 @@ -749,14 +685,8 @@ square256_asm: // Result in r0 ... r7 .syntax unified mov r8,r4 - .syntax divided - .syntax unified mov r9,r5 - .syntax divided - .syntax unified mov r10,r6 - .syntax divided - .syntax unified mov r11,r7 .syntax divided pop {r4,r5,r6,r7} @@ -766,17 +696,9 @@ square256_asm: adc r3,r7 .syntax unified mov r4,r8 - .syntax divided - .syntax unified mov r5,r9 - .syntax divided - .syntax unified mov r6,r10 - .syntax divided - .syntax unified mov r7,r11 - .syntax divided - .syntax unified mov r8,r0 .syntax divided mov r0,#0 @@ -809,8 +731,6 @@ square256_asm: // clobbers all registers except for r14 .syntax unified mov r0,r4 - .syntax divided - .syntax unified mov r1,r5 .syntax divided sub r0,r6 @@ -822,11 +742,7 @@ square256_asm: sbc r1,r2 .syntax unified mov r8,r0 - .syntax divided - .syntax unified mov r9,r1 - .syntax divided - .syntax unified mov r10,r6 .syntax divided // START: sqr 64 Refined Karatsuba @@ -912,17 +828,9 @@ square256_asm: // Leaves r6 zero. 
.syntax unified mov r6,r10 - .syntax divided - .syntax unified mov r10,r0 - .syntax divided - .syntax unified mov r11,r1 - .syntax divided - .syntax unified mov r12,r2 - .syntax divided - .syntax unified mov r1,r3 .syntax divided // START: sqr 64 Refined Karatsuba @@ -1015,17 +923,9 @@ square256_asm: adc r5,r6 .syntax unified mov r12,r2 - .syntax divided - .syntax unified mov r2,r8 - .syntax divided - .syntax unified mov r8,r3 - .syntax divided - .syntax unified mov r3,r9 - .syntax divided - .syntax unified mov r9,r4 .syntax divided // START: sqr 64 Refined Karatsuba @@ -1111,19 +1011,13 @@ square256_asm: // Returns r4 as zero. .syntax unified mov r2,r12 - .syntax divided - .syntax unified mov r3,r8 - .syntax divided - .syntax unified mov r4,r9 .syntax divided sub r2,r6 sbc r3,r7 .syntax unified mov r6,r4 - .syntax divided - .syntax unified mov r7,r5 .syntax divided sbc r4,r0 @@ -1165,14 +1059,8 @@ square256_asm: mvn r7,r7 .syntax unified mov r8,r4 - .syntax divided - .syntax unified mov r9,r5 - .syntax divided - .syntax unified mov r10,r6 - .syntax divided - .syntax unified mov r11,r7 .syntax divided mov r4,#143 @@ -1190,29 +1078,21 @@ square256_asm: stm r4!,{r0,r1,r2,r3} .syntax unified mov r4,r12 - .syntax divided - .syntax unified mov r0,r8 .syntax divided adc r0,r4 .syntax unified mov r8,r0 - .syntax divided - .syntax unified mov r1,r9 .syntax divided adc r1,r5 .syntax unified mov r9,r1 - .syntax divided - .syntax unified mov r2,r10 .syntax divided adc r2,r6 .syntax unified mov r10,r2 - .syntax divided - .syntax unified mov r3,r11 .syntax divided adc r3,r7 @@ -1223,8 +1103,6 @@ square256_asm: adc r0,r0 .syntax unified mov r12,r0 - .syntax divided - .syntax unified mov r0,r14 .syntax divided ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} @@ -1237,8 +1115,6 @@ square256_asm: stm r4!,{r0,r1,r2,r3} .syntax unified mov r14,r4 - .syntax divided - .syntax unified mov r0,r13 .syntax divided ldm r0!,{r4,r5,r6,r7} @@ -1278,17 +1154,9 @@ square256_asm: pop {r3,r4,r5,r6,r7} .syntax 
unified mov r8,r3 - .syntax divided - .syntax unified mov r9,r4 - .syntax divided - .syntax unified mov r10,r5 - .syntax divided - .syntax unified mov r11,r6 - .syntax divided - .syntax unified mov r12,r7 .syntax divided pop {r0,r4,r5,r6,r7,r15} |