summary | refs | log | tree | commit | diff
path: root/core/cortex-m0
diff options
context:
space:
mode:
author: Nicolas Boichat <drinkcat@google.com> 2017-06-22 10:34:39 +0800
committer: chrome-bot <chrome-bot@chromium.org> 2017-07-03 21:44:34 -0700
commit: edc668ea6cca1d4f3e53c211406d6a8bf5fe6364 (patch)
tree: 6ee9dcc2fd49d6d34da4c108093ee322212823ce /core/cortex-m0
parent: 136a80e1138633c2f2ac249c15078b587af9c7ec (diff)
download: chrome-ec-edc668ea6cca1d4f3e53c211406d6a8bf5fe6364.tar.gz
core/cortex-m0/curve25519: Integrate fast curve25519 implementation
- Move generic implementation to curve25519-generic.o - Always use optimized version on cortex-m0. - Rename .s files to .S, remove unnecessary lines in assembly files. - Rename crypto_scalarmult_curve25519 to x25519_scalar_mult to match the signature provided by the generic implementation. - Replace some handcoded memcpy with function calls - Remove unnecessary "volatile" specifications in the code. BRANCH=none BUG=b:62813194 TEST=To test old implementation only: - Increase CONFIG_RO_SIZE to 60kb - Increase console stack size to 2048 make BOARD=hammer PROJECT=x25519 TEST_BUILD=y ./util/flash_ec --board=hammer --image=build/hammer/x25519.bin EC console: runtest, taskinfo => Used to take ~4'17" to run (X25519 duration 256347 us). 1496/2048 stack size usage in CONSOLE task => Now takes ~1'25" to run (X25519 duration 84520 us) 732/2048 stack size usage in CONSOLE task TEST=In test/x25519.c, uncomment #define TEST_X25519_1M_ITERATIONS make BOARD=hammer PROJECT=x25519 TEST_BUILD=y ./util/flash_ec --board=hammer --image=build/hammer/x25519.bin EC console: runtest, wait ~23 hours, test passes. TEST=- Define CONFIG_CURVE25519_CORTEXM0 (next patch) make newsizes build/hammer/RW/ec.RW.flat shrank by 1888 bytes: (52208 to 50320) Change-Id: Icce38d3c32f431a85ac0f951cf34456b490dc665 Reviewed-on: https://chromium-review.googlesource.com/540962 Commit-Ready: Nicolas Boichat <drinkcat@chromium.org> Tested-by: Nicolas Boichat <drinkcat@chromium.org> Reviewed-by: Nicolas Boichat <drinkcat@chromium.org>
Diffstat (limited to 'core/cortex-m0')
-rw-r--r-- core/cortex-m0/build.mk 9
-rw-r--r-- core/cortex-m0/curve25519/mpy121666.S (renamed from core/cortex-m0/curve25519/cortex_m0_mpy121666.s) 20
-rw-r--r-- core/cortex-m0/curve25519/mul.S (renamed from core/cortex-m0/curve25519/mul.s) 0
-rw-r--r-- core/cortex-m0/curve25519/reduce25519.S (renamed from core/cortex-m0/curve25519/cortex_m0_reduce25519.s) 13
-rw-r--r-- core/cortex-m0/curve25519/scalarmult.c 59
-rw-r--r-- core/cortex-m0/curve25519/sqr.S (renamed from core/cortex-m0/curve25519/sqr.s) 132
6 files changed, 25 insertions, 208 deletions
diff --git a/core/cortex-m0/build.mk b/core/cortex-m0/build.mk
index 5f2bc32eb5..f8f33aa2a3 100644
--- a/core/cortex-m0/build.mk
+++ b/core/cortex-m0/build.mk
@@ -21,4 +21,13 @@ endif
core-y=cpu.o init.o thumb_case.o div.o lmul.o ldivmod.o uldivmod.o
core-$(CONFIG_COMMON_PANIC_OUTPUT)+=panic.o
core-$(CONFIG_COMMON_RUNTIME)+=switch.o task.o
+
+dirs-y += core/$(CORE)/curve25519
+
+core-$(CONFIG_CURVE25519)+=curve25519/mpy121666.o
+core-$(CONFIG_CURVE25519)+=curve25519/reduce25519.o
+core-$(CONFIG_CURVE25519)+=curve25519/mul.o
+core-$(CONFIG_CURVE25519)+=curve25519/scalarmult.o
+core-$(CONFIG_CURVE25519)+=curve25519/sqr.o
+
core-$(CONFIG_WATCHDOG)+=watchdog.o
diff --git a/core/cortex-m0/curve25519/cortex_m0_mpy121666.s b/core/cortex-m0/curve25519/mpy121666.S
index 8e74dd0265..d2a467459b 100644
--- a/core/cortex-m0/curve25519/cortex_m0_mpy121666.s
+++ b/core/cortex-m0/curve25519/mpy121666.S
@@ -11,21 +11,7 @@
// Not yet tested on target hardware.
- .cpu cortex-m0
- .fpu softvfp
- .eabi_attribute 20, 1
- .eabi_attribute 21, 1
- .eabi_attribute 23, 3
- .eabi_attribute 24, 1
- .eabi_attribute 25, 1
- .eabi_attribute 26, 1
- .eabi_attribute 30, 2
- .eabi_attribute 34, 0
- .eabi_attribute 18, 4
.code 16
-
- .file "cortex_m0_reduce25519.s"
-
.text
.align 2
@@ -36,7 +22,7 @@
fe25519_mpyWith121666_asm:
push {r4,r5,r6,r7,r14}
- ldr r7,__label_for_immediate_56130
+ ldr r7,=56130
ldr r2,[r1,#28]
lsl r5,r2,#16
lsr r6,r2,#16
@@ -191,9 +177,5 @@ fe25519_mpyWith121666_asm:
str r6,[r0,#28]
pop {r4,r5,r6,r7,r15}
- .align 2
-__label_for_immediate_56130:
- .word 56130
-
.size fe25519_mpyWith121666_asm, .-fe25519_mpyWith121666_asm
diff --git a/core/cortex-m0/curve25519/mul.s b/core/cortex-m0/curve25519/mul.S
index 366713a7a3..366713a7a3 100644
--- a/core/cortex-m0/curve25519/mul.s
+++ b/core/cortex-m0/curve25519/mul.S
diff --git a/core/cortex-m0/curve25519/cortex_m0_reduce25519.s b/core/cortex-m0/curve25519/reduce25519.S
index 8984752a38..9a3c29a0f6 100644
--- a/core/cortex-m0/curve25519/cortex_m0_reduce25519.s
+++ b/core/cortex-m0/curve25519/reduce25519.S
@@ -8,21 +8,8 @@
// Generated and tested with C++ functions in the test subdirectory and on the target.
//
- .cpu cortex-m0
- .fpu softvfp
- .eabi_attribute 20, 1
- .eabi_attribute 21, 1
- .eabi_attribute 23, 3
- .eabi_attribute 24, 1
- .eabi_attribute 25, 1
- .eabi_attribute 26, 1
- .eabi_attribute 30, 2
- .eabi_attribute 34, 0
- .eabi_attribute 18, 4
.code 16
- .file "cortex_m0_reduce25519.s"
-
.text
.align 2
diff --git a/core/cortex-m0/curve25519/scalarmult.c b/core/cortex-m0/curve25519/scalarmult.c
index d4e8c06c3e..f7370d1f43 100644
--- a/core/cortex-m0/curve25519/scalarmult.c
+++ b/core/cortex-m0/curve25519/scalarmult.c
@@ -57,12 +57,14 @@
Creative Commons CC0 1.0 Universal public domain dedication
============================================================================*/
-#include <inttypes.h>
+#include "curve25519.h"
+#include "util.h"
// comment out this line if implementing conditional swaps by data moves
//#define DH_SWAP_BY_POINTERS
// Define the symbol to 0 in order to only use ladder steps
+#define DH_REPLACE_LAST_THREE_LADDERSTEPS_WITH_DOUBLINGS 0
//#define DH_REPLACE_LAST_THREE_LADDERSTEPS_WITH_DOUBLINGS 1
typedef uint8_t uint8;
@@ -142,26 +144,17 @@ fe25519_cpy(
const fe25519* source
)
{
- uint32 ctr;
-
- for (ctr = 0; ctr < 8; ctr++)
- {
- dest->as_uint32[ctr] = source->as_uint32[ctr];
- }
+ memcpy(dest, source, 32);
}
static void
fe25519_unpack(
- volatile fe25519* out,
+ fe25519* out,
const unsigned char in[32]
)
{
- uint8 ctr;
+ memcpy(out, in, 32);
- for (ctr = 0; ctr < 32; ctr++)
- {
- out->as_uint8[ctr] = in[ctr];
- }
out->as_uint8[31] &= 0x7f; // make sure that the last bit is cleared.
}
@@ -257,7 +250,7 @@ fe25519_square(
static void
fe25519_reduceCompletely(
- volatile fe25519* inout
+ fe25519* inout
)
{
uint32 numberOfTimesToSubstractPrime;
@@ -307,17 +300,12 @@ fe25519_reduceCompletely(
static void
fe25519_pack(
unsigned char out[32],
- volatile fe25519* in
+ fe25519* in
)
{
- uint8 ctr;
-
fe25519_reduceCompletely(in);
- for (ctr = 0; ctr < 32; ctr++)
- {
- out[ctr] = in->as_uint8[ctr];
- }
+ memcpy(out, in, 32);
}
// Note, that r and x are allowed to overlap!
@@ -511,7 +499,7 @@ typedef struct _ST_curve25519ladderstepWorkingState
fe25519 xq;
fe25519 zq;
- volatile UN_256bitValue s;
+ UN_256bitValue s;
int nextScalarBitToProcess;
uint8 previousProcessedBit;
@@ -649,11 +637,11 @@ curve25519_doublePointP (ST_curve25519ladderstepWorkingState* pState)
#endif // #ifdef DH_REPLACE_LAST_THREE_LADDERSTEPS_WITH_DOUBLINGS
-int
-crypto_scalarmult_curve25519(
- unsigned char* r,
- const unsigned char* s,
- const unsigned char* p
+void
+x25519_scalar_mult(
+ uint8_t r[32],
+ const uint8_t s[32],
+ const uint8_t p[32]
)
{
ST_curve25519ladderstepWorkingState state;
@@ -741,21 +729,4 @@ crypto_scalarmult_curve25519(
fe25519_pack (r, &state.xp);
#endif
-
- return 0;
-}
-
-int
-crypto_scalarmult_curve25519_base(
- unsigned char* q,
- const unsigned char* n
-)
-{
- static const uint8 base[32] =
- {
- 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
-
- return crypto_scalarmult_curve25519(q, n, base);
}
diff --git a/core/cortex-m0/curve25519/sqr.s b/core/cortex-m0/curve25519/sqr.S
index 9666a1643c..b62121adb7 100644
--- a/core/cortex-m0/curve25519/sqr.s
+++ b/core/cortex-m0/curve25519/sqr.S
@@ -17,17 +17,9 @@ square256_asm:
push {r1,r4,r5,r6,r7,r14}
.syntax unified
mov r3,r8
- .syntax divided
- .syntax unified
mov r4,r9
- .syntax divided
- .syntax unified
mov r5,r10
- .syntax divided
- .syntax unified
mov r6,r11
- .syntax divided
- .syntax unified
mov r7,r12
.syntax divided
push {r3,r4,r5,r6,r7}
@@ -41,8 +33,6 @@ square256_asm:
// clobbers all registers except for r14
.syntax unified
mov r0,r4
- .syntax divided
- .syntax unified
mov r1,r5
.syntax divided
sub r0,r6
@@ -54,11 +44,7 @@ square256_asm:
sbc r1,r2
.syntax unified
mov r8,r0
- .syntax divided
- .syntax unified
mov r9,r1
- .syntax divided
- .syntax unified
mov r10,r6
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -144,17 +130,9 @@ square256_asm:
// Leaves r6 zero.
.syntax unified
mov r6,r10
- .syntax divided
- .syntax unified
mov r10,r0
- .syntax divided
- .syntax unified
mov r11,r1
- .syntax divided
- .syntax unified
mov r12,r2
- .syntax divided
- .syntax unified
mov r1,r3
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -247,17 +225,9 @@ square256_asm:
adc r5,r6
.syntax unified
mov r12,r2
- .syntax divided
- .syntax unified
mov r2,r8
- .syntax divided
- .syntax unified
mov r8,r3
- .syntax divided
- .syntax unified
mov r3,r9
- .syntax divided
- .syntax unified
mov r9,r4
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -343,19 +313,13 @@ square256_asm:
// Returns r4 as zero.
.syntax unified
mov r2,r12
- .syntax divided
- .syntax unified
mov r3,r8
- .syntax divided
- .syntax unified
mov r4,r9
.syntax divided
sub r2,r6
sbc r3,r7
.syntax unified
mov r6,r4
- .syntax divided
- .syntax unified
mov r7,r5
.syntax divided
sbc r4,r0
@@ -401,8 +365,6 @@ square256_asm:
// clobbers all registers except for r14
.syntax unified
mov r0,r4
- .syntax divided
- .syntax unified
mov r1,r5
.syntax divided
sub r0,r6
@@ -414,11 +376,7 @@ square256_asm:
sbc r1,r2
.syntax unified
mov r8,r0
- .syntax divided
- .syntax unified
mov r9,r1
- .syntax divided
- .syntax unified
mov r10,r6
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -504,17 +462,9 @@ square256_asm:
// Leaves r6 zero.
.syntax unified
mov r6,r10
- .syntax divided
- .syntax unified
mov r10,r0
- .syntax divided
- .syntax unified
mov r11,r1
- .syntax divided
- .syntax unified
mov r12,r2
- .syntax divided
- .syntax unified
mov r1,r3
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -607,17 +557,9 @@ square256_asm:
adc r5,r6
.syntax unified
mov r12,r2
- .syntax divided
- .syntax unified
mov r2,r8
- .syntax divided
- .syntax unified
mov r8,r3
- .syntax divided
- .syntax unified
mov r3,r9
- .syntax divided
- .syntax unified
mov r9,r4
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -703,19 +645,13 @@ square256_asm:
// Returns r4 as zero.
.syntax unified
mov r2,r12
- .syntax divided
- .syntax unified
mov r3,r8
- .syntax divided
- .syntax unified
mov r4,r9
.syntax divided
sub r2,r6
sbc r3,r7
.syntax unified
mov r6,r4
- .syntax divided
- .syntax unified
mov r7,r5
.syntax divided
sbc r4,r0
@@ -749,14 +685,8 @@ square256_asm:
// Result in r0 ... r7
.syntax unified
mov r8,r4
- .syntax divided
- .syntax unified
mov r9,r5
- .syntax divided
- .syntax unified
mov r10,r6
- .syntax divided
- .syntax unified
mov r11,r7
.syntax divided
pop {r4,r5,r6,r7}
@@ -766,17 +696,9 @@ square256_asm:
adc r3,r7
.syntax unified
mov r4,r8
- .syntax divided
- .syntax unified
mov r5,r9
- .syntax divided
- .syntax unified
mov r6,r10
- .syntax divided
- .syntax unified
mov r7,r11
- .syntax divided
- .syntax unified
mov r8,r0
.syntax divided
mov r0,#0
@@ -809,8 +731,6 @@ square256_asm:
// clobbers all registers except for r14
.syntax unified
mov r0,r4
- .syntax divided
- .syntax unified
mov r1,r5
.syntax divided
sub r0,r6
@@ -822,11 +742,7 @@ square256_asm:
sbc r1,r2
.syntax unified
mov r8,r0
- .syntax divided
- .syntax unified
mov r9,r1
- .syntax divided
- .syntax unified
mov r10,r6
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -912,17 +828,9 @@ square256_asm:
// Leaves r6 zero.
.syntax unified
mov r6,r10
- .syntax divided
- .syntax unified
mov r10,r0
- .syntax divided
- .syntax unified
mov r11,r1
- .syntax divided
- .syntax unified
mov r12,r2
- .syntax divided
- .syntax unified
mov r1,r3
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -1015,17 +923,9 @@ square256_asm:
adc r5,r6
.syntax unified
mov r12,r2
- .syntax divided
- .syntax unified
mov r2,r8
- .syntax divided
- .syntax unified
mov r8,r3
- .syntax divided
- .syntax unified
mov r3,r9
- .syntax divided
- .syntax unified
mov r9,r4
.syntax divided
// START: sqr 64 Refined Karatsuba
@@ -1111,19 +1011,13 @@ square256_asm:
// Returns r4 as zero.
.syntax unified
mov r2,r12
- .syntax divided
- .syntax unified
mov r3,r8
- .syntax divided
- .syntax unified
mov r4,r9
.syntax divided
sub r2,r6
sbc r3,r7
.syntax unified
mov r6,r4
- .syntax divided
- .syntax unified
mov r7,r5
.syntax divided
sbc r4,r0
@@ -1165,14 +1059,8 @@ square256_asm:
mvn r7,r7
.syntax unified
mov r8,r4
- .syntax divided
- .syntax unified
mov r9,r5
- .syntax divided
- .syntax unified
mov r10,r6
- .syntax divided
- .syntax unified
mov r11,r7
.syntax divided
mov r4,#143
@@ -1190,29 +1078,21 @@ square256_asm:
stm r4!,{r0,r1,r2,r3}
.syntax unified
mov r4,r12
- .syntax divided
- .syntax unified
mov r0,r8
.syntax divided
adc r0,r4
.syntax unified
mov r8,r0
- .syntax divided
- .syntax unified
mov r1,r9
.syntax divided
adc r1,r5
.syntax unified
mov r9,r1
- .syntax divided
- .syntax unified
mov r2,r10
.syntax divided
adc r2,r6
.syntax unified
mov r10,r2
- .syntax divided
- .syntax unified
mov r3,r11
.syntax divided
adc r3,r7
@@ -1223,8 +1103,6 @@ square256_asm:
adc r0,r0
.syntax unified
mov r12,r0
- .syntax divided
- .syntax unified
mov r0,r14
.syntax divided
ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7}
@@ -1237,8 +1115,6 @@ square256_asm:
stm r4!,{r0,r1,r2,r3}
.syntax unified
mov r14,r4
- .syntax divided
- .syntax unified
mov r0,r13
.syntax divided
ldm r0!,{r4,r5,r6,r7}
@@ -1278,17 +1154,9 @@ square256_asm:
pop {r3,r4,r5,r6,r7}
.syntax unified
mov r8,r3
- .syntax divided
- .syntax unified
mov r9,r4
- .syntax divided
- .syntax unified
mov r10,r5
- .syntax divided
- .syntax unified
mov r11,r6
- .syntax divided
- .syntax unified
mov r12,r7
.syntax divided
pop {r0,r4,r5,r6,r7,r15}