From a617511224e71fd084d10b869dd17922558b15fd Mon Sep 17 00:00:00 2001 From: Marius Schilder Date: Fri, 1 Jun 2018 15:19:58 -0700 Subject: g: speed up RSA keygen by 40% Add a specialized modexp for 1024 bit operands. Available under CONFIG_DCRYPTO_RSA_SPEEDUP. Image size increase 2184 bytes. Signed-off-by: mschilder@google.com BRANCH=none TEST=new console command "genp [seed]"; record timing w/ and w/o speed-up and check that primes are identical for identical seeds. BUG=b:68167013 Change-Id: I23e2b5ab13902354debcdb42c693127e1e26262a Reviewed-on: https://chromium-review.googlesource.com/1083697 Commit-Ready: Marius Schilder Tested-by: Marius Schilder Reviewed-by: Marius Schilder Reviewed-by: Vadim Bendebury Reviewed-by: Andrey Pronin --- chip/g/dcrypto/dcrypto_bn.c | 1603 +++++++++++++++++++++++++++++++------------ include/config.h | 6 + 2 files changed, 1186 insertions(+), 423 deletions(-) diff --git a/chip/g/dcrypto/dcrypto_bn.c b/chip/g/dcrypto/dcrypto_bn.c index 0e364b0d1c..cae54e6adf 100644 --- a/chip/g/dcrypto/dcrypto_bn.c +++ b/chip/g/dcrypto/dcrypto_bn.c @@ -11,443 +11,1103 @@ /* AUTO-GENERATED. DO NOT MODIFY. */ /* clang-format off */ -static const uint32_t IMEM_dcrypto[] = { +static const uint32_t IMEM_dcrypto_bn[] = { /* @0x0: function tag[1] { */ #define CF_tag_adr 0 - 0xf8000001, /* sigini #1 */ +0xf8000001, /* sigini #1 */ /* } */ /* @0x1: function d0inv[14] { */ #define CF_d0inv_adr 1 - 0x4c000000, /* xor r0, r0, r0 */ - 0x80000001, /* movi r0.0l, #1 */ - 0x7c740000, /* mov r29, r0 */ - 0x05100008, /* loop #256 ( */ - 0x5807bc00, /* mul128 r1, r28l, r29l */ - 0x588bbc00, /* mul128 r2, r28u, r29l */ - 0x50044110, /* add r1, r1, r2 << 128 */ - 0x590bbc00, /* mul128 r2, r28l, r29u */ - 0x50044110, /* add r1, r1, r2 << 128 */ - 0x40040100, /* and r1, r1, r0 */ - 0x44743d00, /* or r29, r29, r1 */ - 0x50000000, /* add r0, r0, r0 */ - /* ) */ - 0x5477bf00, /* sub r29, r31, r29 */ - 0x0c000000, /* ret */ +0x4c000000, /* xor r0, r0, r0 */ +0x80000001, /* movi r0.0l, #1 */ +0x7c740000, /* mov r29, r0 */ +0x05100008, /* loop #256 ( */ +0x5807bc00, /* mul128 r1, r28l, r29l */ +0x588bbc00, /* mul128 r2, r28u, r29l */ +0x50044110, /* add r1, r1, r2 << 128 */ +0x590bbc00, /* mul128 r2, r28l, r29u */ +0x50044110, /* add r1, r1, r2 << 128 */ +0x40040100, /* and r1, r1, r0 */ +0x44743d00, /* or r29, r29, r1 */ +0x50000000, /* add r0, r0, r0 */ +/* ) */ +0x5477bf00, /* sub r29, r31, r29 */ +0x0c000000, /* ret */ /* } */ -/* @0xf: function selcxSub[10] { */ +/* @0xf: function selcxSub[25] { */ #define CF_selcxSub_adr 15 - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x540c6300, /* sub r3, r3, r3 */ - 0x0600c005, /* loop *6 ( */ - 0x8c081800, /* ld *2, *0++ */ - 0x7c8c0000, /* ldr *3, *0 */ - 0x54906200, /* subb r4, r2, r3 */ - 0x66084408, /* selcx r2, r4, r2 */ - 0x7ca00300, /* ldr *0++, *3 */ - /* ) */ - 0x0c000000, /* ret */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x99100000, /* strnd r4 */ +0x5013e400, /* add r4, r4, r31 */ +0x1000101e, /* bl selcxSub_invsel */ +0x528c8402, /* addcx r3, r4, r4 << 16 */ +0x0600c007, /* loop *6 ( */ +0x8c081800, /* ld *2, *0++ */ +0x7c8c0000, /* ldr *3, *0 */ +0x7c800400, /* ldr *0, *4 */ +0x54906200, /* subb r4, r2, r3 */ +0x990c0000, /* strnd r3 */ +0x660c4401, /* sellx r3, r4, r2 */ +0x7ca00200, /* ldr *0++, *2 */ +/* ) */ +0x0c000000, /* ret */ +/*selcxSub_invsel: */ +0x528c8402, /* addcx r3, r4, r4 << 16 */ +0x0600c007, /* loop *6 ( */ +0x8c081800, /* ld *2, *0++ */ +0x7c8c0000, /* ldr *3, *0 */ +0x7c800400, /* ldr *0, *4 */ +0x54906200, /* subb r4, r2, r3 */ +0x990c0000, /* strnd r3 */ +0x660c8201, /* sellx r3, r2, r4 */ +0x7ca00200, /* ldr *0++, *2 */ +/* ) */ +0x0c000000, /* ret */ /* } */ -/* @0x19: function computeRR[41] { */ -#define CF_computeRR_adr 25 - 0x4c7fff00, /* xor r31, r31, r31 */ - 0x84004000, /* ldi r0, [#0] */ - 0x95800000, /* lddmp r0 */ - 0x4c0c6300, /* xor r3, r3, r3 */ - 0x800cffff, /* movi r3.0l, #65535 */ - 0x40040398, /* and r1, r3, r0 >> 192 */ - 0x480c6000, /* not r3, r3 */ - 0x400c0300, /* and r3, r3, r0 */ - 0x500c2301, /* add r3, r3, r1 << 8 */ - 0x94800300, /* ldlc r3 */ - 0x80040005, /* movi r1.0l, #5 */ - 0x81040003, /* movi r1.2l, #3 */ - 0x81840002, /* movi r1.3l, #2 */ - 0x82040004, /* movi r1.4l, #4 */ - 0x97800100, /* ldrfp r1 */ - 0x4c0c6300, /* xor r3, r3, r3 */ - 0x0600c001, /* loop *6 ( */ - 0x7ca00200, /* ldr *0++, *2 */ - /* ) */ - 0x560c1f00, /* subx r3, r31, r0 */ - 0x0800000f, /* call &selcxSub */ - 0x06000010, /* loop *0 ( */ - 0x97800100, /* ldrfp r1 */ - 0x560c6300, /* subx r3, r3, r3 */ - 0x0600c003, /* loop *6 ( */ - 0x7c8c0000, /* ldr *3, *0 */ - 0x52884200, /* addcx r2, r2, r2 */ - 0x7ca00300, /* ldr *0++, *3 */ - /* ) */ - 0x0800000f, /* call &selcxSub */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x560c6300, /* subx r3, r3, r3 */ - 0x0600c003, /* loop *6 ( */ - 0x8c081800, /* ld *2, *0++ */ - 0x7c8c0800, /* ldr *3, *0++ */ - 0x5e804300, /* cmpbx r3, r2 */ - /* ) */ - 0x0800000f, /* call &selcxSub */ - 0xfc000000, /* nop */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x0600c001, /* loop *6 ( */ - 0x90680800, /* st *0++, *2++ */ - /* ) */ - 0x0c000000, /* ret */ +/* @0x28: function computeRR[41] { */ +#define CF_computeRR_adr 40 +0x4c7fff00, /* xor r31, r31, r31 */ +0x84004000, /* ldi r0, [#0] */ +0x95800000, /* lddmp r0 */ +0x4c0c6300, /* xor r3, r3, r3 */ +0x800cffff, /* movi r3.0l, #65535 */ +0x40040398, /* and r1, r3, r0 >> 192 */ +0x480c6000, /* not r3, r3 */ +0x400c0300, /* and r3, r3, r0 */ +0x500c2301, /* add r3, r3, r1 << 8 */ +0x94800300, /* ldlc r3 */ +0x80040005, /* movi r1.0l, #5 */ +0x81040003, /* movi r1.2l, #3 */ +0x81840002, /* movi r1.3l, #2 */ +0x82040004, /* movi r1.4l, #4 */ +0x97800100, /* ldrfp r1 */ +0x4c0c6300, /* xor r3, r3, r3 */ +0x0600c001, /* loop *6 ( */ +0x7ca00200, /* ldr *0++, *2 */ +/* ) */ +0x560c1f00, /* subx r3, r31, r0 */ +0x0800000f, /* call &selcxSub */ +0x06000010, /* loop *0 ( */ +0x97800100, /* ldrfp r1 */ +0x560c6300, /* subx r3, r3, r3 */ +0x0600c003, /* loop *6 ( */ +0x7c8c0000, /* ldr *3, *0 */ +0x52884200, /* addcx r2, r2, r2 */ +0x7ca00300, /* ldr *0++, *3 */ +/* ) */ +0x0800000f, /* call &selcxSub */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x560c6300, /* subx r3, r3, r3 */ +0x0600c003, /* loop *6 ( */ +0x8c081800, /* ld *2, *0++ */ +0x7c8c0800, /* ldr *3, *0++ */ +0x5e804300, /* cmpbx r3, r2 */ +/* ) */ +0x0800000f, /* call &selcxSub */ +0xfc000000, /* nop */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x0600c001, /* loop *6 ( */ +0x90680800, /* st *0++, *2++ */ +/* ) */ +0x0c000000, /* ret */ /* } */ -/* @0x42: function dmXd0[9] { */ -#define CF_dmXd0_adr 66 - 0x586f3e00, /* mul128 r27, r30l, r25l */ - 0x59eb3e00, /* mul128 r26, r30u, r25u */ - 0x58df3e00, /* mul128 r23, r30u, r25l */ - 0x506efb10, /* add r27, r27, r23 << 128 */ - 0x50eafa90, /* addc r26, r26, r23 >> 128 */ - 0x595f3e00, /* mul128 r23, r30l, r25u */ - 0x506efb10, /* add r27, r27, r23 << 128 */ - 0x50eafa90, /* addc r26, r26, r23 >> 128 */ - 0x0c000000, /* ret */ +/* @0x51: function dmXd0[9] { */ +#define CF_dmXd0_adr 81 +0x586f3e00, /* mul128 r27, r30l, r25l */ +0x59eb3e00, /* mul128 r26, r30u, r25u */ +0x58df3e00, /* mul128 r23, r30u, r25l */ +0x506efb10, /* add r27, r27, r23 << 128 */ +0x50eafa90, /* addc r26, r26, r23 >> 128 */ +0x595f3e00, /* mul128 r23, r30l, r25u */ +0x506efb10, /* add r27, r27, r23 << 128 */ +0x50eafa90, /* addc r26, r26, r23 >> 128 */ +0x0c000000, /* ret */ /* } */ -/* @0x4b: function dmXa[9] { */ -#define CF_dmXa_adr 75 - 0x586c5e00, /* mul128 r27, r30l, r2l */ - 0x59e85e00, /* mul128 r26, r30u, r2u */ - 0x58dc5e00, /* mul128 r23, r30u, r2l */ - 0x506efb10, /* add r27, r27, r23 << 128 */ - 0x50eafa90, /* addc r26, r26, r23 >> 128 */ - 0x595c5e00, /* mul128 r23, r30l, r2u */ - 0x506efb10, /* add r27, r27, r23 << 128 */ - 0x50eafa90, /* addc r26, r26, r23 >> 128 */ - 0x0c000000, /* ret */ +/* @0x5a: function dmXa[9] { */ +#define CF_dmXa_adr 90 +0x586c5e00, /* mul128 r27, r30l, r2l */ +0x59e85e00, /* mul128 r26, r30u, r2u */ +0x58dc5e00, /* mul128 r23, r30u, r2l */ +0x506efb10, /* add r27, r27, r23 << 128 */ +0x50eafa90, /* addc r26, r26, r23 >> 128 */ +0x595c5e00, /* mul128 r23, r30l, r2u */ +0x506efb10, /* add r27, r27, r23 << 128 */ +0x50eafa90, /* addc r26, r26, r23 >> 128 */ +0x0c000000, /* ret */ /* } */ -/* @0x54: function mma[46] { */ -#define CF_mma_adr 84 - 0x8204001e, /* movi r1.4l, #30 */ - 0x82840018, /* movi r1.5l, #24 */ - 0x97800100, /* ldrfp r1 */ - 0x8c101b00, /* ld *4, *3++ */ - 0x0800004b, /* call &dmXa */ - 0x7c940800, /* ldr *5, *0++ */ - 0x507b1b00, /* add r30, r27, r24 */ - 0x50f7fa00, /* addc r29, r26, r31 */ - 0x7c640300, /* mov r25, r3 */ - 0x08000042, /* call &dmXd0 */ - 0x7c641b00, /* mov r25, r27 */ - 0x7c701a00, /* mov r28, r26 */ - 0x7c601e00, /* mov r24, r30 */ - 0x8c101800, /* ld *4, *0++ */ - 0x08000042, /* call &dmXd0 */ - 0x506f1b00, /* add r27, r27, r24 */ - 0x50f3fa00, /* addc r28, r26, r31 */ - 0x0600e00e, /* loop *7 ( */ - 0x8c101b00, /* ld *4, *3++ */ - 0x0800004b, /* call &dmXa */ - 0x7c940800, /* ldr *5, *0++ */ - 0x506f1b00, /* add r27, r27, r24 */ - 0x50ebfa00, /* addc r26, r26, r31 */ - 0x5063bb00, /* add r24, r27, r29 */ - 0x50f7fa00, /* addc r29, r26, r31 */ - 0x8c101800, /* ld *4, *0++ */ - 0x08000042, /* call &dmXd0 */ - 0x506f1b00, /* add r27, r27, r24 */ - 0x50ebfa00, /* addc r26, r26, r31 */ - 0x52639b00, /* addx r24, r27, r28 */ - 0x7ca80500, /* ldr *2++, *5 */ - 0x52f3fa00, /* addcx r28, r26, r31 */ - /* ) */ - 0x52e39d00, /* addcx r24, r29, r28 */ - 0x7ca80500, /* ldr *2++, *5 */ - 0x95800000, /* lddmp r0 */ - 0x97800100, /* ldrfp r1 */ - 0x54739c00, /* sub r28, r28, r28 */ - 0x0600c007, /* loop *6 ( */ - 0x8c141800, /* ld *5, *0++ */ - 0x7c900000, /* ldr *4, *0 */ - 0x54f71e00, /* subb r29, r30, r24 */ - 0x99600000, /* strnd r24 */ - 0x7c800500, /* ldr *0, *5 */ - 0x6663dd08, /* selcx r24, r29, r30 */ - 0x7ca00500, /* ldr *0++, *5 */ - /* ) */ - 0x0c000000, /* ret */ +/* @0x63: function mma_sub_cx[23] { */ +#define CF_mma_sub_cx_adr 99 +0x99700000, /* strnd r28 */ +0x5073fc00, /* add r28, r28, r31 */ +0x10001070, /* bl mma_invsel */ +0x52f39c02, /* addcx r28, r28, r28 << 16 */ +0x0600c007, /* loop *6 ( */ +0x8c141800, /* ld *5, *0++ */ +0x7c900000, /* ldr *4, *0 */ +0x54f71e00, /* subb r29, r30, r24 */ +0x99600000, /* strnd r24 */ +0x7c800500, /* ldr *0, *5 */ +0x6663dd01, /* sellx r24, r29, r30 */ +0x7ca00500, /* ldr *0++, *5 */ +/* ) */ +0x0c000000, /* ret */ +/*mma_invsel: */ +0x52f39c02, /* addcx r28, r28, r28 << 16 */ +0x0600c007, /* loop *6 ( */ +0x8c141800, /* ld *5, *0++ */ +0x7c900000, /* ldr *4, *0 */ +0x54f71e00, /* subb r29, r30, r24 */ +0x99600000, /* strnd r24 */ +0x7c800500, /* ldr *0, *5 */ +0x6663be01, /* sellx r24, r30, r29 */ +0x7ca00500, /* ldr *0++, *5 */ +/* ) */ +0x0c000000, /* ret */ /* } */ -/* @0x82: function setupPtrs[11] { */ -#define CF_setupPtrs_adr 130 - 0x847c4000, /* ldi r31, [#0] */ - 0x4c7fff00, /* xor r31, r31, r31 */ - 0x95800000, /* lddmp r0 */ - 0x94800000, /* ldlc r0 */ - 0x7c041f00, /* mov r1, r31 */ - 0x80040004, /* movi r1.0l, #4 */ - 0x80840003, /* movi r1.1l, #3 */ - 0x81040004, /* movi r1.2l, #4 */ - 0x81840002, /* movi r1.3l, #2 */ - 0x97800100, /* ldrfp r1 */ - 0x0c000000, /* ret */ +/* @0x7a: function mma[39] { */ +#define CF_mma_adr 122 +0x8204001e, /* movi r1.4l, #30 */ +0x82840018, /* movi r1.5l, #24 */ +0x97800100, /* ldrfp r1 */ +0x8c101b00, /* ld *4, *3++ */ +0x0800005a, /* call &dmXa */ +0x7c940800, /* ldr *5, *0++ */ +0x507b1b00, /* add r30, r27, r24 */ +0x50f7fa00, /* addc r29, r26, r31 */ +0x7c640300, /* mov r25, r3 */ +0x08000051, /* call &dmXd0 */ +0x7c641b00, /* mov r25, r27 */ +0x7c701a00, /* mov r28, r26 */ +0x7c601e00, /* mov r24, r30 */ +0x8c101800, /* ld *4, *0++ */ +0x08000051, /* call &dmXd0 */ +0x506f1b00, /* add r27, r27, r24 */ +0x50f3fa00, /* addc r28, r26, r31 */ +0x0600e00e, /* loop *7 ( */ +0x8c101b00, /* ld *4, *3++ */ +0x0800005a, /* call &dmXa */ +0x7c940800, /* ldr *5, *0++ */ +0x506f1b00, /* add r27, r27, r24 */ +0x50ebfa00, /* addc r26, r26, r31 */ +0x5063bb00, /* add r24, r27, r29 */ +0x50f7fa00, /* addc r29, r26, r31 */ +0x8c101800, /* ld *4, *0++ */ +0x08000051, /* call &dmXd0 */ +0x506f1b00, /* add r27, r27, r24 */ +0x50ebfa00, /* addc r26, r26, r31 */ +0x52639b00, /* addx r24, r27, r28 */ +0x7ca80500, /* ldr *2++, *5 */ +0x52f3fa00, /* addcx r28, r26, r31 */ +/* ) */ +0x52e39d00, /* addcx r24, r29, r28 */ +0x7ca80500, /* ldr *2++, *5 */ +0x95800000, /* lddmp r0 */ +0x97800100, /* ldrfp r1 */ +0x08000063, /* call &mma_sub_cx */ +0xfc000000, /* nop */ +0x0c000000, /* ret */ /* } */ -/* @0x8d: function mulx[19] { */ -#define CF_mulx_adr 141 - 0x84004000, /* ldi r0, [#0] */ - 0x08000082, /* call &setupPtrs */ - 0x8c041100, /* ld *1, *1 */ - 0x7c081f00, /* mov r2, r31 */ - 0x0600c001, /* loop *6 ( */ - 0x7ca80300, /* ldr *2++, *3 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x0600c004, /* loop *6 ( */ - 0x8c0c1c00, /* ld *3, *4++ */ - 0x95000000, /* stdmp r0 */ - 0x08000054, /* call &mma */ - 0x95800000, /* lddmp r0 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x0600c001, /* loop *6 ( */ - 0x90740800, /* st *0++, *5++ */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x0c000000, /* ret */ +/* @0xa1: function setupPtrs[11] { */ +#define CF_setupPtrs_adr 161 +0x847c4000, /* ldi r31, [#0] */ +0x4c7fff00, /* xor r31, r31, r31 */ +0x95800000, /* lddmp r0 */ +0x94800000, /* ldlc r0 */ +0x7c041f00, /* mov r1, r31 */ +0x80040004, /* movi r1.0l, #4 */ +0x80840003, /* movi r1.1l, #3 */ +0x81040004, /* movi r1.2l, #4 */ +0x81840002, /* movi r1.3l, #2 */ +0x97800100, /* ldrfp r1 */ +0x0c000000, /* ret */ /* } */ -/* @0xa0: function mul1_exp[30] { */ -#define CF_mul1_exp_adr 160 - 0x8c041100, /* ld *1, *1 */ - 0x7c081f00, /* mov r2, r31 */ - 0x0600c001, /* loop *6 ( */ - 0x7ca80300, /* ldr *2++, *3 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x80080001, /* movi r2.0l, #1 */ - 0x0600c003, /* loop *6 ( */ - 0x95800000, /* lddmp r0 */ - 0x08000054, /* call &mma */ - 0x7c081f00, /* mov r2, r31 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x56084200, /* subx r2, r2, r2 */ - 0x0600c003, /* loop *6 ( */ - 0x8c041800, /* ld *1, *0++ */ - 0x7c8c0800, /* ldr *3, *0++ */ - 0x5e804300, /* cmpbx r3, r2 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x540c6300, /* sub r3, r3, r3 */ - 0x0600c006, /* loop *6 ( */ - 0x8c041800, /* ld *1, *0++ */ - 0x7c8c0800, /* ldr *3, *0++ */ - 0x548c6200, /* subb r3, r2, r3 */ - 0x66084308, /* selcx r2, r3, r2 */ - 0x90740300, /* st *3, *5++ */ - 0xfc000000, /* nop */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x0c000000, /* ret */ +/* @0xac: function mulx[19] { */ +#define CF_mulx_adr 172 +0x84004000, /* ldi r0, [#0] */ +0x080000a1, /* call &setupPtrs */ +0x8c041100, /* ld *1, *1 */ +0x7c081f00, /* mov r2, r31 */ +0x0600c001, /* loop *6 ( */ +0x7ca80300, /* ldr *2++, *3 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x0600c004, /* loop *6 ( */ +0x8c0c1c00, /* ld *3, *4++ */ +0x95000000, /* stdmp r0 */ +0x0800007a, /* call &mma */ +0x95800000, /* lddmp r0 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x0600c001, /* loop *6 ( */ +0x90740800, /* st *0++, *5++ */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x0c000000, /* ret */ /* } */ -/* @0xbe: function mul1[4] { */ -#define CF_mul1_adr 190 - 0x84004000, /* ldi r0, [#0] */ - 0x08000082, /* call &setupPtrs */ - 0x080000a0, /* call &mul1_exp */ - 0x0c000000, /* ret */ +/* @0xbf: function mm1_sub_cx[22] { */ +#define CF_mm1_sub_cx_adr 191 +0x990c0000, /* strnd r3 */ +0x500fe300, /* add r3, r3, r31 */ +0x100010cc, /* bl mm1_invsel */ +0x528c6302, /* addcx r3, r3, r3 << 16 */ +0x0600c006, /* loop *6 ( */ +0x8c041800, /* ld *1, *0++ */ +0x7c8c0800, /* ldr *3, *0++ */ +0x548c6200, /* subb r3, r2, r3 */ +0x66084301, /* sellx r2, r3, r2 */ +0x90740300, /* st *3, *5++ */ +0xfc000000, /* nop */ +/* ) */ +0x0c000000, /* ret */ +0xfc000000, /* nop */ +/*mm1_invsel: */ +0x528c6302, /* addcx r3, r3, r3 << 16 */ +0x0600c006, /* loop *6 ( */ +0x8c041800, /* ld *1, *0++ */ +0x7c8c0800, /* ldr *3, *0++ */ +0x548c6200, /* subb r3, r2, r3 */ +0x66086201, /* sellx r2, r2, r3 */ +0x90740300, /* st *3, *5++ */ +0xfc000000, /* nop */ +/* ) */ +0x0c000000, /* ret */ /* } */ -/* @0xc2: function sqrx_exp[19] { */ -#define CF_sqrx_exp_adr 194 - 0x84004020, /* ldi r0, [#1] */ - 0x95800000, /* lddmp r0 */ - 0x8c041100, /* ld *1, *1 */ - 0x7c081f00, /* mov r2, r31 */ - 0x0600c001, /* loop *6 ( */ - 0x7ca80300, /* ldr *2++, *3 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x0600c004, /* loop *6 ( */ - 0x8c0c1c00, /* ld *3, *4++ */ - 0x95000000, /* stdmp r0 */ - 0x08000054, /* call &mma */ - 0x95800000, /* lddmp r0 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x0600c001, /* loop *6 ( */ - 0x90740800, /* st *0++, *5++ */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x95800000, /* lddmp r0 */ - 0x0c000000, /* ret */ +/* @0xd5: function mul1_exp[23] { */ +#define CF_mul1_exp_adr 213 +0x8c041100, /* ld *1, *1 */ +0x7c081f00, /* mov r2, r31 */ +0x0600c001, /* loop *6 ( */ +0x7ca80300, /* ldr *2++, *3 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x80080001, /* movi r2.0l, #1 */ +0x0600c003, /* loop *6 ( */ +0x95800000, /* lddmp r0 */ +0x0800007a, /* call &mma */ +0x7c081f00, /* mov r2, r31 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x56084200, /* subx r2, r2, r2 */ +0x0600c003, /* loop *6 ( */ +0x8c041800, /* ld *1, *0++ */ +0x7c8c0800, /* ldr *3, *0++ */ +0x5e804300, /* cmpbx r3, r2 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x080000bf, /* call &mm1_sub_cx */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x0c000000, /* ret */ /* } */ -/* @0xd5: function mulx_exp[14] { */ -#define CF_mulx_exp_adr 213 - 0x84004040, /* ldi r0, [#2] */ - 0x95800000, /* lddmp r0 */ - 0x8c041100, /* ld *1, *1 */ - 0x7c081f00, /* mov r2, r31 */ - 0x0600c001, /* loop *6 ( */ - 0x7ca80300, /* ldr *2++, *3 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x0600c004, /* loop *6 ( */ - 0x8c0c1c00, /* ld *3, *4++ */ - 0x95000000, /* stdmp r0 */ - 0x08000054, /* call &mma */ - 0x95800000, /* lddmp r0 */ - /* ) */ - 0x97800100, /* ldrfp r1 */ - 0x0c000000, /* ret */ +/* @0xec: function mul1[4] { */ +#define CF_mul1_adr 236 +0x84004000, /* ldi r0, [#0] */ +0x080000a1, /* call &setupPtrs */ +0x080000d5, /* call &mul1_exp */ +0x0c000000, /* ret */ /* } */ -/* @0xe3: function modexp[43] { */ -#define CF_modexp_adr 227 - 0x0800008d, /* call &mulx */ - 0x84004060, /* ldi r0, [#3] */ - 0x95800000, /* lddmp r0 */ - 0x54084200, /* sub r2, r2, r2 */ - 0x0600c004, /* loop *6 ( */ - 0xfc000000, /* nop */ - 0x8c0c1800, /* ld *3, *0++ */ - 0x54885f00, /* subb r2, r31, r2 */ - 0x90740300, /* st *3, *5++ */ - /* ) */ - 0xfc000000, /* nop */ - 0x7c081f00, /* mov r2, r31 */ - 0x8008ffff, /* movi r2.0l, #65535 */ - 0x400c0298, /* and r3, r2, r0 >> 192 */ - 0x48084000, /* not r2, r2 */ - 0x40080200, /* and r2, r2, r0 */ - 0x50086201, /* add r2, r2, r3 << 8 */ - 0x94800200, /* ldlc r2 */ - 0x06000015, /* loop *0 ( */ - 0x080000c2, /* call &sqrx_exp */ - 0x080000d5, /* call &mulx_exp */ - 0x84004060, /* ldi r0, [#3] */ - 0x95800000, /* lddmp r0 */ - 0x99080000, /* strnd r2 */ - 0x54084200, /* sub r2, r2, r2 */ - 0x0600c004, /* loop *6 ( */ - 0x99080000, /* strnd r2 */ - 0x8c0c1400, /* ld *3, *4 */ - 0x50884200, /* addc r2, r2, r2 */ - 0x90700300, /* st *3, *4++ */ - /* ) */ - 0x0600c008, /* loop *6 ( */ - 0x99080000, /* strnd r2 */ - 0x8c041500, /* ld *1, *5 */ - 0x90540300, /* st *3, *5 */ - 0x7c8c0800, /* ldr *3, *0++ */ - 0x7c000200, /* mov r0, r2 */ - 0x99080000, /* strnd r2 */ - 0x64086008, /* selc r2, r0, r3 */ - 0x90740300, /* st *3, *5++ */ - /* ) */ - 0xfc000000, /* nop */ - /* ) */ - 0x84004060, /* ldi r0, [#3] */ - 0x95800000, /* lddmp r0 */ - 0x080000a0, /* call &mul1_exp */ - 0x0c000000, /* ret */ +/* @0xf0: function sqrx_exp[19] { */ +#define CF_sqrx_exp_adr 240 +0x84004020, /* ldi r0, [#1] */ +0x95800000, /* lddmp r0 */ +0x8c041100, /* ld *1, *1 */ +0x7c081f00, /* mov r2, r31 */ +0x0600c001, /* loop *6 ( */ +0x7ca80300, /* ldr *2++, *3 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x0600c004, /* loop *6 ( */ +0x8c0c1c00, /* ld *3, *4++ */ +0x95000000, /* stdmp r0 */ +0x0800007a, /* call &mma */ +0x95800000, /* lddmp r0 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x0600c001, /* loop *6 ( */ +0x90740800, /* st *0++, *5++ */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x95800000, /* lddmp r0 */ +0x0c000000, /* ret */ /* } */ -/* @0x10e: function modexp_blinded[76] { */ -#define CF_modexp_blinded_adr 270 - 0x0800008d, /* call &mulx */ - 0x84004060, /* ldi r0, [#3] */ - 0x95800000, /* lddmp r0 */ - 0x54084200, /* sub r2, r2, r2 */ - 0x0600c004, /* loop *6 ( */ - 0xfc000000, /* nop */ - 0x8c0c1800, /* ld *3, *0++ */ - 0x54885f00, /* subb r2, r31, r2 */ - 0x90740300, /* st *3, *5++ */ - /* ) */ - 0xfc000000, /* nop */ - 0x8c0c1900, /* ld *3, *1++ */ - 0x8c0c1100, /* ld *3, *1 */ - 0x521c5f90, /* addx r7, r31, r2 >> 128 */ - 0x590c4200, /* mul128 r3, r2l, r2u */ - 0x7c181f00, /* mov r6, r31 */ - 0x0600c011, /* loop *6 ( */ - 0x99080000, /* strnd r2 */ - 0x8c0c1400, /* ld *3, *4 */ - 0x58106200, /* mul128 r4, r2l, r3l */ - 0x59946200, /* mul128 r5, r2u, r3u */ - 0x58806200, /* mul128 r0, r2u, r3l */ - 0x50100410, /* add r4, r4, r0 << 128 */ - 0x50940590, /* addc r5, r5, r0 >> 128 */ - 0x59006200, /* mul128 r0, r2l, r3u */ - 0x50100410, /* add r4, r4, r0 << 128 */ - 0x50940590, /* addc r5, r5, r0 >> 128 */ - 0x5010c400, /* add r4, r4, r6 */ - 0x5097e500, /* addc r5, r5, r31 */ - 0x50088200, /* add r2, r2, r4 */ - 0x509be500, /* addc r6, r5, r31 */ - 0x5688e200, /* subbx r2, r2, r7 */ - 0x90700300, /* st *3, *4++ */ - 0x541ce700, /* sub r7, r7, r7 */ - /* ) */ - 0x7c080600, /* mov r2, r6 */ - 0x5688e200, /* subbx r2, r2, r7 */ - 0x90500300, /* st *3, *4 */ - 0xfc000000, /* nop */ - 0x84004060, /* ldi r0, [#3] */ - 0x7c081f00, /* mov r2, r31 */ - 0x8008ffff, /* movi r2.0l, #65535 */ - 0x400c0298, /* and r3, r2, r0 >> 192 */ - 0x48084000, /* not r2, r2 */ - 0x40080200, /* and r2, r2, r0 */ - 0x510c0301, /* addi r3, r3, #1 */ - 0x50086201, /* add r2, r2, r3 << 8 */ - 0x94800200, /* ldlc r2 */ - 0x06000019, /* loop *0 ( */ - 0x080000c2, /* call &sqrx_exp */ - 0x080000d5, /* call &mulx_exp */ - 0x84004060, /* ldi r0, [#3] */ - 0x95800000, /* lddmp r0 */ - 0x99080000, /* strnd r2 */ - 0x54084200, /* sub r2, r2, r2 */ - 0x0600c004, /* loop *6 ( */ - 0x99080000, /* strnd r2 */ - 0x8c0c1400, /* ld *3, *4 */ - 0x50884200, /* addc r2, r2, r2 */ - 0x90700300, /* st *3, *4++ */ - /* ) */ - 0x99080000, /* strnd r2 */ - 0x8c0c1400, /* ld *3, *4 */ - 0x50884200, /* addc r2, r2, r2 */ - 0x90700300, /* st *3, *4++ */ - 0x0600c008, /* loop *6 ( */ - 0x99080000, /* strnd r2 */ - 0x8c041500, /* ld *1, *5 */ - 0x90540300, /* st *3, *5 */ - 0x7c8c0800, /* ldr *3, *0++ */ - 0x7c000200, /* mov r0, r2 */ - 0x99080000, /* strnd r2 */ - 0x64086008, /* selc r2, r0, r3 */ - 0x90740300, /* st *3, *5++ */ - /* ) */ - 0xfc000000, /* nop */ - /* ) */ - 0x84004060, /* ldi r0, [#3] */ - 0x95800000, /* lddmp r0 */ - 0x080000a0, /* call &mul1_exp */ - 0x0c000000, /* ret */ +/* @0x103: function mulx_exp[14] { */ +#define CF_mulx_exp_adr 259 +0x84004040, /* ldi r0, [#2] */ +0x95800000, /* lddmp r0 */ +0x8c041100, /* ld *1, *1 */ +0x7c081f00, /* mov r2, r31 */ +0x0600c001, /* loop *6 ( */ +0x7ca80300, /* ldr *2++, *3 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x0600c004, /* loop *6 ( */ +0x8c0c1c00, /* ld *3, *4++ */ +0x95000000, /* stdmp r0 */ +0x0800007a, /* call &mma */ +0x95800000, /* lddmp r0 */ +/* ) */ +0x97800100, /* ldrfp r1 */ +0x0c000000, /* ret */ /* } */ -/* @0x15a: function modload[12] { */ -#define CF_modload_adr 346 - 0x4c7fff00, /* xor r31, r31, r31 */ - 0x84004000, /* ldi r0, [#0] */ - 0x95800000, /* lddmp r0 */ - 0x94800000, /* ldlc r0 */ - 0x8000001c, /* movi r0.0l, #28 */ - 0x8080001d, /* movi r0.1l, #29 */ - 0x97800000, /* ldrfp r0 */ - 0x8c001000, /* ld *0, *0 */ - 0x08000001, /* call &d0inv */ - 0x90440100, /* st *1, *1 */ - 0x08000019, /* call &computeRR */ - 0x0c000000, /* ret */ - /* } */ +/* @0x111: function selOutOrC[30] { */ +#define CF_selOutOrC_adr 273 +0x990c0000, /* strnd r3 */ +0x440c6300, /* or r3, r3, r3 */ +0x10001122, /* bl selOutOrC_invsel */ +0x508c6302, /* addc r3, r3, r3 << 16 */ +0x0600c00a, /* loop *6 ( */ +0x990c0000, /* strnd r3 */ +0x99080000, /* strnd r2 */ +0x8c041500, /* ld *1, *5 */ +0x90540300, /* st *3, *5 */ +0x7c8c0800, /* ldr *3, *0++ */ +0x99000000, /* strnd r0 */ +0x7c000200, /* mov r0, r2 */ +0x99080000, /* strnd r2 */ +0x64086001, /* sell r2, r0, r3 */ +0x90740300, /* st *3, *5++ */ +/* ) */ +0x0c000000, /* ret */ +0xfc000000, /* nop */ +/*selOutOrC_invsel: */ +0x508c6302, /* addc r3, r3, r3 << 16 */ +0x0600c00a, /* loop *6 ( */ +0x990c0000, /* strnd r3 */ +0x99080000, /* strnd r2 */ +0x8c041500, /* ld *1, *5 */ +0x90540300, /* st *3, *5 */ +0x7c8c0800, /* ldr *3, *0++ */ +0x99000000, /* strnd r0 */ +0x7c000200, /* mov r0, r2 */ +0x99080000, /* strnd r2 */ +0x64080301, /* sell r2, r3, r0 */ +0x90740300, /* st *3, *5++ */ +/* ) */ +0x0c000000, /* ret */ +/* } */ +/* @0x12f: function modexp[35] { */ +#define CF_modexp_adr 303 +0x080000ac, /* call &mulx */ +0x84004060, /* ldi r0, [#3] */ +0x95800000, /* lddmp r0 */ +0x54084200, /* sub r2, r2, r2 */ +0x0600c004, /* loop *6 ( */ +0xfc000000, /* nop */ +0x8c0c1800, /* ld *3, *0++ */ +0x54885f00, /* subb r2, r31, r2 */ +0x90740300, /* st *3, *5++ */ +/* ) */ +0xfc000000, /* nop */ +0x7c081f00, /* mov r2, r31 */ +0x8008ffff, /* movi r2.0l, #65535 */ +0x400c0298, /* and r3, r2, r0 >> 192 */ +0x48084000, /* not r2, r2 */ +0x40080200, /* and r2, r2, r0 */ +0x50086201, /* add r2, r2, r3 << 8 */ +0x94800200, /* ldlc r2 */ +0x0600000d, /* loop *0 ( */ +0x080000f0, /* call &sqrx_exp */ +0x08000103, /* call &mulx_exp */ +0x84004060, /* ldi r0, [#3] */ +0x95800000, /* lddmp r0 */ +0x99080000, /* strnd r2 */ +0x50084200, /* add r2, r2, r2 */ +0x0600c004, /* loop *6 ( */ +0x99080000, /* strnd r2 */ +0x8c0c1400, /* ld *3, *4 */ +0x50884200, /* addc r2, r2, r2 */ +0x90700300, /* st *3, *4++ */ +/* ) */ +0x08000111, /* call &selOutOrC */ +0xfc000000, /* nop */ +/* ) */ +0x84004060, /* ldi r0, [#3] */ +0x95800000, /* lddmp r0 */ +0x080000d5, /* call &mul1_exp */ +0x0c000000, /* ret */ +/* } */ +/* @0x152: function modexp_blinded[76] { */ +#define CF_modexp_blinded_adr 338 +0x080000ac, /* call &mulx */ +0x84004060, /* ldi r0, [#3] */ +0x95800000, /* lddmp r0 */ +0x54084200, /* sub r2, r2, r2 */ +0x0600c004, /* loop *6 ( */ +0xfc000000, /* nop */ +0x8c0c1800, /* ld *3, *0++ */ +0x54885f00, /* subb r2, r31, r2 */ +0x90740300, /* st *3, *5++ */ +/* ) */ +0xfc000000, /* nop */ +0x8c0c1900, /* ld *3, *1++ */ +0x8c0c1100, /* ld *3, *1 */ +0x521c5f90, /* addx r7, r31, r2 >> 128 */ +0x590c4200, /* mul128 r3, r2l, r2u */ +0x7c181f00, /* mov r6, r31 */ +0x0600c011, /* loop *6 ( */ +0x99080000, /* strnd r2 */ +0x8c0c1400, /* ld *3, *4 */ +0x58106200, /* mul128 r4, r2l, r3l */ +0x59946200, /* mul128 r5, r2u, r3u */ +0x58806200, /* mul128 r0, r2u, r3l */ +0x50100410, /* add r4, r4, r0 << 128 */ +0x50940590, /* addc r5, r5, r0 >> 128 */ +0x59006200, /* mul128 r0, r2l, r3u */ +0x50100410, /* add r4, r4, r0 << 128 */ +0x50940590, /* addc r5, r5, r0 >> 128 */ +0x5010c400, /* add r4, r4, r6 */ +0x5097e500, /* addc r5, r5, r31 */ +0x50088200, /* add r2, r2, r4 */ +0x509be500, /* addc r6, r5, r31 */ +0x5688e200, /* subbx r2, r2, r7 */ +0x90700300, /* st *3, *4++ */ +0x541ce700, /* sub r7, r7, r7 */ +/* ) */ +0x7c080600, /* mov r2, r6 */ +0x5688e200, /* subbx r2, r2, r7 */ +0x90500300, /* st *3, *4 */ +0xfc000000, /* nop */ +0x84004060, /* ldi r0, [#3] */ +0x7c081f00, /* mov r2, r31 */ +0x8008ffff, /* movi r2.0l, #65535 */ +0x400c0298, /* and r3, r2, r0 >> 192 */ +0x48084000, /* not r2, r2 */ +0x40080200, /* and r2, r2, r0 */ +0x510c0301, /* addi r3, r3, #1 */ +0x50086201, /* add r2, r2, r3 << 8 */ +0x94800200, /* ldlc r2 */ +0x06000019, /* loop *0 ( */ +0x080000f0, /* call &sqrx_exp */ +0x08000103, /* call &mulx_exp */ +0x84004060, /* ldi r0, [#3] */ +0x95800000, /* lddmp r0 */ +0x99080000, /* strnd r2 */ +0x54084200, /* sub r2, r2, r2 */ +0x0600c004, /* loop *6 ( */ +0x99080000, /* strnd r2 */ +0x8c0c1400, /* ld *3, *4 */ +0x50884200, /* addc r2, r2, r2 */ +0x90700300, /* st *3, *4++ */ +/* ) */ +0x99080000, /* strnd r2 */ +0x8c0c1400, /* ld *3, *4 */ +0x50884200, /* addc r2, r2, r2 */ +0x90700300, /* st *3, *4++ */ +0x0600c008, /* loop *6 ( */ +0x99080000, /* strnd r2 */ +0x8c041500, /* ld *1, *5 */ +0x90540300, /* st *3, *5 */ +0x7c8c0800, /* ldr *3, *0++ */ +0x7c000200, /* mov r0, r2 */ +0x99080000, /* strnd r2 */ +0x64086008, /* selc r2, r0, r3 */ +0x90740300, /* st *3, *5++ */ +/* ) */ +0xfc000000, /* nop */ +/* ) */ +0x84004060, /* ldi r0, [#3] */ +0x95800000, /* lddmp r0 */ +0x080000d5, /* call &mul1_exp */ +0x0c000000, /* ret */ +/* } */ +/* @0x19e: function modload[12] { */ +#define CF_modload_adr 414 +0x4c7fff00, /* xor r31, r31, r31 */ +0x84004000, /* ldi r0, [#0] */ +0x95800000, /* lddmp r0 */ +0x94800000, /* ldlc r0 */ +0x8000001c, /* movi r0.0l, #28 */ +0x8080001d, /* movi r0.1l, #29 */ +0x97800000, /* ldrfp r0 */ +0x8c001000, /* ld *0, *0 */ +0x08000001, /* call &d0inv */ +0x90440100, /* st *1, *1 */ +0x08000028, /* call &computeRR */ +0x0c000000, /* ret */ +/* } */ +#ifdef CONFIG_DCRYPTO_RSA_SPEEDUP +/* @0x1aa: function selA0orC4[16] { */ +#define CF_selA0orC4_adr 426 +0x99000000, /* strnd r0 */ +0x44000000, /* or r0, r0, r0 */ +0x100011b4, /* bl selA0orC4_invsel */ +0x50840002, /* addc r1, r0, r0 << 16 */ +0x6458da01, /* sell r22, r26, r6 */ +0x645cfb01, /* sell r23, r27, r7 */ +0x64611c01, /* sell r24, r28, r8 */ +0x64653d01, /* sell r25, r29, r9 */ +0x0c000000, /* ret */ +0xfc000000, /* nop */ +/*selA0orC4_invsel: */ +0x50840002, /* addc r1, r0, r0 << 16 */ +0x645b4601, /* sell r22, r6, r26 */ +0x645f6701, /* sell r23, r7, r27 */ +0x64638801, /* sell r24, r8, r28 */ +0x6467a901, /* sell r25, r9, r29 */ +0x0c000000, /* ret */ +/* } */ +/* @0x1ba: function mul4[169] { */ +#define CF_mul4_adr 442 +0x58594600, /* mul128 r22, r6l, r10l */ +0x59dd4600, /* mul128 r23, r6u, r10u */ +0x58894600, /* mul128 r2, r6u, r10l */ +0x50585610, /* add r22, r22, r2 << 128 */ +0x50dc5790, /* addc r23, r23, r2 >> 128 */ +0x59094600, /* mul128 r2, r6l, r10u */ +0x50585610, /* add r22, r22, r2 << 128 */ +0x50dc5790, /* addc r23, r23, r2 >> 128 */ +0x58616700, /* mul128 r24, r7l, r11l */ +0x59e56700, /* mul128 r25, r7u, r11u */ +0x58896700, /* mul128 r2, r7u, r11l */ +0x50605810, /* add r24, r24, r2 << 128 */ +0x50e45990, /* addc r25, r25, r2 >> 128 */ +0x59096700, /* mul128 r2, r7l, r11u */ +0x50605810, /* add r24, r24, r2 << 128 */ +0x50e45990, /* addc r25, r25, r2 >> 128 */ +0x58698800, /* mul128 r26, r8l, r12l */ +0x59ed8800, /* mul128 r27, r8u, r12u */ +0x58898800, /* mul128 r2, r8u, r12l */ +0x50685a10, /* add r26, r26, r2 << 128 */ +0x50ec5b90, /* addc r27, r27, r2 >> 128 */ +0x59098800, /* mul128 r2, r8l, r12u */ +0x50685a10, /* add r26, r26, r2 << 128 */ +0x50ec5b90, /* addc r27, r27, r2 >> 128 */ +0x5871a900, /* mul128 r28, r9l, r13l */ +0x59f5a900, /* mul128 r29, r9u, r13u */ +0x5889a900, /* mul128 r2, r9u, r13l */ +0x50705c10, /* add r28, r28, r2 << 128 */ +0x50f45d90, /* addc r29, r29, r2 >> 128 */ +0x5909a900, /* mul128 r2, r9l, r13u */ +0x50705c10, /* add r28, r28, r2 << 128 */ +0x50f45d90, /* addc r29, r29, r2 >> 128 */ +0x58016600, /* mul128 r0, r6l, r11l */ +0x59856600, /* mul128 r1, r6u, r11u */ +0x58896600, /* mul128 r2, r6u, r11l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59096600, /* mul128 r2, r6l, r11u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x508fff00, /* addc r3, r31, r31 */ +0x58014700, /* mul128 r0, r7l, r10l */ +0x59854700, /* mul128 r1, r7u, r10u */ +0x58894700, /* mul128 r2, r7u, r10l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59094700, /* mul128 r2, r7l, r10u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x50e47900, /* addc r25, r25, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x58018600, /* mul128 r0, r6l, r12l */ +0x59858600, /* mul128 r1, r6u, r12u */ +0x58898600, /* mul128 r2, r6u, r12l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59098600, /* mul128 r2, r6l, r12u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x58014800, /* mul128 r0, r8l, r10l */ +0x59854800, /* mul128 r1, r8u, r10u */ +0x58894800, /* mul128 r2, r8u, r10l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59094800, /* mul128 r2, r8l, r10u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x50e87a00, /* addc r26, r26, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x5801a600, /* mul128 r0, r6l, r13l */ +0x5985a600, /* mul128 r1, r6u, r13u */ +0x5889a600, /* mul128 r2, r6u, r13l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x5909a600, /* mul128 r2, r6l, r13u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x58018700, /* mul128 r0, r7l, r12l */ +0x59858700, /* mul128 r1, r7u, r12u */ +0x58898700, /* mul128 r2, r7u, r12l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59098700, /* mul128 r2, r7l, r12u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x58014900, /* mul128 r0, r9l, r10l */ +0x59854900, /* mul128 r1, r9u, r10u */ +0x58894900, /* mul128 r2, r9u, r10l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59094900, /* mul128 r2, r9l, r10u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x58016800, /* mul128 r0, r8l, r11l */ +0x59856800, /* mul128 r1, r8u, r11u */ +0x58896800, /* mul128 r2, r8u, r11l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59096800, /* mul128 r2, r8l, r11u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x50ec7b00, /* addc r27, r27, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x5801a700, /* mul128 r0, r7l, r13l */ +0x5985a700, /* mul128 r1, r7u, r13u */ +0x5889a700, /* mul128 r2, r7u, r13l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x5909a700, /* mul128 r2, r7l, r13u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50681a00, /* add r26, r26, r0 */ +0x50ec3b00, /* addc r27, r27, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x58016900, /* mul128 r0, r9l, r11l */ +0x59856900, /* mul128 r1, r9u, r11u */ +0x58896900, /* mul128 r2, r9u, r11l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59096900, /* mul128 r2, r9l, r11u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50681a00, /* add r26, r26, r0 */ +0x50ec3b00, /* addc r27, r27, r1 */ +0x50f07c00, /* addc r28, r28, r3 */ +0x50f7fd00, /* addc r29, r29, r31 */ +0x5801a800, /* mul128 r0, r8l, r13l */ +0x5985a800, /* mul128 r1, r8u, r13u */ +0x5889a800, /* mul128 r2, r8u, r13l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x5909a800, /* mul128 r2, r8l, r13u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x506c1b00, /* add r27, r27, r0 */ +0x50f03c00, /* addc r28, r28, r1 */ +0x50f7fd00, /* addc r29, r29, r31 */ +0x58018900, /* mul128 r0, r9l, r12l */ +0x59858900, /* mul128 r1, r9u, r12u */ +0x58898900, /* mul128 r2, r9u, r12l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59098900, /* mul128 r2, r9l, r12u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x506c1b00, /* add r27, r27, r0 */ +0x50f03c00, /* addc r28, r28, r1 */ +0x50f7fd00, /* addc r29, r29, r31 */ +0x0c000000, /* ret */ +/* } */ +/* @0x263: function sqr4[117] { */ +#define CF_sqr4_adr 611 +0x5858c600, /* mul128 r22, r6l, r6l */ +0x59dcc600, /* mul128 r23, r6u, r6u */ +0x5888c600, /* mul128 r2, r6u, r6l */ +0x50585610, /* add r22, r22, r2 << 128 */ +0x50dc5790, /* addc r23, r23, r2 >> 128 */ +0x50585610, /* add r22, r22, r2 << 128 */ +0x50dc5790, /* addc r23, r23, r2 >> 128 */ +0x5860e700, /* mul128 r24, r7l, r7l */ +0x59e4e700, /* mul128 r25, r7u, r7u */ +0x5888e700, /* mul128 r2, r7u, r7l */ +0x50605810, /* add r24, r24, r2 << 128 */ +0x50e45990, /* addc r25, r25, r2 >> 128 */ +0x50605810, /* add r24, r24, r2 << 128 */ +0x50e45990, /* addc r25, r25, r2 >> 128 */ +0x58690800, /* mul128 r26, r8l, r8l */ +0x59ed0800, /* mul128 r27, r8u, r8u */ +0x58890800, /* mul128 r2, r8u, r8l */ +0x50685a10, /* add r26, r26, r2 << 128 */ +0x50ec5b90, /* addc r27, r27, r2 >> 128 */ +0x50685a10, /* add r26, r26, r2 << 128 */ +0x50ec5b90, /* addc r27, r27, r2 >> 128 */ +0x58712900, /* mul128 r28, r9l, r9l */ +0x59f52900, /* mul128 r29, r9u, r9u */ +0x58892900, /* mul128 r2, r9u, r9l */ +0x50705c10, /* add r28, r28, r2 << 128 */ +0x50f45d90, /* addc r29, r29, r2 >> 128 */ +0x50705c10, /* add r28, r28, r2 << 128 */ +0x50f45d90, /* addc r29, r29, r2 >> 128 */ +0x5800e600, /* mul128 r0, r6l, r7l */ +0x5984e600, /* mul128 r1, r6u, r7u */ +0x5888e600, /* mul128 r2, r6u, r7l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x5908e600, /* mul128 r2, r6l, r7u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x508fff00, /* addc r3, r31, r31 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x50e47900, /* addc r25, r25, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x58010600, /* mul128 r0, r6l, r8l */ +0x59850600, /* mul128 r1, r6u, r8u */ +0x58890600, /* mul128 r2, r6u, r8l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59090600, /* mul128 r2, r6l, r8u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x50e87a00, /* addc r26, r26, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x58012600, /* mul128 r0, r6l, r9l */ +0x59852600, /* mul128 r1, r6u, r9u */ +0x58892600, /* mul128 r2, r6u, r9l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59092600, /* mul128 r2, r6l, r9u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x58010700, /* mul128 r0, r7l, r8l */ +0x59850700, /* mul128 r1, r7u, r8u */ +0x58890700, /* mul128 r2, r7u, r8l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59090700, /* mul128 r2, r7l, r8u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x50ec7b00, /* addc r27, r27, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x58012700, /* mul128 r0, r7l, r9l */ +0x59852700, /* mul128 r1, r7u, r9u */ +0x58892700, /* mul128 r2, r7u, r9l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59092700, /* mul128 r2, r7l, r9u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x50681a00, /* add r26, r26, r0 */ +0x50ec3b00, /* addc r27, r27, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x50681a00, /* add r26, r26, r0 */ +0x50ec3b00, /* addc r27, r27, r1 */ +0x50f07c00, /* addc r28, r28, r3 */ +0x50f7fd00, /* addc r29, r29, r31 */ +0x58012800, /* mul128 r0, r8l, r9l */ +0x59852800, /* mul128 r1, r8u, r9u */ +0x58892800, /* mul128 r2, r8u, r9l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x59092800, /* mul128 r2, r8l, r9u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x506c1b00, /* add r27, r27, r0 */ +0x50f03c00, /* addc r28, r28, r1 */ +0x50f7fd00, /* addc r29, r29, r31 */ +0x506c1b00, /* add r27, r27, r0 */ +0x50f03c00, /* addc r28, r28, r1 */ +0x50f7fd00, /* addc r29, r29, r31 */ +0x0c000000, /* ret */ +/* } */ +/* @0x2d8: function dod0[15] { */ +#define CF_dod0_adr 728 +0x8c0c1100, /* ld *3, *1 */ +0x58140100, /* mul128 r5, r1l, r0l */ +0x58880100, /* mul128 r2, r1u, r0l */ +0x50144510, /* add r5, r5, r2 << 128 */ +0x59080100, /* mul128 r2, r1l, r0u */ +0x50144510, /* add r5, r5, r2 << 128 */ +0x5801c500, /* mul128 r0, r5l, r14l */ +0x5985c500, /* mul128 r1, r5u, r14u */ +0x5889c500, /* mul128 r2, r5u, r14l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x5909c500, /* mul128 r2, r5l, r14u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x0c000000, /* ret */ +/* } */ +/* @0x2e7: function dod1[9] { */ +#define CF_dod1_adr 743 +0x5801e500, /* mul128 r0, r5l, r15l */ +0x5985e500, /* mul128 r1, r5u, r15u */ +0x5889e500, /* mul128 r2, r5u, r15l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x5909e500, /* mul128 r2, r5l, r15u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x0c000000, /* ret */ +/* } */ +/* @0x2f0: function dod2[9] { */ +#define CF_dod2_adr 752 +0x58020500, /* mul128 r0, r5l, r16l */ +0x59860500, /* mul128 r1, r5u, r16u */ +0x588a0500, /* mul128 r2, r5u, r16l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x590a0500, /* mul128 r2, r5l, r16u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x0c000000, /* ret */ +/* } */ +/* @0x2f9: function dod3[9] { */ +#define CF_dod3_adr 761 +0x58022500, /* mul128 r0, r5l, r17l */ +0x59862500, /* mul128 r1, r5u, r17u */ +0x588a2500, /* mul128 r2, r5u, r17l */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x590a2500, /* mul128 r2, r5l, r17u */ +0x50004010, /* add r0, r0, r2 << 128 */ +0x50844190, /* addc r1, r1, r2 >> 128 */ +0x0c000000, /* ret */ +/* } */ +/* @0x302: function redc4[97] { */ +#define CF_redc4_adr 770 +0x7c001600, /* mov r0, r22 */ +0x080002d8, /* call &dod0 */ +0x50581600, /* add r22, r22, r0 */ +0x50dc3700, /* addc r23, r23, r1 */ +0x50e3f800, /* addc r24, r24, r31 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002e7, /* call &dod1 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x50e49900, /* addc r25, r25, r4 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002f0, /* call &dod2 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x50e89a00, /* addc r26, r26, r4 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002f9, /* call &dod3 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x50ec9b00, /* addc r27, r27, r4 */ +0x508fff00, /* addc r3, r31, r31 */ +0x7c001700, /* mov r0, r23 */ +0x080002d8, /* call &dod0 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x50e7f900, /* addc r25, r25, r31 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002e7, /* call &dod1 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x50e89a00, /* addc r26, r26, r4 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002f0, /* call &dod2 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x50ec9b00, /* addc r27, r27, r4 */ +0x508fff00, /* addc r3, r31, r31 */ +0x080002f9, /* call &dod3 */ +0x50681a00, /* add r26, r26, r0 */ +0x50ec3b00, /* addc r27, r27, r1 */ +0x50f07c00, /* addc r28, r28, r3 */ +0x508fff00, /* addc r3, r31, r31 */ +0x7c001800, /* mov r0, r24 */ +0x080002d8, /* call &dod0 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x50ebfa00, /* addc r26, r26, r31 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002e7, /* call &dod1 */ +0x50641900, /* add r25, r25, r0 */ +0x50e83a00, /* addc r26, r26, r1 */ +0x50ec9b00, /* addc r27, r27, r4 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002f0, /* call &dod2 */ +0x50681a00, /* add r26, r26, r0 */ +0x50ec3b00, /* addc r27, r27, r1 */ +0x50f09c00, /* addc r28, r28, r4 */ +0x5093e300, /* addc r4, r3, r31 */ +0x080002f9, /* call &dod3 */ +0x506c1b00, /* add r27, r27, r0 */ +0x50f03c00, /* addc r28, r28, r1 */ +0x50f49d00, /* addc r29, r29, r4 */ +0x508fff00, /* addc r3, r31, r31 */ +0x7c001900, /* mov r0, r25 */ +0x080002d8, /* call &dod0 */ +0x50641900, /* add r25, r25, r0 */ +0x50d83a00, /* addc r22, r26, r1 */ +0x50dffb00, /* addc r23, r27, r31 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002e7, /* call &dod1 */ +0x50581600, /* add r22, r22, r0 */ +0x50dc3700, /* addc r23, r23, r1 */ +0x50e09c00, /* addc r24, r28, r4 */ +0x5093ff00, /* addc r4, r31, r31 */ +0x080002f0, /* call &dod2 */ +0x505c1700, /* add r23, r23, r0 */ +0x50e03800, /* addc r24, r24, r1 */ +0x50e49d00, /* addc r25, r29, r4 */ +0x508fe300, /* addc r3, r3, r31 */ +0x080002f9, /* call &dod3 */ +0x50601800, /* add r24, r24, r0 */ +0x50e43900, /* addc r25, r25, r1 */ +0x508fe300, /* addc r3, r3, r31 */ +0x56007f00, /* subx r0, r31, r3 */ +0x99680000, /* strnd r26 */ +0x996c0000, /* strnd r27 */ +0x99700000, /* strnd r28 */ +0x99740000, /* strnd r29 */ +0x5409d600, /* sub r2, r22, r14 */ +0x54e9f700, /* subb r26, r23, r15 */ +0x54ee1800, /* subb r27, r24, r16 */ +0x54f23900, /* subb r28, r25, r17 */ +0x66773c08, /* selcx r29, r28, r25 */ +0x66731b08, /* selcx r28, r27, r24 */ +0x666efa08, /* selcx r27, r26, r23 */ +0x666ac208, /* selcx r26, r2, r22 */ +0x0c000000, /* ret */ +/* } */ +/* @0x363: function modexp_1024[101] { */ +#define CF_modexp_1024_adr 867 +0x7c081f00, /* mov r2, r31 */ +0x80080006, /* movi r2.0l, #6 */ +0x8088000a, /* movi r2.1l, #10 */ +0x81880001, /* movi r2.3l, #1 */ +0x8208000e, /* movi r2.4l, #14 */ +0x82880016, /* movi r2.5l, #22 */ +0x83080012, /* movi r2.6l, #18 */ +0x97800200, /* ldrfp r2 */ +0x7c001f00, /* mov r0, r31 */ +0x8180ffff, /* movi r0.3l, #65535 */ +0x84044000, /* ldi r1, [#0] */ +0x40040100, /* and r1, r1, r0 */ +0x48000000, /* not r0, r0 */ +0x84084060, /* ldi r2, [#3] */ +0x40080200, /* and r2, r2, r0 */ +0x44082200, /* or r2, r2, r1 */ +0x95800200, /* lddmp r2 */ +0x05004004, /* loop #4 ( */ +0x8c201b00, /* ld *0++, *3++ */ +0x8c241a00, /* ld *1++, *2++ */ +0x8c301800, /* ld *4++, *0++ */ +0x8c381c00, /* ld *6++, *4++ */ +/* ) */ +0x99780000, /* strnd r30 */ +0x507bde00, /* add r30, r30, r30 */ +0x080001ba, /* call &mul4 */ +0x08000302, /* call &redc4 */ +0x7c281a00, /* mov r10, r26 */ +0x7c2c1b00, /* mov r11, r27 */ +0x7c301c00, /* mov r12, r28 */ +0x7c341d00, /* mov r13, r29 */ +0x99180000, /* strnd r6 */ +0x991c0000, /* strnd r7 */ +0x99200000, /* strnd r8 */ +0x99240000, /* strnd r9 */ +0x05400033, /* loop #1024 ( */ +0x08000263, /* call &sqr4 */ +0x08000302, /* call &redc4 */ +0x99180000, /* strnd r6 */ +0x991c0000, /* strnd r7 */ +0x99200000, /* strnd r8 */ +0x99240000, /* strnd r9 */ +0x7c181a00, /* mov r6, r26 */ +0x7c1c1b00, /* mov r7, r27 */ +0x7c201c00, /* mov r8, r28 */ +0x7c241d00, /* mov r9, r29 */ +0x080001ba, /* call &mul4 */ +0x08000302, /* call &redc4 */ +0x99000000, /* strnd r0 */ +0x5002b500, /* add r0, r21, r21 */ +0x99000000, /* strnd r0 */ +0x50825200, /* addc r0, r18, r18 */ +0x99480000, /* strnd r18 */ +0x7c480000, /* mov r18, r0 */ +0x99000000, /* strnd r0 */ +0x50827300, /* addc r0, r19, r19 */ +0x994c0000, /* strnd r19 */ +0x7c4c0000, /* mov r19, r0 */ +0x99000000, /* strnd r0 */ +0x50829400, /* addc r0, r20, r20 */ +0x99500000, /* strnd r20 */ +0x7c500000, /* mov r20, r0 */ +0x99000000, /* strnd r0 */ +0x5082b500, /* addc r0, r21, r21 */ +0x99540000, /* strnd r21 */ +0x7c540000, /* mov r21, r0 */ +0x99580000, /* strnd r22 */ +0x995c0000, /* strnd r23 */ +0x99600000, /* strnd r24 */ +0x99640000, /* strnd r25 */ +0x080001aa, /* call &selA0orC4 */ +0x99180000, /* strnd r6 */ +0x991c0000, /* strnd r7 */ +0x99200000, /* strnd r8 */ +0x99240000, /* strnd r9 */ +0x99000000, /* strnd r0 */ +0x50000000, /* add r0, r0, r0 */ +0x4c001e00, /* xor r0, r30, r0 */ +0x99780000, /* strnd r30 */ +0x507bde00, /* add r30, r30, r30 */ +0x4c781e00, /* xor r30, r30, r0 */ +0x447a5e00, /* or r30, r30, r18 */ +0x4c03c000, /* xor r0, r0, r30 */ +0x641aca01, /* sell r6, r10, r22 */ +0x641eeb01, /* sell r7, r11, r23 */ +0x64230c01, /* sell r8, r12, r24 */ +0x64272d01, /* sell r9, r13, r25 */ +/* ) */ +0x7c281f00, /* mov r10, r31 */ +0x80280001, /* movi r10.0l, #1 */ +0x7c2c1f00, /* mov r11, r31 */ +0x7c301f00, /* mov r12, r31 */ +0x7c341f00, /* mov r13, r31 */ +0x080001ba, /* call &mul4 */ +0x08000302, /* call &redc4 */ +0x5419da00, /* sub r6, r26, r14 */ +0x549dfb00, /* subb r7, r27, r15 */ +0x54a21c00, /* subb r8, r28, r16 */ +0x54a63d00, /* subb r9, r29, r17 */ +0x080001aa, /* call &selA0orC4 */ +0x05004001, /* loop #4 ( */ +0x90740d00, /* st *5++, *5++ */ +/* ) */ +0x0c000000, /* ret */ +/* } */ +#endif // CONFIG_DCRYPTO_RSA_SPEEDUP }; /* clang-format on */ @@ -487,7 +1147,7 @@ struct DMEM_ctx { #define DMEM_CELL_SIZE 32 #define DMEM_INDEX(p, f) \ - (((const uint8_t *) &(p)->f - (const uint8_t *) (p)) / DMEM_CELL_SIZE) + (((const uint8_t *)&(p)->f - (const uint8_t *)(p)) / DMEM_CELL_SIZE) /* Get non-0 64 bit random */ static void rand64(uint32_t dst[2]) @@ -503,11 +1163,11 @@ static int setup_and_lock(const struct LITE_BIGNUM *N, const struct LITE_BIGNUM *input) { struct DMEM_ctx *ctx = - (struct DMEM_ctx *) GREG32_ADDR(CRYPTO, DMEM_DUMMY); + (struct DMEM_ctx *)GREG32_ADDR(CRYPTO, DMEM_DUMMY); /* Initialize hardware; load code page. */ dcrypto_init_and_lock(); - dcrypto_imem_load(0, IMEM_dcrypto, ARRAY_SIZE(IMEM_dcrypto)); + dcrypto_imem_load(0, IMEM_dcrypto_bn, ARRAY_SIZE(IMEM_dcrypto_bn)); /* Setup DMEM pointers (as indices into DMEM which are 256-bit cells). */ @@ -566,6 +1226,11 @@ static int montout(struct DMEM_ctx *ctx, uint32_t pA, uint32_t pOut) modexp(ctx, CF_modexp_adr, DMEM_INDEX(ctx, RR), DMEM_INDEX(ctx, in), \ DMEM_INDEX(ctx, exp), DMEM_INDEX(ctx, out)) +#define MODEXP1024(ctx, in, exp, out) \ + modexp(ctx, CF_modexp_1024_adr, DMEM_INDEX(ctx, RR), \ + DMEM_INDEX(ctx, in), DMEM_INDEX(ctx, exp), \ + DMEM_INDEX(ctx, out)) + #define MODEXP_BLINDED(ctx, in, exp, out) \ modexp(ctx, CF_modexp_blinded_adr, DMEM_INDEX(ctx, RR), \ DMEM_INDEX(ctx, in), DMEM_INDEX(ctx, exp), \ @@ -605,7 +1270,7 @@ int dcrypto_modexp_blinded(struct LITE_BIGNUM *output, { int i, result; struct DMEM_ctx *ctx = - (struct DMEM_ctx *) GREG32_ADDR(CRYPTO, DMEM_DUMMY); + (struct DMEM_ctx *)GREG32_ADDR(CRYPTO, DMEM_DUMMY); uint32_t r_buf[RSA_MAX_WORDS]; uint32_t rinv_buf[RSA_MAX_WORDS]; @@ -676,7 +1341,7 @@ int dcrypto_modexp(struct LITE_BIGNUM *output, const struct LITE_BIGNUM *input, { int i, result; struct DMEM_ctx *ctx = - (struct DMEM_ctx *) GREG32_ADDR(CRYPTO, DMEM_DUMMY); + (struct DMEM_ctx *)GREG32_ADDR(CRYPTO, DMEM_DUMMY); result = setup_and_lock(N, input); @@ -686,7 +1351,15 @@ int dcrypto_modexp(struct LITE_BIGNUM *output, const struct LITE_BIGNUM *input, for (i = bn_words(exp); i < bn_words(N); ++i) ctx->exp[i] = 0; +#ifdef CONFIG_DCRYPTO_RSA_SPEEDUP + if (bn_bits(N) == 1024) { /* special code for 1024 bits */ + result |= MODEXP1024(ctx, in, exp, out); + } else { + result |= MODEXP(ctx, in, exp, out); + } +#else result |= MODEXP(ctx, in, exp, out); +#endif memcpy(output->d, ctx->out, bn_size(output)); @@ -703,7 +1376,7 @@ int dcrypto_modexp_word(struct LITE_BIGNUM *output, uint32_t e = exp; uint32_t b = 0x80000000; struct DMEM_ctx *ctx = - (struct DMEM_ctx *) GREG32_ADDR(CRYPTO, DMEM_DUMMY); + (struct DMEM_ctx *)GREG32_ADDR(CRYPTO, DMEM_DUMMY); result = setup_and_lock(N, input); @@ -736,3 +1409,87 @@ int dcrypto_modexp_word(struct LITE_BIGNUM *output, dcrypto_unlock(); return result == 0; } + +#ifdef CRYPTO_TEST_SETUP +#include "console.h" +#include "shared_mem.h" +#include "timer.h" + +static uint8_t genp_seed[32]; +static uint32_t prime_buf[32]; +static timestamp_t genp_start; +static timestamp_t genp_end; + +static int genp_core(void) +{ + struct LITE_BIGNUM prime; + int result; + + // Spin seed out into prng candidate prime. + DCRYPTO_hkdf((uint8_t *)prime_buf, sizeof(prime_buf), genp_seed, + sizeof(genp_seed), 0, 0, 0, 0); + DCRYPTO_bn_wrap(&prime, &prime_buf, sizeof(prime_buf)); + + genp_start = get_time(); + result = (DCRYPTO_bn_generate_prime(&prime) != 0) ? EC_SUCCESS + : EC_ERROR_UNKNOWN; + genp_end = get_time(); + + return result; +} + +static int call_on_bigger_stack(int (*func)(void)) +{ + int result, i; + char *new_stack; + const int new_stack_size = 4 * 1024; + + result = shared_mem_acquire(new_stack_size, &new_stack); + if (result == EC_SUCCESS) { + // Paint stack arena + memset(new_stack, 0x01, new_stack_size); + + // Call whilst switching stacks + __asm__ volatile("mov r4, sp\n" // save sp + "mov sp, %[new_stack]\n" + "blx %[func]\n" + "mov sp, r4\n" // restore sp + "mov %[result], r0\n" + : [result] "=r"(result) + : [new_stack] "r"(new_stack + new_stack_size), + [func] "r"(func) + : "r0", "r1", "r2", "r3", "r4", + "lr" // clobbers + ); + + // Take guess at amount of stack that got used + for (i = 0; i < new_stack_size && new_stack[i] == 0x01; ++i) + ; + ccprintf("stack: %u/%u\n", new_stack_size - i, new_stack_size); + + shared_mem_release(new_stack); + } + + return result; +} + +static int command_genp(int argc, char **argv) +{ + int result; + + memset(genp_seed, 0, sizeof(genp_seed)); + if (argc > 1) + memcpy(genp_seed, argv[1], strlen(argv[1])); + + result = call_on_bigger_stack(genp_core); + + if (result == EC_SUCCESS) { + ccprintf("prime: %.*h (lsb first)\n", sizeof(prime_buf), + prime_buf); + ccprintf("μs : %lu\n", genp_end.val - genp_start.val); + } + + return result; +} +DECLARE_CONSOLE_COMMAND(genp, command_genp, "[seed]", "Generate prng prime"); +#endif diff --git a/include/config.h b/include/config.h index 14b20f20a2..debb9686e0 100644 --- a/include/config.h +++ b/include/config.h @@ -1091,6 +1091,12 @@ */ #undef CONFIG_DCRYPTO +/* + * When enabled, RSA 2048 bit keygen gets a 40% performance boost, + * at the cost of 2184 bytes of image size increase. + */ +#undef CONFIG_DCRYPTO_RSA_SPEEDUP + /* * When enabled, accelerate sha512 using the generic crypto engine; * only supported on CR50 -- cgit v1.2.1