diff options
Diffstat (limited to 'third_party/unacl-curve25519/core')
5 files changed, 0 insertions, 3207 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S deleted file mode 100644 index d2a467459b..0000000000 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S +++ /dev/null @@ -1,181 +0,0 @@ -// Implementation of multiplication of an fe25519 bit value with the curve constant 121666. -// -// B. Haase, Endress + Hauser Conducta GmbH & Ko. KG -// public domain. -// -// gnu assembler format. -// -// Generated and tested with C++ functions in the test subdirectory. -// -// ATTENTION: -// Not yet tested on target hardware. - - - .code 16 - .text - .align 2 - - .global fe25519_mpyWith121666_asm - .code 16 - .thumb_func - .type fe25519_mpyWith121666_asm, %function - -fe25519_mpyWith121666_asm: - push {r4,r5,r6,r7,r14} - ldr r7,=56130 - ldr r2,[r1,#28] - lsl r5,r2,#16 - lsr r6,r2,#16 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 - lsl r2,r5,#1 - lsr r2,r2,#1 - str r2,[r0,#28] - lsr r5,r5,#31 - lsl r6,r6,#1 - orr r5,r6 - mov r6,#19 - mul r5,r6 - mov r6,#0 - ldr r2,[r1,#0] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 - str r5,[r0,#0] - mov r5,#0 - ldr r2,[r1,#4] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r6,r3 - adc r5,r4 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r6,r2 - mov r2,#0 - adc r5,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r6,r2 - adc r5,r3 - str r6,[r0,#4] - mov r6,#0 - ldr r2,[r1,#8] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 - str r5,[r0,#8] - mov r5,#0 - ldr r2,[r1,#12] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r6,r3 - adc r5,r4 - lsr 
r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r6,r2 - mov r2,#0 - adc r5,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r6,r2 - adc r5,r3 - str r6,[r0,#12] - mov r6,#0 - ldr r2,[r1,#16] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 - str r5,[r0,#16] - mov r5,#0 - ldr r2,[r1,#20] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r6,r3 - adc r5,r4 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r6,r2 - mov r2,#0 - adc r5,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r6,r2 - adc r5,r3 - str r6,[r0,#20] - mov r6,#0 - ldr r2,[r1,#24] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 - uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 - str r5,[r0,#24] - mov r5,#0 - ldr r2,[r0,#28] - add r6,r2 - str r6,[r0,#28] - pop {r4,r5,r6,r7,r15} - - .size fe25519_mpyWith121666_asm, .-fe25519_mpyWith121666_asm - diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S deleted file mode 100644 index 366713a7a3..0000000000 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S +++ /dev/null @@ -1,1111 +0,0 @@ - .align 2 - .global multiply256x256_asm - .type multiply256x256_asm, %function -multiply256x256_asm: - push {r4-r7,lr} - mov r3, r8 - mov r4, r9 - mov r5, r10 - mov r6, r11 - push {r0-r6} - mov r12, r0 - mov r10, r2 - mov r11, r1 - mov r0,r2 - //ldm r0!, {r4,r5,r6,r7} - ldm r0!, {r4,r5} - add r0,#8 - ldm r1!, {r2,r3,r6,r7} - push {r0,r1} - /////////BEGIN LOW PART ////////////////////// - /////////MUL128///////////// - //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 
- lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - ////////////////////////// - mov r4, r12 - stm r4!, {r0,r1} - push {r4} - push {r0,r1} - mov r1, r10 - mov r10, r2 - ldm r1, {r0, r1, r4, r5} - mov r2, r4 - mov r7, r5 - sub r2, r0 - sbc r7, r1 - sbc r6, r6 - eor r2, r6 - eor r7, r6 - sub r2, r6 - sbc r7, r6 - push {r2, r7} - mov r2, r11 - mov r11, r3 - ldm r2, {r0, r1, r2, r3} - sub r0, r2 - sbc r1, r3 - sbc r7, r7 - eor r0, r7 - eor r1, r7 - sub r0, r7 - sbc r1, r7 - eor r7, r6 - mov r12, r7 - push {r0, r1} - //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc 
r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - mov r4, r10 - mov r5, r11 - eor r6, r6 - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r6 - mov r10, r2 - mov r11, r3 - pop {r2-r5} - push {r0, r1} - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - pop {r4, r5} - mov r6, r12 - mov r7, r12 - eor r0, r6 - eor r1, r6 - eor r2, r6 - eor r3, r6 - asr r6, r6, #1 - adc r0, r4 - adc r1, r5 - adc r4, r2 - adc r5, r3 - eor r2, r2 - adc r6,r2 - adc r7,r2 - pop {r2, r3} - mov r8, r2 - mov r9, r3 - add r2, r0 - adc r3, r1 - mov r0, r10 - mov r1, 
r11 - adc r4, r0 - adc r5, r1 - adc r6, r0 - adc r7, r1 - ////////END LOW PART///////////////////// - pop {r0} - stm r0!, {r2,r3} - pop {r1,r2} - push {r0} - push {r4-r7} - mov r10, r1 - mov r11, r2 - ldm r1!, {r4, r5} - ldm r2, {r2, r3} - /////////BEGIN HIGH PART//////////////// - /////////MUL128///////////// - //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - push {r0,r1} - mov r1, r10 - mov r10, r2 - ldm r1, {r0, r1, r4, r5} - mov r2, r4 - mov r7, r5 - sub r2, r0 - sbc r7, r1 - sbc r6, r6 - eor r2, r6 - eor r7, r6 - sub r2, r6 - sbc r7, r6 - push {r2, r7} - mov r2, r11 - mov r11, r3 - ldm r2, {r0, r1, r2, r3} - sub r0, r2 - sbc r1, r3 - sbc r7, r7 - eor r0, r7 - eor r1, r7 - sub r0, r7 - sbc r1, r7 - eor r7, r6 - mov r12, r7 - push {r0, r1} - //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr 
r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - mov r4, r10 - mov r5, r11 - eor r6, r6 - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r6 - mov r10, r2 - mov r11, r3 - pop {r2-r5} - push {r0, r1} - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr 
r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - pop {r4, r5} - mov r6, r12 - mov r7, r12 - eor r0, r6 - eor r1, r6 - eor r2, r6 - eor r3, r6 - asr r6, r6, #1 - adc r0, r4 - adc r1, r5 - adc r4, r2 - adc r5, r3 - eor r2, r2 - adc r6,r2 //0,1 - adc r7,r2 - pop {r2, r3} - mov r8, r2 - mov r9, r3 - add r2, r0 - adc r3, r1 - mov r0, r10 - mov r1, r11 - adc r4, r0 - adc r5, r1 - adc r6, r0 - adc r7, r1 - ////////END HIGH PART///////////////////// - mov r0, r8 - mov r1, r9 - mov r8, r6 - mov r9, r7 - pop {r6, r7} - add r0, r6 - adc r1, r7 - pop {r6, r7} - adc r2, r6 - adc r3, r7 - pop {r7} - stm r7!, {r0-r3} - mov r10, r7 - eor r0,r0 - mov r6, r8 - mov r7, r9 - adc r4, r0 - adc r5, r0 - adc r6, r0 - adc r7, r0 - pop {r0,r1,r2} - mov r12, r2 - push {r0, r4-r7} - ldm r1, {r0-r7} - sub r0, r4 - sbc r1, r5 - sbc r2, r6 - sbc r3, r7 - eor r4, r4 - sbc r4, r4 - eor r0, r4 - eor r1, r4 - eor r2, r4 - eor r3, r4 - sub r0, r4 - sbc r1, r4 - sbc r2, r4 - sbc r3, r4 - mov r6, r12 - mov r12, r4 //carry - mov r5, r10 - stm r5!, {r0-r3} - mov r11, r5 - mov r8, r0 - mov r9, r1 - ldm r6, {r0-r7} - sub r4, r0 - sbc r5, r1 - sbc r6, r2 - sbc r7, r3 - eor r0, r0 - sbc r0, r0 - eor r4, r0 - eor r5, r0 - eor r6, r0 - eor r7, r0 - sub r4, r0 - sbc r5, r0 - sbc r6, r0 - sbc r7, r0 - mov r1, r12 - eor r0, r1 - mov r1, r11 - stm r1!, {r4-r7} - push {r0} - mov r2, r8 - mov r3, r9 - /////////BEGIN MIDDLE PART//////////////// - /////////MUL128///////////// - //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - 
lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - push {r0,r1} - mov r1, r10 - mov r10, r2 - ldm r1, {r0, r1, r4, r5} - mov r2, r4 - mov r7, r5 - sub r2, r0 - sbc r7, r1 - sbc r6, r6 - eor r2, r6 - eor r7, r6 - sub r2, r6 - sbc r7, r6 - push {r2, r7} - mov r2, r11 - mov r11, r3 - ldm r2, {r0, r1, r2, r3} - sub r0, r2 - sbc r1, r3 - sbc r7, r7 - eor r0, r7 - eor r1, r7 - sub r0, r7 - sbc r1, r7 - eor r7, r6 - mov r12, r7 - push {r0, r1} - //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl 
r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - mov r4, r10 - mov r5, r11 - eor r6, r6 - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r6 - mov r10, r2 - mov r11, r3 - pop {r2-r5} - push {r0, r1} - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 - mov r9, r1 - mov r8, r5 - lsr r1,r4,#16 - uxth r4,r4 - mov r0,r4 - uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 - uxth r6, r6 - uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 - mov r1, r9 - mov r5, r8 - mov r8, r0 - lsr r0, r1,#16 - uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 - uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 - mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 - pop {r4, r5} - mov r6, r12 - mov r7, r12 - eor r0, r6 - eor r1, r6 - eor r2, r6 - eor r3, r6 - asr r6, r6, #1 - adc r0, r4 - adc r1, r5 - adc r4, r2 - adc r5, r3 - eor r2, r2 - adc r6,r2 //0,1 - adc r7,r2 - pop {r2, r3} - mov r8, r2 - mov r9, r3 - add r2, r0 - adc r3, r1 - mov r0, r10 - mov r1, r11 - adc r4, r0 - adc r5, r1 - adc r6, r0 - adc r7, r1 - //////////END MIDDLE PART//////////////// - pop {r0,r1} //r0,r1 - mov r12, r0 //negative - eor r2, r0 - eor r3, r0 - eor r4, r0 - eor r5, r0 - eor r6, r0 - eor r7, r0 - push {r4-r7} - ldm r1!, {r4-r7} 
- mov r11, r1 //reference - mov r1, r9 - eor r1, r0 - mov r10, r4 - mov r4, r8 - asr r0, #1 - eor r0, r4 - mov r4, r10 - adc r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r7 - eor r4, r4 - adc r4, r4 - mov r10, r4 //carry - mov r4, r11 - ldm r4, {r4-r7} - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r7 - mov r9, r4 - mov r4, r11 - stm r4!, {r0-r3} - mov r11, r4 - pop {r0-r3} - mov r4, r9 - adc r4, r0 - adc r5, r1 - adc r6, r2 - adc r7, r3 - mov r1, #0 - adc r1, r1 - mov r0, r10 - mov r10, r1 //carry - asr r0, #1 - pop {r0-r3} - adc r4, r0 - adc r5, r1 - adc r6, r2 - adc r7, r3 - mov r8, r0 - mov r0, r11 - stm r0!, {r4-r7} - mov r11, r0 - mov r0, r8 - mov r6, r12 - mov r5, r10 - eor r4, r4 - adc r5, r6 - adc r6, r4 - add r0, r5 - adc r1, r6 - adc r2, r6 - adc r3, r6 - mov r7, r11 - stm r7!, {r0-r3} - pop {r3-r6} - mov r8, r3 - mov r9, r4 - mov r10, r5 - mov r11, r6 - pop {r4-r7,pc} - bx lr -.size multiply256x256_asm, .-multiply256x256_asm - diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S deleted file mode 100644 index 9a3c29a0f6..0000000000 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S +++ /dev/null @@ -1,163 +0,0 @@ -// Implementation of a partial reduction modulo 2^255 - 38. -// -// B. Haase, Endress + Hauser Conducta GmbH & Ko. KG -// public domain. -// -// gnu assembler format. -// -// Generated and tested with C++ functions in the test subdirectory and on the target. 
-// - - .code 16 - - .text - .align 2 - - .global fe25519_reduceTo256Bits_asm - .code 16 - .thumb_func - .type fe25519_reduceTo256Bits_asm, %function - -fe25519_reduceTo256Bits_asm: - push {r4,r5,r6,r7,r14} - ldr r2,[r1,#60] - lsr r3,r2,#16 - uxth r2,r2 - mov r7,#38 - mul r2,r7 - mul r3,r7 - ldr r4,[r1,#28] - lsr r5,r3,#16 - lsl r3,r3,#16 - mov r6,#0 - add r4,r2 - adc r5,r6 - add r4,r3 - adc r5,r6 - lsl r2,r4,#1 - lsr r2,r2,#1 - str r2,[r0,#28] - lsr r4,r4,#31 - lsl r5,r5,#1 - orr r4,r5 - mov r2,#19 - mul r2,r4 - ldr r4,[r1,#0] - add r2,r4 - mov r3,#0 - adc r3,r6 - ldr r4,[r1,#32] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - str r2,[r0,#0] - ldr r4,[r1,#4] - add r3,r4 - mov r2,#0 - adc r2,r6 - ldr r4,[r1,#36] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r3,r4 - adc r2,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r3,r4 - adc r2,r5 - str r3,[r0,#4] - ldr r4,[r1,#8] - add r2,r4 - mov r3,#0 - adc r3,r6 - ldr r4,[r1,#40] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - str r2,[r0,#8] - ldr r4,[r1,#12] - add r3,r4 - mov r2,#0 - adc r2,r6 - ldr r4,[r1,#44] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r3,r4 - adc r2,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r3,r4 - adc r2,r5 - str r3,[r0,#12] - ldr r4,[r1,#16] - add r2,r4 - mov r3,#0 - adc r3,r6 - ldr r4,[r1,#48] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - str r2,[r0,#16] - ldr r4,[r1,#20] - add r3,r4 - mov r2,#0 - adc r2,r6 - ldr r4,[r1,#52] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r3,r4 - adc r2,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r3,r4 - adc r2,r5 - str r3,[r0,#20] - ldr r4,[r1,#24] - add r2,r4 - mov r3,#0 - adc r3,r6 - ldr r4,[r1,#56] - lsr r5,r4,#16 - uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - 
lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - str r2,[r0,#24] - ldr r4,[r0,#28] - add r4,r3 - str r4,[r0,#28] - pop {r4,r5,r6,r7,r15} - - .size fe25519_reduceTo256Bits_asm, .-fe25519_reduceTo256Bits_asm - diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/scalarmult.c b/third_party/unacl-curve25519/core/cortex-m0/curve25519/scalarmult.c deleted file mode 100644 index 07e2b144e7..0000000000 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/scalarmult.c +++ /dev/null @@ -1,588 +0,0 @@ -/* ======================= - ============================ C/C++ HEADER FILE ============================= - ======================= - - Collection of all required submodules from naclM0 required for curve25519 - scalar multiplication (not including randomization, etc.) alone. - - Library naclM0 largely bases on work avrNacl of M. Hutter and P. Schwabe. - - Will compile to the two functions - - int - crypto_scalarmult_base_curve25519( - unsigned char* q, - const unsigned char* n - ); - - int - crypto_scalarmult_curve25519 ( - unsigned char* r, - const unsigned char* s, - const unsigned char* p - ); - - Requires inttypes.h header and the four external assembly functions - - extern void - fe25519_reduceTo256Bits_asm ( - fe25519 *res, - const UN_512bitValue *in - ); - - extern void - fe25519_mpyWith121666_asm ( - fe25519* out, - const fe25519* in - ); - - extern void - multiply256x256_asm ( - UN_512bitValue* result, - const UN_256bitValue* x, - const UN_256bitValue* y - ); - - extern void - square256_asm ( - UN_512bitValue* result, - const UN_256bitValue* x - ); - - \file scalarmult.c - - \Author B. Haase, Endress + Hauser Conducta GmbH & Co. 
KG - - Distributed under the conditions of the - Creative Commons CC0 1.0 Universal public domain dedication - ============================================================================*/ - -#include "curve25519.h" -#include "util.h" - -typedef uint8_t uint8; -typedef uint16_t uint16; -typedef uint32_t uint32; -typedef uint64_t uint64; -typedef uintptr_t uintptr; - -typedef int8_t int8; -typedef int16_t int16; -typedef int32_t int32; -typedef int64_t int64; -typedef intptr_t intptr; - -// Note that it's important to define the uint8 as first union member, so that -// an array of uint8 may be used as initializer. -typedef union UN_256bitValue_ -{ - uint8 as_uint8[32]; - uint16 as_uint16[16]; - uint32 as_uint32[8]; - uint64 as_uint64[4]; -} UN_256bitValue; - -// Note that it's important to define the uint8 as first union member, so that -// an array of uint8 may be used as initializer. -typedef union UN_512bitValue_ -{ - uint8 as_uint8[64]; - uint16 as_uint16[32]; - uint32 as_uint32[16]; - uint64 as_uint64[8]; - UN_256bitValue as_256_bitValue[2]; -} UN_512bitValue; - -typedef UN_256bitValue fe25519; - -// **************************************************** -// Assembly functions.
-// **************************************************** - -extern void -fe25519_reduceTo256Bits_asm( - fe25519 *res, - const UN_512bitValue *in -); - -#define fe25519_mpyWith121666 fe25519_mpyWith121666_asm -extern void -fe25519_mpyWith121666_asm ( - fe25519* out, - const fe25519* in -); - -#define multiply256x256 multiply256x256_asm -extern void -multiply256x256( - UN_512bitValue* result, - const UN_256bitValue* x, - const UN_256bitValue* y -); - -#define square256 square256_asm -extern void -square256( - UN_512bitValue* result, - const UN_256bitValue* x -); - -// **************************************************** -// C functions for fe25519 -// **************************************************** - -static void -fe25519_cpy( - fe25519* dest, - const fe25519* source -) -{ - memcpy(dest, source, 32); -} - -static void -fe25519_unpack( - fe25519* out, - const unsigned char in[32] -) -{ - memcpy(out, in, 32); - - out->as_uint8[31] &= 0x7f; // make sure that the last bit is cleared. -} - -static void -fe25519_sub( - fe25519* out, - const fe25519* baseValue, - const fe25519* valueToSubstract -) -{ - uint16 ctr; - int64 accu = 0; - - // First subtract the most significant word, so that we may - // reduce the result "on the fly". - accu = baseValue->as_uint32[7]; - accu -= valueToSubstract->as_uint32[7]; - - // We always set bit #31, and compensate this by subtracting 1 from the reduction - // value. - out->as_uint32[7] = ((uint32)accu) | 0x80000000ul; - - accu = 19 * ((int32)(accu >> 31) - 1); - // ^ "-1" is the compensation for the "| 0x80000000ul" above. - // This choice makes sure, that the result will be positive! 
- - for (ctr = 0; ctr < 7; ctr += 1) - { - accu += baseValue->as_uint32[ctr]; - accu -= valueToSubstract->as_uint32[ctr]; - - out->as_uint32[ctr] = (uint32)accu; - accu >>= 32; - } - accu += out->as_uint32[7]; - out->as_uint32[7] = (uint32)accu; -} - -static void -fe25519_add( - fe25519* out, - const fe25519* baseValue, - const fe25519* valueToAdd -) -{ - uint16 ctr = 0; - uint64 accu = 0; - - // We first add the most significant word, so that we may reduce - // "on the fly". - accu = baseValue->as_uint32[7]; - accu += valueToAdd->as_uint32[7]; - out->as_uint32[7] = ((uint32)accu) & 0x7ffffffful; - - accu = ((uint32)(accu >> 31)) * 19; - - for (ctr = 0; ctr < 7; ctr += 1) - { - accu += baseValue->as_uint32[ctr]; - accu += valueToAdd->as_uint32[ctr]; - - out->as_uint32[ctr] = (uint32)accu; - accu >>= 32; - } - accu += out->as_uint32[7]; - out->as_uint32[7] = (uint32)accu; -} - -static void -fe25519_mul( - fe25519* result, - const fe25519* in1, - const fe25519* in2 -) -{ - UN_512bitValue tmp; - - multiply256x256(&tmp, in1, in2); - fe25519_reduceTo256Bits_asm(result,&tmp); -} - -static void -fe25519_square( - fe25519* result, - const fe25519* in -) -{ - UN_512bitValue tmp; - - square256(&tmp, in); - fe25519_reduceTo256Bits_asm(result,&tmp); -} - -static void -fe25519_reduceCompletely( - fe25519* inout -) -{ - uint32 numberOfTimesToSubstractPrime; - uint32 initialGuessForNumberOfTimesToSubstractPrime = inout->as_uint32[7] >> - 31; - uint64 accu; - uint8 ctr; - - // add one additional 19 to the estimated number of reductions. - // Do the calculation without writing back the results to memory. - // - // The initial guess of required numbers of reductions is based - // on bit #32 of the most significant word. - // This initial guess may be wrong, since we might have a value - // v in the range - // 2^255 - 19 <= v < 2^255 - // . After adding 19 to the value, we will be having the correct - // Number of required subtractions. 
- accu = initialGuessForNumberOfTimesToSubstractPrime * 19 + 19; - - for (ctr = 0; ctr < 7; ctr++) - { - accu += inout->as_uint32[ctr]; - accu >>= 32; - } - accu += inout->as_uint32[7]; - - numberOfTimesToSubstractPrime = (uint32)(accu >> 31); - - // Do the reduction. - accu = numberOfTimesToSubstractPrime * 19; - - for (ctr = 0; ctr < 7; ctr++) - { - accu += inout->as_uint32[ctr]; - inout->as_uint32[ctr] = (uint32)accu; - accu >>= 32; - } - accu += inout->as_uint32[7]; - inout->as_uint32[7] = accu & 0x7ffffffful; -} - -/// We are already using a packed radix 16 representation for fe25519. The real use for this function -/// is for architectures that use more bits for storing a fe25519 in a representation where multiplication -/// may be calculated more efficiently. -/// Here we simply copy the data. -static void -fe25519_pack( - unsigned char out[32], - fe25519* in -) -{ - fe25519_reduceCompletely(in); - - memcpy(out, in, 32); -} - -// Note, that r and x are allowed to overlap! -static void -fe25519_invert_useProvidedScratchBuffers( - fe25519* r, - const fe25519* x, - fe25519* t0, - fe25519* t1, - fe25519* t2 -) -{ - fe25519 *z11 = r; // store z11 in r (in order to save one temporary). - fe25519 *z2_10_0 = t1; - fe25519 *z2_50_0 = t2; - fe25519 *z2_100_0 = z2_10_0; - - uint8 i; - - { - fe25519 *z2 = z2_50_0; - - /* 2 */ fe25519_square(z2, x); - /* 4 */ fe25519_square(t0, z2); - /* 8 */ fe25519_square(t0, t0); - /* 9 */ fe25519_mul(z2_10_0, t0, x); - /* 11 */ fe25519_mul(z11, z2_10_0, z2); - - // z2 is dead. 
- } - - /* 22 */ fe25519_square(t0, z11); - /* 2^5 - 2^0 = 31 */ fe25519_mul(z2_10_0, t0, z2_10_0); - - /* 2^6 - 2^1 */ fe25519_square(t0, z2_10_0); - /* 2^7 - 2^2 */ fe25519_square(t0, t0); - /* 2^8 - 2^3 */ fe25519_square(t0, t0); - /* 2^9 - 2^4 */ fe25519_square(t0, t0); - /* 2^10 - 2^5 */ fe25519_square(t0, t0); - /* 2^10 - 2^0 */ fe25519_mul(z2_10_0, t0, z2_10_0); - - /* 2^11 - 2^1 */ fe25519_square(t0, z2_10_0); - - /* 2^20 - 2^10 */ for (i = 1; i < 10; i ++) - { - fe25519_square(t0, t0); - } - /* 2^20 - 2^0 */ fe25519_mul(z2_50_0, t0, z2_10_0); - - /* 2^21 - 2^1 */ fe25519_square(t0, z2_50_0); - - /* 2^40 - 2^20 */ for (i = 1; i < 20; i ++) - { - fe25519_square(t0, t0); - } - /* 2^40 - 2^0 */ fe25519_mul(t0, t0, z2_50_0); - - /* 2^41 - 2^1 */ fe25519_square(t0, t0); - - /* 2^50 - 2^10 */ for (i = 1; i < 10; i ++) - { - fe25519_square(t0, t0); - } - /* 2^50 - 2^0 */ fe25519_mul(z2_50_0, t0, z2_10_0); - - /* 2^51 - 2^1 */ fe25519_square(t0, z2_50_0); - - /* 2^100 - 2^50 */ for (i = 1; i < 50; i ++) - { - fe25519_square(t0, t0); - } - /* 2^100 - 2^0 */ fe25519_mul(z2_100_0, t0, z2_50_0); - - /* 2^101 - 2^1 */ fe25519_square(t0, z2_100_0); - - /* 2^200 - 2^100 */ for (i = 1; i < 100; i ++) - { - fe25519_square(t0, t0); - } - /* 2^200 - 2^0 */ fe25519_mul(t0, t0, z2_100_0); - - /* 2^250 - 2^50 */ for (i = 0; i < 50; i ++) - { - fe25519_square(t0, t0); - } - /* 2^250 - 2^0 */ fe25519_mul(t0, t0, z2_50_0); - - /* 2^255 - 2^5 */ for (i = 0; i < 5; i ++) - { - fe25519_square(t0, t0); - } - /* 2^255 - 21 */ fe25519_mul(r, t0, z11); -} - -static void -fe25519_setzero( - fe25519* out -) -{ - uint8 ctr; - - for (ctr = 0; ctr < 8; ctr++) - { - out->as_uint32[ctr] = 0; - } -} - -static void -fe25519_setone( - fe25519* out -) -{ - uint8 ctr; - - out->as_uint32[0] = 1; - - for (ctr = 1; ctr < 8; ctr++) - { - out->as_uint32[ctr] = 0; - } -} - -static void -fe25519_cswap( - fe25519* in1, - fe25519* in2, - int condition -) -{ - int32 mask = condition; - uint32 ctr; - - mask = 
-mask; - - for (ctr = 0; ctr < 8; ctr++) - { - uint32 val1 = in1->as_uint32[ctr]; - uint32 val2 = in2->as_uint32[ctr]; - uint32 temp = val1; - - val1 ^= mask & (val2 ^ val1); - val2 ^= mask & (val2 ^ temp); - - - in1->as_uint32[ctr] = val1; - in2->as_uint32[ctr] = val2; - } -} - -// **************************************************** -// Scalarmultiplication implementation. -// **************************************************** - -typedef struct _ST_curve25519ladderstepWorkingState -{ - // The base point in affine coordinates - fe25519 x0; - - // The two working points p, q, in projective coordinates. Possibly randomized. - fe25519 xp; - fe25519 zp; - fe25519 xq; - fe25519 zq; - - UN_256bitValue s; - - int nextScalarBitToProcess; - uint8 previousProcessedBit; -} ST_curve25519ladderstepWorkingState; - -static void -curve25519_ladderstep( - ST_curve25519ladderstepWorkingState* pState -) -{ - // Implements the "ladd-1987-m-3" differential-addition-and-doubling formulas - // Source: 1987 Montgomery "Speeding the Pollard and elliptic curve methods of factorization", page 261, - // fifth and sixth displays, plus common-subexpression elimination. - // - // Notation from the explicit formulas database: - // (X2,Z2) corresponds to (xp,zp), - // (X3,Z3) corresponds to (xq,zq) - // Result (X4,Z4) (X5,Z5) expected in (xp,zp) and (xq,zq) - // - // A = X2+Z2; AA = A^2; B = X2-Z2; BB = B^2; E = AA-BB; C = X3+Z3; D = X3-Z3; - // DA = D*A; CB = C*B; t0 = DA+CB; t1 = t0^2; X5 = Z1*t1; t2 = DA-CB; - // t3 = t2^2; Z5 = X1*t3; X4 = AA*BB; t4 = a24*E; t5 = BB+t4; Z4 = E*t5 ; - // - // Re-Ordered for using less temporaries. 
- - fe25519 t1, t2; - - fe25519 *b1=&pState->xp; fe25519 *b2=&pState->zp; - fe25519 *b3=&pState->xq; fe25519 *b4=&pState->zq; - - fe25519 *b5= &t1; fe25519 *b6=&t2; - - fe25519_add(b5,b1,b2); // A = X2+Z2 - fe25519_sub(b6,b1,b2); // B = X2-Z2 - fe25519_add(b1,b3,b4); // C = X3+Z3 - fe25519_sub(b2,b3,b4); // D = X3-Z3 - fe25519_mul(b3,b2,b5); // DA= D*A - fe25519_mul(b2,b1,b6); // CB= C*B - fe25519_add(b1,b2,b3); // T0= DA+CB - fe25519_sub(b4,b3,b2); // T2= DA-CB - fe25519_square(b3,b1); // X5==T1= T0^2 - fe25519_square(b1,b4); // T3= t2^2 - fe25519_mul(b4,b1,&pState->x0); // Z5=X1*t3 - fe25519_square(b1,b5); // AA=A^2 - fe25519_square(b5,b6); // BB=B^2 - fe25519_sub(b2,b1,b5); // E=AA-BB - fe25519_mul(b1,b5,b1); // X4= AA*BB - fe25519_mpyWith121666 (b6,b2); // T4 = a24*E - fe25519_add(b6,b6,b5); // T5 = BB + t4 - fe25519_mul(b2,b6,b2); // Z4 = E*t5 -} -// Conditionally exchanges the two working points, (xp,zp) with (xq,zq), under control of b (callers in this file pass 0 or 1). -static void -curve25519_cswap( - ST_curve25519ladderstepWorkingState* state, - uint8 b -) -{ - fe25519_cswap (&state->xp, &state->xq,b); - fe25519_cswap (&state->zp, &state->zq,b); -} -// X25519 scalar multiplication: clamps a copy of s, runs the ladder on the point whose x coordinate is unpacked from p, and packs the resulting x coordinate into r. -void -x25519_scalar_mult( - uint8_t r[32], - const uint8_t s[32], - const uint8_t p[32] -) -{ - ST_curve25519ladderstepWorkingState state; - unsigned char i; - - - // Copy the scalar into the working state buffer and clamp it: clear bits 0-2, clear bit 255, set bit 254. - for (i = 0; i < 32; i++) - { - state.s.as_uint8 [i] = s[i]; - } - state.s.as_uint8 [0] &= 248; - state.s.as_uint8 [31] &= 127; - state.s.as_uint8 [31] |= 64; - - // Copy the affine x coordinate of the base point to the state. - fe25519_unpack (&state.x0, p); - - // Prepare the working points within the working state struct: (xq,zq) = (x0,1) and (xp,zp) = (1,0). - - fe25519_setone (&state.zq); - fe25519_cpy (&state.xq, &state.x0); - - fe25519_setone(&state.xp); - fe25519_setzero(&state.zp); - - state.nextScalarBitToProcess = 254; - - state.previousProcessedBit = 0; - - // Process every scalar bit with one ladder step each, starting at bit 254 and counting down to bit 0. 
- while (state.nextScalarBitToProcess >= 0) - { - uint8 byteNo = state.nextScalarBitToProcess >> 3; - uint8 bitNo = state.nextScalarBitToProcess & 7; - uint8 bit; - uint8 swap; - /* Swap the working points only when this bit differs from the previously processed one. */ - bit = 1 & (state.s.as_uint8 [byteNo] >> bitNo); - swap = bit ^ state.previousProcessedBit; - state.previousProcessedBit = bit; - curve25519_cswap(&state, swap); - curve25519_ladderstep(&state); - state.nextScalarBitToProcess --; - } - /* Final conditional swap, keyed on the last processed bit. */ - curve25519_cswap(&state,state.previousProcessedBit); - - // optimize for stack usage: xq, zq and x0 are not read again in this function and are handed to the inversion (presumably as its scratch buffers). - fe25519_invert_useProvidedScratchBuffers (&state.zp, &state.zp, &state.xq, &state.zq, &state.x0); - fe25519_mul(&state.xp, &state.xp, &state.zp); - fe25519_reduceCompletely(&state.xp); - - fe25519_pack (r, &state.xp); -} diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S deleted file mode 100644 index b62121adb7..0000000000 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S +++ /dev/null @@ -1,1164 +0,0 @@ -// Author: Ana Helena Sánchez, Björn Haase (second implementation). -// -// public domain -// NOTE(review): unlike mpy121666.S earlier in this diff, no .text / .code 16 / .thumb_func directives are visible in this file - confirm the build supplies them. - - .align 2 - .global square256_asm - .type square256_asm, %function -square256_asm: -// ###################### -// ASM Square 256 refined karatsuba: -// ###################### - // sqr 256 Refined Karatsuba - // pInput in r1 - // pResult in r0 - // adheres to arm eabi calling convention. - push {r1,r4,r5,r6,r7,r14} // r1 (pInput) is stacked too; it is re-read from the stack later (ldr r4,[SP,#36] and ldr r4,[SP,#52]) - .syntax unified - mov r3,r8 - mov r4,r9 - mov r5,r10 - mov r6,r11 - mov r7,r12 - .syntax divided - push {r3,r4,r5,r6,r7} // saves r8-r12, which were copied into r3-r7 just above - .syntax unified - mov r14,r0 // r14 = pResult, used later as the base for the result stores - .syntax divided - ldm r1!,{r4,r5,r6,r7} // load the four lowest input words - // sqr 128 Refined Karatsuba - // Input in r4 ... r7 - // Result in r0 ... 
r7 - // clobbers all registers except for r14 - .syntax unified - mov r0,r4 - mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified - mov r8,r0 - mov r9,r1 - mov r10,r6 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r4,r5 - // Result in r0,r1,r2,r3 - // Clobbers: r4-r6 - // START: sqr 32 - // Input operand in r4 - // Result in r0 ,r1 - // Clobbers: r2, r3 - uxth r0,r4 - lsr r1,r4,#16 - .syntax unified - mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 - // End: sqr 32 - // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 - // START: sqr 32 - // Input operand in r5 - // Result in r2 ,r3 - // Clobbers: r5, r6 - uxth r2,r5 - lsr r3,r5,#16 - .syntax unified - mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 - // End: sqr 32 - // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 - // START: sqr 32 - // Input operand in r4 - // Result in r4 ,r5 - // Clobbers: r1, r6 - lsr r5,r4,#16 - uxth r4,r4 - .syntax unified - mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 - // End: sqr 32 - // Result in r4 ,r5 - .syntax unified - mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified - mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc r3,r6 - // END: sqr 64 Refined Karatsuba - // Result in r0,r1,r2,r3 - // Leaves r6 zero. 
- .syntax unified - mov r6,r10 - mov r10,r0 - mov r11,r1 - mov r12,r2 - mov r1,r3 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r6,r7 - // Result in r2,r3,r4,r5 - // Clobbers: r0,r7,r6 - // START: sqr 32 - // Input operand in r6 - // Result in r2 ,r3 - // Clobbers: r4, r5 - uxth r2,r6 - lsr r3,r6,#16 - .syntax unified - mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 - // End: sqr 32 - // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 - // START: sqr 32 - // Input operand in r7 - // Result in r4 ,r5 - // Clobbers: r0, r7 - uxth r4,r7 - lsr r5,r7,#16 - .syntax unified - mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 - // End: sqr 32 - // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 - // START: sqr 32 - // Input operand in r6 - // Result in r7 ,r0 - // Clobbers: r6, r3 - uxth r7,r6 - lsr r0,r6,#16 - .syntax unified - mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 - // End: sqr 32 - // Result in r7 ,r0 - .syntax unified - mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified - mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 - // END: sqr 64 Refined Karatsuba - // Result in r2,r3,r4,r5 - // Leaves r6 zero. 
- .syntax unified - mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified - mov r12,r2 - mov r2,r8 - mov r8,r3 - mov r3,r9 - mov r9,r4 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r2,r3 - // Result in r6,r7,r0,r1 - // Clobbers: r2,r3,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r6 ,r7 - // Clobbers: r0, r1 - uxth r6,r2 - lsr r7,r2,#16 - .syntax unified - mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 - // End: sqr 32 - // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 - // START: sqr 32 - // Input operand in r3 - // Result in r0 ,r1 - // Clobbers: r3, r4 - uxth r0,r3 - lsr r1,r3,#16 - .syntax unified - mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 - // End: sqr 32 - // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r3 ,r4 - // Clobbers: r2, r7 - uxth r3,r2 - lsr r4,r2,#16 - .syntax unified - mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 - // End: sqr 32 - // Result in r3 ,r4 - .syntax unified - mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified - mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 - // END: sqr 64 Refined Karatsuba - // Result in r6,r7,r0,r1 - // Returns r4 as zero. 
- .syntax unified - mov r2,r12 - mov r3,r8 - mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified - mov r6,r4 - mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified - mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified - mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified - mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - // END: sqr 128 Refined Karatsuba - // Result in r0 ... r7 - push {r4,r5,r6,r7} - .syntax unified - mov r4,r14 - .syntax divided - stm r4!,{r0,r1,r2,r3} - ldr r4,[SP,#36] - add r4,#16 - ldm r4,{r4,r5,r6,r7} - // sqr 128 Refined Karatsuba - // Input in r4 ... r7 - // Result in r0 ... r7 - // clobbers all registers except for r14 - .syntax unified - mov r0,r4 - mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified - mov r8,r0 - mov r9,r1 - mov r10,r6 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r4,r5 - // Result in r0,r1,r2,r3 - // Clobbers: r4-r6 - // START: sqr 32 - // Input operand in r4 - // Result in r0 ,r1 - // Clobbers: r2, r3 - uxth r0,r4 - lsr r1,r4,#16 - .syntax unified - mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 - // End: sqr 32 - // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 - // START: sqr 32 - // Input operand in r5 - // Result in r2 ,r3 - // Clobbers: r5, r6 - uxth r2,r5 - lsr r3,r5,#16 - .syntax unified - mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 - // End: sqr 32 - // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 - // START: sqr 32 - // Input operand in r4 - // Result in r4 ,r5 - // Clobbers: r1, r6 - lsr r5,r4,#16 - uxth 
r4,r4 - .syntax unified - mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 - // End: sqr 32 - // Result in r4 ,r5 - .syntax unified - mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified - mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc r3,r6 - // END: sqr 64 Refined Karatsuba - // Result in r0,r1,r2,r3 - // Leaves r6 zero. - .syntax unified - mov r6,r10 - mov r10,r0 - mov r11,r1 - mov r12,r2 - mov r1,r3 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r6,r7 - // Result in r2,r3,r4,r5 - // Clobbers: r0,r7,r6 - // START: sqr 32 - // Input operand in r6 - // Result in r2 ,r3 - // Clobbers: r4, r5 - uxth r2,r6 - lsr r3,r6,#16 - .syntax unified - mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 - // End: sqr 32 - // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 - // START: sqr 32 - // Input operand in r7 - // Result in r4 ,r5 - // Clobbers: r0, r7 - uxth r4,r7 - lsr r5,r7,#16 - .syntax unified - mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 - // End: sqr 32 - // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 - // START: sqr 32 - // Input operand in r6 - // Result in r7 ,r0 - // Clobbers: r6, r3 - uxth r7,r6 - lsr r0,r6,#16 - .syntax unified - mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 - // End: sqr 32 - // Result in r7 ,r0 - .syntax unified - mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified - mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 - // END: sqr 64 Refined Karatsuba - // Result in r2,r3,r4,r5 - // Leaves r6 zero. 
- .syntax unified - mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified - mov r12,r2 - mov r2,r8 - mov r8,r3 - mov r3,r9 - mov r9,r4 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r2,r3 - // Result in r6,r7,r0,r1 - // Clobbers: r2,r3,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r6 ,r7 - // Clobbers: r0, r1 - uxth r6,r2 - lsr r7,r2,#16 - .syntax unified - mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 - // End: sqr 32 - // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 - // START: sqr 32 - // Input operand in r3 - // Result in r0 ,r1 - // Clobbers: r3, r4 - uxth r0,r3 - lsr r1,r3,#16 - .syntax unified - mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 - // End: sqr 32 - // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r3 ,r4 - // Clobbers: r2, r7 - uxth r3,r2 - lsr r4,r2,#16 - .syntax unified - mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 - // End: sqr 32 - // Result in r3 ,r4 - .syntax unified - mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified - mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 - // END: sqr 64 Refined Karatsuba - // Result in r6,r7,r0,r1 - // Returns r4 as zero. 
- .syntax unified - mov r2,r12 - mov r3,r8 - mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified - mov r6,r4 - mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified - mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified - mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified - mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - // END: sqr 128 Refined Karatsuba - // Result in r0 ... r7 - .syntax unified - mov r8,r4 - mov r9,r5 - mov r10,r6 - mov r11,r7 - .syntax divided - pop {r4,r5,r6,r7} - add r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - .syntax unified - mov r4,r8 - mov r5,r9 - mov r6,r10 - mov r7,r11 - mov r8,r0 - .syntax divided - mov r0,#0 - adc r4,r0 - adc r5,r0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r8 - .syntax divided - push {r0,r1,r2,r3,r4,r5,r6,r7} - ldr r4,[SP,#52] - ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7} - sub r4,r0 - sbc r5,r1 - sbc r6,r2 - sbc r7,r3 - sbc r0,r0 - eor r4,r0 - eor r5,r0 - eor r6,r0 - eor r7,r0 - sub r4,r0 - sbc r5,r0 - sbc r6,r0 - sbc r7,r0 - // sqr 128 Refined Karatsuba - // Input in r4 ... r7 - // Result in r0 ... 
r7 - // clobbers all registers except for r14 - .syntax unified - mov r0,r4 - mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified - mov r8,r0 - mov r9,r1 - mov r10,r6 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r4,r5 - // Result in r0,r1,r2,r3 - // Clobbers: r4-r6 - // START: sqr 32 - // Input operand in r4 - // Result in r0 ,r1 - // Clobbers: r2, r3 - uxth r0,r4 - lsr r1,r4,#16 - .syntax unified - mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 - // End: sqr 32 - // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 - // START: sqr 32 - // Input operand in r5 - // Result in r2 ,r3 - // Clobbers: r5, r6 - uxth r2,r5 - lsr r3,r5,#16 - .syntax unified - mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 - // End: sqr 32 - // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 - // START: sqr 32 - // Input operand in r4 - // Result in r4 ,r5 - // Clobbers: r1, r6 - lsr r5,r4,#16 - uxth r4,r4 - .syntax unified - mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 - // End: sqr 32 - // Result in r4 ,r5 - .syntax unified - mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified - mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc r3,r6 - // END: sqr 64 Refined Karatsuba - // Result in r0,r1,r2,r3 - // Leaves r6 zero. 
- .syntax unified - mov r6,r10 - mov r10,r0 - mov r11,r1 - mov r12,r2 - mov r1,r3 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r6,r7 - // Result in r2,r3,r4,r5 - // Clobbers: r0,r7,r6 - // START: sqr 32 - // Input operand in r6 - // Result in r2 ,r3 - // Clobbers: r4, r5 - uxth r2,r6 - lsr r3,r6,#16 - .syntax unified - mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 - // End: sqr 32 - // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 - // START: sqr 32 - // Input operand in r7 - // Result in r4 ,r5 - // Clobbers: r0, r7 - uxth r4,r7 - lsr r5,r7,#16 - .syntax unified - mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 - // End: sqr 32 - // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 - // START: sqr 32 - // Input operand in r6 - // Result in r7 ,r0 - // Clobbers: r6, r3 - uxth r7,r6 - lsr r0,r6,#16 - .syntax unified - mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 - // End: sqr 32 - // Result in r7 ,r0 - .syntax unified - mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified - mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 - // END: sqr 64 Refined Karatsuba - // Result in r2,r3,r4,r5 - // Leaves r6 zero. 
- .syntax unified - mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified - mov r12,r2 - mov r2,r8 - mov r8,r3 - mov r3,r9 - mov r9,r4 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r2,r3 - // Result in r6,r7,r0,r1 - // Clobbers: r2,r3,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r6 ,r7 - // Clobbers: r0, r1 - uxth r6,r2 - lsr r7,r2,#16 - .syntax unified - mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 - // End: sqr 32 - // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 - // START: sqr 32 - // Input operand in r3 - // Result in r0 ,r1 - // Clobbers: r3, r4 - uxth r0,r3 - lsr r1,r3,#16 - .syntax unified - mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 - // End: sqr 32 - // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r3 ,r4 - // Clobbers: r2, r7 - uxth r3,r2 - lsr r4,r2,#16 - .syntax unified - mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 - // End: sqr 32 - // Result in r3 ,r4 - .syntax unified - mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified - mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 - // END: sqr 64 Refined Karatsuba - // Result in r6,r7,r0,r1 - // Returns r4 as zero. 
- .syntax unified - mov r2,r12 - mov r3,r8 - mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified - mov r6,r4 - mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified - mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified - mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified - mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - // END: sqr 128 Refined Karatsuba - // Result in r0 ... r7 - mvn r0,r0 - mvn r1,r1 - mvn r2,r2 - mvn r3,r3 - mvn r4,r4 - mvn r5,r5 - mvn r6,r6 - mvn r7,r7 - .syntax unified - mov r8,r4 - mov r9,r5 - mov r10,r6 - mov r11,r7 - .syntax divided - mov r4,#143 - asr r4,r4,#1 - pop {r4,r5,r6,r7} - adc r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - .syntax unified - mov r12,r4 - .syntax divided - mov r4,#16 - add r4,r14 - stm r4!,{r0,r1,r2,r3} - .syntax unified - mov r4,r12 - mov r0,r8 - .syntax divided - adc r0,r4 - .syntax unified - mov r8,r0 - mov r1,r9 - .syntax divided - adc r1,r5 - .syntax unified - mov r9,r1 - mov r2,r10 - .syntax divided - adc r2,r6 - .syntax unified - mov r10,r2 - mov r3,r11 - .syntax divided - adc r3,r7 - .syntax unified - mov r11,r3 - .syntax divided - mov r0,#0 - adc r0,r0 - .syntax unified - mov r12,r0 - mov r0,r14 - .syntax divided - ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} - add r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - mov r4,#16 - add r4,r14 - stm r4!,{r0,r1,r2,r3} - .syntax unified - mov r14,r4 - mov r0,r13 - .syntax divided - ldm r0!,{r4,r5,r6,r7} - .syntax unified - mov r1,r8 - .syntax divided - adc r4,r1 - .syntax unified - mov r1,r9 - .syntax divided - adc r5,r1 - .syntax unified - mov r1,r10 - .syntax divided - adc r6,r1 - .syntax unified - mov r1,r11 - .syntax divided - adc r7,r1 - .syntax unified - mov r0,r14 - .syntax divided - stm r0!,{r4,r5,r6,r7} - pop {r4,r5,r6,r7} - .syntax unified - mov r1,r12 - 
.syntax divided - mov r2,#0 - mvn r2,r2 - adc r1,r2 - asr r2,r1,#4 - add r4,r1 - adc r5,r2 - adc r6,r2 - adc r7,r2 - stm r0!,{r4,r5,r6,r7} - pop {r3,r4,r5,r6,r7} // reload the r8-r12 values saved at entry - .syntax unified - mov r8,r3 - mov r9,r4 - mov r10,r5 - mov r11,r6 - mov r12,r7 - .syntax divided - pop {r0,r4,r5,r6,r7,r15} // pops the stacked pInput slot into r0, restores r4-r7 and returns by loading the saved r14 into r15 (pc) -//Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions). - .size square256_asm, .-square256_asm |