diff options
Diffstat (limited to 'third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S')
-rw-r--r-- | third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S | 1164 |
1 files changed, 0 insertions, 1164 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S deleted file mode 100644 index b62121adb7..0000000000 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S +++ /dev/null @@ -1,1164 +0,0 @@ -// Author: Ana Helena Sánchez, Björn Haase (second implementation). -// -// public domain -// - - .align 2 - .global square256_asm - .type square256_asm, %function -square256_asm: -// ###################### -// ASM Square 256 refined karatsuba: -// ###################### - // sqr 256 Refined Karatsuba - // pInput in r1 - // pResult in r0 - // adheres to arm eabi calling convention. - // Reads eight 32-bit words at pInput and stores sixteen 32-bit words at pResult. - // Built from three 128-bit squarings: input words 0..3, input words 4..7 and the absolute difference of those two halves. - // r1 is pushed along with r4..r7 and r14 so that pInput can be reloaded from the stack later. The final pop returns by loading the saved r14 into r15. - // Several mov instructions sit between an add/sub and the adc/sbc that consumes its carry, so they must leave the carry flag alone. - // NOTE(review): the .syntax unified/divided switching around each mov is presumably what selects the flag-preserving encoding. Confirm against the assembler documentation. - push {r1,r4,r5,r6,r7,r14} - .syntax unified - mov r3,r8 - mov r4,r9 - mov r5,r10 - mov r6,r11 - mov r7,r12 - .syntax divided - push {r3,r4,r5,r6,r7} - // r8..r12 are now saved through r3..r7. r14 takes over pResult and input words 0..3 are loaded into r4..r7. - .syntax unified - mov r14,r0 - .syntax divided - ldm r1!,{r4,r5,r6,r7} - // sqr 128 Refined Karatsuba - // Input in r4 ... r7 - // Result in r0 ... r7 - // clobbers all registers except for r14 - // Start: r9:r8 = absolute value of (r5:r4 minus r7:r6) with r2 as the borrow mask, and r6 is parked in r10. - .syntax unified - mov r0,r4 - mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified - mov r8,r0 - mov r9,r1 - mov r10,r6 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r4,r5 - // Result in r0,r1,r2,r3 - // Clobbers: r4-r6 - // START: sqr 32 - // Input operand in r4 - // Result in r0 ,r1 - // Clobbers: r2, r3 - // r0 = lo16*lo16, r1 = hi16*hi16, r2 = lo16*hi16. The doubled cross term is added as r2<<17 into r0 and r2>>15 plus carry into r1. - uxth r0,r4 - lsr r1,r4,#16 - .syntax unified - mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 - // End: sqr 32 - // Result in r0 ,r1 - // r4 = absolute value of (r4 minus r5), r6 is the borrow mask. - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 - // START: sqr 32 - // Input operand in r5 - // Result in r2 ,r3 - // Clobbers: r5, r6 - uxth r2,r5 - lsr r3,r5,#16 - .syntax unified - mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 - // End: sqr 32 - // Result in r2 ,r3 - 
mov r6,#0 - add r2,r1 - adc r3,r6 - // START: sqr 32 - // Input operand in r4 - // Result in r4 ,r5 - // Clobbers: r1, r6 - lsr r5,r4,#16 - uxth r4,r4 - .syntax unified - mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 - // End: sqr 32 - // Result in r4 ,r5 - // Recombination: the words r0,r2,r3 hold A = lo^2 + hi^2*2^32 and the square is A + (A minus r5:r4)*2^32. - .syntax unified - mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified - mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc r3,r6 - // END: sqr 64 Refined Karatsuba - // Result in r0,r1,r2,r3 - // Leaves r6 zero. - .syntax unified - mov r6,r10 - mov r10,r0 - mov r11,r1 - mov r12,r2 - mov r1,r3 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r6,r7 - // Result in r2,r3,r4,r5 - // Clobbers: r0,r7,r6 - // START: sqr 32 - // Input operand in r6 - // Result in r2 ,r3 - // Clobbers: r4, r5 - uxth r2,r6 - lsr r3,r6,#16 - .syntax unified - mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 - // End: sqr 32 - // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 - // START: sqr 32 - // Input operand in r7 - // Result in r4 ,r5 - // Clobbers: r0, r7 - uxth r4,r7 - lsr r5,r7,#16 - .syntax unified - mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 - // End: sqr 32 - // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 - // START: sqr 32 - // Input operand in r6 - // Result in r7 ,r0 - // Clobbers: r6, r3 - uxth r7,r6 - lsr r0,r6,#16 - .syntax unified - mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 - // End: sqr 32 - // Result in r7 ,r0 - .syntax unified - mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified - mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 - // END: sqr 64 Refined Karatsuba - 
// Result in r2,r3,r4,r5 - // Leaves r6 zero. - .syntax unified - mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified - mov r12,r2 - mov r2,r8 - mov r8,r3 - mov r3,r9 - mov r9,r4 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r2,r3 - // Result in r6,r7,r0,r1 - // Clobbers: r2,r3,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r6 ,r7 - // Clobbers: r0, r1 - uxth r6,r2 - lsr r7,r2,#16 - .syntax unified - mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 - // End: sqr 32 - // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 - // START: sqr 32 - // Input operand in r3 - // Result in r0 ,r1 - // Clobbers: r3, r4 - uxth r0,r3 - lsr r1,r3,#16 - .syntax unified - mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 - // End: sqr 32 - // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r3 ,r4 - // Clobbers: r2, r7 - uxth r3,r2 - lsr r4,r2,#16 - .syntax unified - mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 - // End: sqr 32 - // Result in r3 ,r4 - .syntax unified - mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified - mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 - // END: sqr 64 Refined Karatsuba - // Result in r6,r7,r0,r1 - // Returns r4 as zero. 
- .syntax unified - mov r2,r12 - mov r3,r8 - mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified - mov r6,r4 - mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified - mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified - mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified - mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - // END: sqr 128 Refined Karatsuba - // Result in r0 ... r7 - // The upper four words of this first square go to the stack and the lower four to pResult words 0..3. - // pInput is then reloaded from its stack slot (16 bytes just pushed plus 20 bytes of saved r8..r12 gives offset 36) to load input words 4..7. - push {r4,r5,r6,r7} - .syntax unified - mov r4,r14 - .syntax divided - stm r4!,{r0,r1,r2,r3} - ldr r4,[SP,#36] - add r4,#16 - ldm r4,{r4,r5,r6,r7} - // sqr 128 Refined Karatsuba - // Input in r4 ... r7 - // Result in r0 ... r7 - // clobbers all registers except for r14 - .syntax unified - mov r0,r4 - mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified - mov r8,r0 - mov r9,r1 - mov r10,r6 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r4,r5 - // Result in r0,r1,r2,r3 - // Clobbers: r4-r6 - // START: sqr 32 - // Input operand in r4 - // Result in r0 ,r1 - // Clobbers: r2, r3 - uxth r0,r4 - lsr r1,r4,#16 - .syntax unified - mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 - // End: sqr 32 - // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 - // START: sqr 32 - // Input operand in r5 - // Result in r2 ,r3 - // Clobbers: r5, r6 - uxth r2,r5 - lsr r3,r5,#16 - .syntax unified - mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 - // End: sqr 32 - // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 - // START: sqr 32 - // Input operand in r4 - // Result in r4 ,r5 - // Clobbers: r1, r6 - lsr r5,r4,#16 - uxth 
r4,r4 - .syntax unified - mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 - // End: sqr 32 - // Result in r4 ,r5 - .syntax unified - mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified - mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc r3,r6 - // END: sqr 64 Refined Karatsuba - // Result in r0,r1,r2,r3 - // Leaves r6 zero. - .syntax unified - mov r6,r10 - mov r10,r0 - mov r11,r1 - mov r12,r2 - mov r1,r3 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r6,r7 - // Result in r2,r3,r4,r5 - // Clobbers: r0,r7,r6 - // START: sqr 32 - // Input operand in r6 - // Result in r2 ,r3 - // Clobbers: r4, r5 - uxth r2,r6 - lsr r3,r6,#16 - .syntax unified - mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 - // End: sqr 32 - // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 - // START: sqr 32 - // Input operand in r7 - // Result in r4 ,r5 - // Clobbers: r0, r7 - uxth r4,r7 - lsr r5,r7,#16 - .syntax unified - mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 - // End: sqr 32 - // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 - // START: sqr 32 - // Input operand in r6 - // Result in r7 ,r0 - // Clobbers: r6, r3 - uxth r7,r6 - lsr r0,r6,#16 - .syntax unified - mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 - // End: sqr 32 - // Result in r7 ,r0 - .syntax unified - mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified - mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 - // END: sqr 64 Refined Karatsuba - // Result in r2,r3,r4,r5 - // Leaves r6 zero. 
- .syntax unified - mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified - mov r12,r2 - mov r2,r8 - mov r8,r3 - mov r3,r9 - mov r9,r4 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r2,r3 - // Result in r6,r7,r0,r1 - // Clobbers: r2,r3,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r6 ,r7 - // Clobbers: r0, r1 - uxth r6,r2 - lsr r7,r2,#16 - .syntax unified - mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 - // End: sqr 32 - // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 - // START: sqr 32 - // Input operand in r3 - // Result in r0 ,r1 - // Clobbers: r3, r4 - uxth r0,r3 - lsr r1,r3,#16 - .syntax unified - mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 - // End: sqr 32 - // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r3 ,r4 - // Clobbers: r2, r7 - uxth r3,r2 - lsr r4,r2,#16 - .syntax unified - mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 - // End: sqr 32 - // Result in r3 ,r4 - .syntax unified - mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified - mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 - // END: sqr 64 Refined Karatsuba - // Result in r6,r7,r0,r1 - // Returns r4 as zero. 
- .syntax unified - mov r2,r12 - mov r3,r8 - mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified - mov r6,r4 - mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified - mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified - mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified - mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - // END: sqr 128 Refined Karatsuba - // Result in r0 ... r7 - // Add the stacked upper four words of the first square to the lower four words of this second square, carry into the upper four, and push the 8-word sum. - .syntax unified - mov r8,r4 - mov r9,r5 - mov r10,r6 - mov r11,r7 - .syntax divided - pop {r4,r5,r6,r7} - add r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - .syntax unified - mov r4,r8 - mov r5,r9 - mov r6,r10 - mov r7,r11 - mov r8,r0 - .syntax divided - mov r0,#0 - adc r4,r0 - adc r5,r0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r8 - .syntax divided - push {r0,r1,r2,r3,r4,r5,r6,r7} - // Reload pInput (32 bytes just pushed plus 20 bytes of saved r8..r12 gives offset 52), load all eight input words and form r4..r7 = absolute value of (words 4..7 minus words 0..3) with r0 as the borrow mask. - ldr r4,[SP,#52] - ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7} - sub r4,r0 - sbc r5,r1 - sbc r6,r2 - sbc r7,r3 - sbc r0,r0 - eor r4,r0 - eor r5,r0 - eor r6,r0 - eor r7,r0 - sub r4,r0 - sbc r5,r0 - sbc r6,r0 - sbc r7,r0 - // sqr 128 Refined Karatsuba - // Input in r4 ... r7 - // Result in r0 ... 
r7 - // clobbers all registers except for r14 - .syntax unified - mov r0,r4 - mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified - mov r8,r0 - mov r9,r1 - mov r10,r6 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r4,r5 - // Result in r0,r1,r2,r3 - // Clobbers: r4-r6 - // START: sqr 32 - // Input operand in r4 - // Result in r0 ,r1 - // Clobbers: r2, r3 - uxth r0,r4 - lsr r1,r4,#16 - .syntax unified - mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 - // End: sqr 32 - // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 - // START: sqr 32 - // Input operand in r5 - // Result in r2 ,r3 - // Clobbers: r5, r6 - uxth r2,r5 - lsr r3,r5,#16 - .syntax unified - mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 - // End: sqr 32 - // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 - // START: sqr 32 - // Input operand in r4 - // Result in r4 ,r5 - // Clobbers: r1, r6 - lsr r5,r4,#16 - uxth r4,r4 - .syntax unified - mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 - // End: sqr 32 - // Result in r4 ,r5 - .syntax unified - mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified - mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc r3,r6 - // END: sqr 64 Refined Karatsuba - // Result in r0,r1,r2,r3 - // Leaves r6 zero. 
- .syntax unified - mov r6,r10 - mov r10,r0 - mov r11,r1 - mov r12,r2 - mov r1,r3 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r6,r7 - // Result in r2,r3,r4,r5 - // Clobbers: r0,r7,r6 - // START: sqr 32 - // Input operand in r6 - // Result in r2 ,r3 - // Clobbers: r4, r5 - uxth r2,r6 - lsr r3,r6,#16 - .syntax unified - mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 - // End: sqr 32 - // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 - // START: sqr 32 - // Input operand in r7 - // Result in r4 ,r5 - // Clobbers: r0, r7 - uxth r4,r7 - lsr r5,r7,#16 - .syntax unified - mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 - // End: sqr 32 - // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 - // START: sqr 32 - // Input operand in r6 - // Result in r7 ,r0 - // Clobbers: r6, r3 - uxth r7,r6 - lsr r0,r6,#16 - .syntax unified - mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 - // End: sqr 32 - // Result in r7 ,r0 - .syntax unified - mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified - mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 - // END: sqr 64 Refined Karatsuba - // Result in r2,r3,r4,r5 - // Leaves r6 zero. 
- .syntax unified - mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified - mov r12,r2 - mov r2,r8 - mov r8,r3 - mov r3,r9 - mov r9,r4 - .syntax divided - // START: sqr 64 Refined Karatsuba - // Input operands in r2,r3 - // Result in r6,r7,r0,r1 - // Clobbers: r2,r3,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r6 ,r7 - // Clobbers: r0, r1 - uxth r6,r2 - lsr r7,r2,#16 - .syntax unified - mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 - // End: sqr 32 - // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 - // START: sqr 32 - // Input operand in r3 - // Result in r0 ,r1 - // Clobbers: r3, r4 - uxth r0,r3 - lsr r1,r3,#16 - .syntax unified - mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 - // End: sqr 32 - // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 - // START: sqr 32 - // Input operand in r2 - // Result in r3 ,r4 - // Clobbers: r2, r7 - uxth r3,r2 - lsr r4,r2,#16 - .syntax unified - mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 - // End: sqr 32 - // Result in r3 ,r4 - .syntax unified - mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified - mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 - // END: sqr 64 Refined Karatsuba - // Result in r6,r7,r0,r1 - // Returns r4 as zero. 
- .syntax unified - mov r2,r12 - mov r3,r8 - mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified - mov r6,r4 - mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified - mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified - mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified - mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified - mov r0,r10 - .syntax divided - // END: sqr 128 Refined Karatsuba - // Result in r0 ... r7 - // Invert the square of the difference. With the carry forced to one below, the adc chains add the negated square instead of subtracting it. - mvn r0,r0 - mvn r1,r1 - mvn r2,r2 - mvn r3,r3 - mvn r4,r4 - mvn r5,r5 - mvn r6,r6 - mvn r7,r7 - .syntax unified - mov r8,r4 - mov r9,r5 - mov r10,r6 - mov r11,r7 - .syntax divided - // 143 is odd, so the shift right by one leaves the carry flag set. The value in r4 is overwritten by the pop that follows. - mov r4,#143 - asr r4,r4,#1 - pop {r4,r5,r6,r7} - adc r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - .syntax unified - mov r12,r4 - .syntax divided - mov r4,#16 - add r4,r14 - stm r4!,{r0,r1,r2,r3} - .syntax unified - mov r4,r12 - mov r0,r8 - .syntax divided - adc r0,r4 - .syntax unified - mov r8,r0 - mov r1,r9 - .syntax divided - adc r1,r5 - .syntax unified - mov r9,r1 - mov r2,r10 - .syntax divided - adc r2,r6 - .syntax unified - mov r10,r2 - mov r3,r11 - .syntax divided - adc r3,r7 - .syntax unified - mov r11,r3 - .syntax divided - mov r0,#0 - adc r0,r0 - .syntax unified - mov r12,r0 - mov r0,r14 - .syntax divided - ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} - add r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - mov r4,#16 - add r4,r14 - stm r4!,{r0,r1,r2,r3} - .syntax unified - mov r14,r4 - mov r0,r13 - .syntax divided - ldm r0!,{r4,r5,r6,r7} - .syntax unified - mov r1,r8 - .syntax divided - adc r4,r1 - .syntax unified - mov r1,r9 - .syntax divided - adc r5,r1 - .syntax unified - mov r1,r10 - .syntax divided - adc r6,r1 - .syntax unified - mov r1,r11 - .syntax divided - adc r7,r1 - .syntax unified - mov r0,r14 - .syntax divided - stm r0!,{r4,r5,r6,r7} - // r4..r7 = upper four words of the stacked sum. r1 = r12 (carry saved above) minus 1 plus the carry flag, r2 = r1 shifted right arithmetically by 4, and r1,r2,r2,r2 are added in to give result words 12..15. - pop {r4,r5,r6,r7} - .syntax unified - mov r1,r12 - 
.syntax divided - mov r2,#0 - mvn r2,r2 - adc r1,r2 - asr r2,r1,#4 - add r4,r1 - adc r5,r2 - adc r6,r2 - adc r7,r2 - stm r0!,{r4,r5,r6,r7} - pop {r3,r4,r5,r6,r7} - .syntax unified - mov r8,r3 - mov r9,r4 - mov r10,r5 - mov r11,r6 - mov r12,r7 - .syntax divided - pop {r0,r4,r5,r6,r7,r15} -//Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions). - .size square256_asm, .-square256_asm |