diff options
Diffstat (limited to 'third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S')
-rw-r--r-- | third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S | 1164 |
1 files changed, 1164 insertions, 0 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S new file mode 100644 index 0000000000..b62121adb7 --- /dev/null +++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S @@ -0,0 +1,1164 @@ +// Author: Ana Helena Sánchez, Björn Haase (second implementation). +// +// public domain +// + + .align 2 + .global square256_asm + .type square256_asm, %function +square256_asm: +// ###################### +// ASM Square 256 refined karatsuba: +// ###################### + // sqr 256 Refined Karatsuba + // pInput in r1 + // pResult in r0 + // adheres to arm eabi calling convention. + push {r1,r4,r5,r6,r7,r14} + .syntax unified + mov r3,r8 + mov r4,r9 + mov r5,r10 + mov r6,r11 + mov r7,r12 + .syntax divided + push {r3,r4,r5,r6,r7} + .syntax unified + mov r14,r0 + .syntax divided + ldm r1!,{r4,r5,r6,r7} + // sqr 128 Refined Karatsuba + // Input in r4 ... r7 + // Result in r0 ... r7 + // clobbers all registers except for r14 + .syntax unified + mov r0,r4 + mov r1,r5 + .syntax divided + sub r0,r6 + sbc r1,r7 + sbc r2,r2 + eor r0,r2 + eor r1,r2 + sub r0,r2 + sbc r1,r2 + .syntax unified + mov r8,r0 + mov r9,r1 + mov r10,r6 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r4,r5 + // Result in r0,r1,r2,r3 + // Clobbers: r4-r6 + // START: sqr 32 + // Input operand in r4 + // Result in r0 ,r1 + // Clobbers: r2, r3 + uxth r0,r4 + lsr r1,r4,#16 + .syntax unified + mov r2,r0 + .syntax divided + mul r2,r1 + mul r0,r0 + mul r1,r1 + lsr r3,r2,#15 + lsl r2,r2,#17 + add r0,r2 + adc r1,r3 + // End: sqr 32 + // Result in r0 ,r1 + sub r4,r5 + sbc r6,r6 + eor r4,r6 + sub r4,r6 + // START: sqr 32 + // Input operand in r5 + // Result in r2 ,r3 + // Clobbers: r5, r6 + uxth r2,r5 + lsr r3,r5,#16 + .syntax unified + mov r5,r2 + .syntax divided + mul r5,r3 + mul r2,r2 + mul r3,r3 + lsr r6,r5,#15 + lsl r5,r5,#17 + add r2,r5 + adc r3,r6 + // End: sqr 32 + // Result in r2 ,r3 + mov r6,#0 + add r2,r1 + adc r3,r6 + // START: sqr 32 + // Input operand in r4 + // Result in r4 ,r5 + // Clobbers: r1, r6 + lsr r5,r4,#16 + uxth r4,r4 + .syntax unified + mov r1,r4 + .syntax divided + mul r1,r5 + mul r4,r4 + mul r5,r5 + lsr r6,r1,#15 + lsl r1,r1,#17 + add r4,r1 + adc r5,r6 + // End: sqr 32 + // Result in r4 ,r5 + .syntax unified + mov r1,r2 + .syntax divided + sub r1,r4 + sbc r2,r5 + .syntax unified + mov r5,r3 + .syntax divided + mov r6,#0 + sbc r3,r6 + add r1,r0 + adc r2,r5 + adc r3,r6 + // END: sqr 64 Refined Karatsuba + // Result in r0,r1,r2,r3 + // Leaves r6 zero. + .syntax unified + mov r6,r10 + mov r10,r0 + mov r11,r1 + mov r12,r2 + mov r1,r3 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r6,r7 + // Result in r2,r3,r4,r5 + // Clobbers: r0,r7,r6 + // START: sqr 32 + // Input operand in r6 + // Result in r2 ,r3 + // Clobbers: r4, r5 + uxth r2,r6 + lsr r3,r6,#16 + .syntax unified + mov r4,r2 + .syntax divided + mul r4,r3 + mul r2,r2 + mul r3,r3 + lsr r5,r4,#15 + lsl r4,r4,#17 + add r2,r4 + adc r3,r5 + // End: sqr 32 + // Result in r2 ,r3 + sub r6,r7 + sbc r4,r4 + eor r6,r4 + sub r6,r4 + // START: sqr 32 + // Input operand in r7 + // Result in r4 ,r5 + // Clobbers: r0, r7 + uxth r4,r7 + lsr r5,r7,#16 + .syntax unified + mov r0,r4 + .syntax divided + mul r0,r5 + mul r4,r4 + mul r5,r5 + lsr r7,r0,#15 + lsl r0,r0,#17 + add r4,r0 + adc r5,r7 + // End: sqr 32 + // Result in r4 ,r5 + mov r7,#0 + add r4,r3 + adc r5,r7 + // START: sqr 32 + // Input operand in r6 + // Result in r7 ,r0 + // Clobbers: r6, r3 + uxth r7,r6 + lsr r0,r6,#16 + .syntax unified + mov r6,r7 + .syntax divided + mul r6,r0 + mul r7,r7 + mul r0,r0 + lsr r3,r6,#15 + lsl r6,r6,#17 + add r7,r6 + adc r0,r3 + // End: sqr 32 + // Result in r7 ,r0 + .syntax unified + mov r3,r4 + .syntax divided + sub r3,r7 + sbc r4,r0 + .syntax unified + mov r0,r5 + .syntax divided + mov r6,#0 + sbc r5,r6 + add r3,r2 + adc r4,r0 + adc r5,r6 + // END: sqr 64 Refined Karatsuba + // Result in r2,r3,r4,r5 + // Leaves r6 zero. + .syntax unified + mov r0,r12 + .syntax divided + add r2,r0 + adc r3,r1 + adc r4,r6 + adc r5,r6 + .syntax unified + mov r12,r2 + mov r2,r8 + mov r8,r3 + mov r3,r9 + mov r9,r4 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r2,r3 + // Result in r6,r7,r0,r1 + // Clobbers: r2,r3,r4 + // START: sqr 32 + // Input operand in r2 + // Result in r6 ,r7 + // Clobbers: r0, r1 + uxth r6,r2 + lsr r7,r2,#16 + .syntax unified + mov r0,r6 + .syntax divided + mul r0,r7 + mul r6,r6 + mul r7,r7 + lsr r1,r0,#15 + lsl r0,r0,#17 + add r6,r0 + adc r7,r1 + // End: sqr 32 + // Result in r6 ,r7 + sub r2,r3 + sbc r4,r4 + eor r2,r4 + sub r2,r4 + // START: sqr 32 + // Input operand in r3 + // Result in r0 ,r1 + // Clobbers: r3, r4 + uxth r0,r3 + lsr r1,r3,#16 + .syntax unified + mov r3,r0 + .syntax divided + mul r3,r1 + mul r0,r0 + mul r1,r1 + lsr r4,r3,#15 + lsl r3,r3,#17 + add r0,r3 + adc r1,r4 + // End: sqr 32 + // Result in r0 ,r1 + mov r4,#0 + add r0,r7 + adc r1,r4 + // START: sqr 32 + // Input operand in r2 + // Result in r3 ,r4 + // Clobbers: r2, r7 + uxth r3,r2 + lsr r4,r2,#16 + .syntax unified + mov r2,r3 + .syntax divided + mul r2,r4 + mul r3,r3 + mul r4,r4 + lsr r7,r2,#15 + lsl r2,r2,#17 + add r3,r2 + adc r4,r7 + // End: sqr 32 + // Result in r3 ,r4 + .syntax unified + mov r7,r0 + .syntax divided + sub r7,r3 + sbc r0,r4 + .syntax unified + mov r2,r1 + .syntax divided + mov r4,#0 + sbc r1,r4 + add r7,r6 + adc r0,r2 + adc r1,r4 + // END: sqr 64 Refined Karatsuba + // Result in r6,r7,r0,r1 + // Returns r4 as zero. + .syntax unified + mov r2,r12 + mov r3,r8 + mov r4,r9 + .syntax divided + sub r2,r6 + sbc r3,r7 + .syntax unified + mov r6,r4 + mov r7,r5 + .syntax divided + sbc r4,r0 + sbc r5,r1 + mov r0,#0 + sbc r6,r0 + sbc r7,r0 + .syntax unified + mov r0,r10 + .syntax divided + add r2,r0 + .syntax unified + mov r1,r11 + .syntax divided + adc r3,r1 + .syntax unified + mov r0,r12 + .syntax divided + adc r4,r0 + .syntax unified + mov r0,r8 + .syntax divided + adc r5,r0 + mov r0,#0 + adc r6,r0 + adc r7,r0 + .syntax unified + mov r0,r10 + .syntax divided + // END: sqr 128 Refined Karatsuba + // Result in r0 ... r7 + push {r4,r5,r6,r7} + .syntax unified + mov r4,r14 + .syntax divided + stm r4!,{r0,r1,r2,r3} + ldr r4,[SP,#36] + add r4,#16 + ldm r4,{r4,r5,r6,r7} + // sqr 128 Refined Karatsuba + // Input in r4 ... r7 + // Result in r0 ... r7 + // clobbers all registers except for r14 + .syntax unified + mov r0,r4 + mov r1,r5 + .syntax divided + sub r0,r6 + sbc r1,r7 + sbc r2,r2 + eor r0,r2 + eor r1,r2 + sub r0,r2 + sbc r1,r2 + .syntax unified + mov r8,r0 + mov r9,r1 + mov r10,r6 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r4,r5 + // Result in r0,r1,r2,r3 + // Clobbers: r4-r6 + // START: sqr 32 + // Input operand in r4 + // Result in r0 ,r1 + // Clobbers: r2, r3 + uxth r0,r4 + lsr r1,r4,#16 + .syntax unified + mov r2,r0 + .syntax divided + mul r2,r1 + mul r0,r0 + mul r1,r1 + lsr r3,r2,#15 + lsl r2,r2,#17 + add r0,r2 + adc r1,r3 + // End: sqr 32 + // Result in r0 ,r1 + sub r4,r5 + sbc r6,r6 + eor r4,r6 + sub r4,r6 + // START: sqr 32 + // Input operand in r5 + // Result in r2 ,r3 + // Clobbers: r5, r6 + uxth r2,r5 + lsr r3,r5,#16 + .syntax unified + mov r5,r2 + .syntax divided + mul r5,r3 + mul r2,r2 + mul r3,r3 + lsr r6,r5,#15 + lsl r5,r5,#17 + add r2,r5 + adc r3,r6 + // End: sqr 32 + // Result in r2 ,r3 + mov r6,#0 + add r2,r1 + adc r3,r6 + // START: sqr 32 + // Input operand in r4 + // Result in r4 ,r5 + // Clobbers: r1, r6 + lsr r5,r4,#16 + uxth r4,r4 + .syntax unified + mov r1,r4 + .syntax divided + mul r1,r5 + mul r4,r4 + mul r5,r5 + lsr r6,r1,#15 + lsl r1,r1,#17 + add r4,r1 + adc r5,r6 + // End: sqr 32 + // Result in r4 ,r5 + .syntax unified + mov r1,r2 + .syntax divided + sub r1,r4 + sbc r2,r5 + .syntax unified + mov r5,r3 + .syntax divided + mov r6,#0 + sbc r3,r6 + add r1,r0 + adc r2,r5 + adc r3,r6 + // END: sqr 64 Refined Karatsuba + // Result in r0,r1,r2,r3 + // Leaves r6 zero. + .syntax unified + mov r6,r10 + mov r10,r0 + mov r11,r1 + mov r12,r2 + mov r1,r3 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r6,r7 + // Result in r2,r3,r4,r5 + // Clobbers: r0,r7,r6 + // START: sqr 32 + // Input operand in r6 + // Result in r2 ,r3 + // Clobbers: r4, r5 + uxth r2,r6 + lsr r3,r6,#16 + .syntax unified + mov r4,r2 + .syntax divided + mul r4,r3 + mul r2,r2 + mul r3,r3 + lsr r5,r4,#15 + lsl r4,r4,#17 + add r2,r4 + adc r3,r5 + // End: sqr 32 + // Result in r2 ,r3 + sub r6,r7 + sbc r4,r4 + eor r6,r4 + sub r6,r4 + // START: sqr 32 + // Input operand in r7 + // Result in r4 ,r5 + // Clobbers: r0, r7 + uxth r4,r7 + lsr r5,r7,#16 + .syntax unified + mov r0,r4 + .syntax divided + mul r0,r5 + mul r4,r4 + mul r5,r5 + lsr r7,r0,#15 + lsl r0,r0,#17 + add r4,r0 + adc r5,r7 + // End: sqr 32 + // Result in r4 ,r5 + mov r7,#0 + add r4,r3 + adc r5,r7 + // START: sqr 32 + // Input operand in r6 + // Result in r7 ,r0 + // Clobbers: r6, r3 + uxth r7,r6 + lsr r0,r6,#16 + .syntax unified + mov r6,r7 + .syntax divided + mul r6,r0 + mul r7,r7 + mul r0,r0 + lsr r3,r6,#15 + lsl r6,r6,#17 + add r7,r6 + adc r0,r3 + // End: sqr 32 + // Result in r7 ,r0 + .syntax unified + mov r3,r4 + .syntax divided + sub r3,r7 + sbc r4,r0 + .syntax unified + mov r0,r5 + .syntax divided + mov r6,#0 + sbc r5,r6 + add r3,r2 + adc r4,r0 + adc r5,r6 + // END: sqr 64 Refined Karatsuba + // Result in r2,r3,r4,r5 + // Leaves r6 zero. + .syntax unified + mov r0,r12 + .syntax divided + add r2,r0 + adc r3,r1 + adc r4,r6 + adc r5,r6 + .syntax unified + mov r12,r2 + mov r2,r8 + mov r8,r3 + mov r3,r9 + mov r9,r4 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r2,r3 + // Result in r6,r7,r0,r1 + // Clobbers: r2,r3,r4 + // START: sqr 32 + // Input operand in r2 + // Result in r6 ,r7 + // Clobbers: r0, r1 + uxth r6,r2 + lsr r7,r2,#16 + .syntax unified + mov r0,r6 + .syntax divided + mul r0,r7 + mul r6,r6 + mul r7,r7 + lsr r1,r0,#15 + lsl r0,r0,#17 + add r6,r0 + adc r7,r1 + // End: sqr 32 + // Result in r6 ,r7 + sub r2,r3 + sbc r4,r4 + eor r2,r4 + sub r2,r4 + // START: sqr 32 + // Input operand in r3 + // Result in r0 ,r1 + // Clobbers: r3, r4 + uxth r0,r3 + lsr r1,r3,#16 + .syntax unified + mov r3,r0 + .syntax divided + mul r3,r1 + mul r0,r0 + mul r1,r1 + lsr r4,r3,#15 + lsl r3,r3,#17 + add r0,r3 + adc r1,r4 + // End: sqr 32 + // Result in r0 ,r1 + mov r4,#0 + add r0,r7 + adc r1,r4 + // START: sqr 32 + // Input operand in r2 + // Result in r3 ,r4 + // Clobbers: r2, r7 + uxth r3,r2 + lsr r4,r2,#16 + .syntax unified + mov r2,r3 + .syntax divided + mul r2,r4 + mul r3,r3 + mul r4,r4 + lsr r7,r2,#15 + lsl r2,r2,#17 + add r3,r2 + adc r4,r7 + // End: sqr 32 + // Result in r3 ,r4 + .syntax unified + mov r7,r0 + .syntax divided + sub r7,r3 + sbc r0,r4 + .syntax unified + mov r2,r1 + .syntax divided + mov r4,#0 + sbc r1,r4 + add r7,r6 + adc r0,r2 + adc r1,r4 + // END: sqr 64 Refined Karatsuba + // Result in r6,r7,r0,r1 + // Returns r4 as zero. + .syntax unified + mov r2,r12 + mov r3,r8 + mov r4,r9 + .syntax divided + sub r2,r6 + sbc r3,r7 + .syntax unified + mov r6,r4 + mov r7,r5 + .syntax divided + sbc r4,r0 + sbc r5,r1 + mov r0,#0 + sbc r6,r0 + sbc r7,r0 + .syntax unified + mov r0,r10 + .syntax divided + add r2,r0 + .syntax unified + mov r1,r11 + .syntax divided + adc r3,r1 + .syntax unified + mov r0,r12 + .syntax divided + adc r4,r0 + .syntax unified + mov r0,r8 + .syntax divided + adc r5,r0 + mov r0,#0 + adc r6,r0 + adc r7,r0 + .syntax unified + mov r0,r10 + .syntax divided + // END: sqr 128 Refined Karatsuba + // Result in r0 ... r7 + .syntax unified + mov r8,r4 + mov r9,r5 + mov r10,r6 + mov r11,r7 + .syntax divided + pop {r4,r5,r6,r7} + add r0,r4 + adc r1,r5 + adc r2,r6 + adc r3,r7 + .syntax unified + mov r4,r8 + mov r5,r9 + mov r6,r10 + mov r7,r11 + mov r8,r0 + .syntax divided + mov r0,#0 + adc r4,r0 + adc r5,r0 + adc r6,r0 + adc r7,r0 + .syntax unified + mov r0,r8 + .syntax divided + push {r0,r1,r2,r3,r4,r5,r6,r7} + ldr r4,[SP,#52] + ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7} + sub r4,r0 + sbc r5,r1 + sbc r6,r2 + sbc r7,r3 + sbc r0,r0 + eor r4,r0 + eor r5,r0 + eor r6,r0 + eor r7,r0 + sub r4,r0 + sbc r5,r0 + sbc r6,r0 + sbc r7,r0 + // sqr 128 Refined Karatsuba + // Input in r4 ... r7 + // Result in r0 ... r7 + // clobbers all registers except for r14 + .syntax unified + mov r0,r4 + mov r1,r5 + .syntax divided + sub r0,r6 + sbc r1,r7 + sbc r2,r2 + eor r0,r2 + eor r1,r2 + sub r0,r2 + sbc r1,r2 + .syntax unified + mov r8,r0 + mov r9,r1 + mov r10,r6 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r4,r5 + // Result in r0,r1,r2,r3 + // Clobbers: r4-r6 + // START: sqr 32 + // Input operand in r4 + // Result in r0 ,r1 + // Clobbers: r2, r3 + uxth r0,r4 + lsr r1,r4,#16 + .syntax unified + mov r2,r0 + .syntax divided + mul r2,r1 + mul r0,r0 + mul r1,r1 + lsr r3,r2,#15 + lsl r2,r2,#17 + add r0,r2 + adc r1,r3 + // End: sqr 32 + // Result in r0 ,r1 + sub r4,r5 + sbc r6,r6 + eor r4,r6 + sub r4,r6 + // START: sqr 32 + // Input operand in r5 + // Result in r2 ,r3 + // Clobbers: r5, r6 + uxth r2,r5 + lsr r3,r5,#16 + .syntax unified + mov r5,r2 + .syntax divided + mul r5,r3 + mul r2,r2 + mul r3,r3 + lsr r6,r5,#15 + lsl r5,r5,#17 + add r2,r5 + adc r3,r6 + // End: sqr 32 + // Result in r2 ,r3 + mov r6,#0 + add r2,r1 + adc r3,r6 + // START: sqr 32 + // Input operand in r4 + // Result in r4 ,r5 + // Clobbers: r1, r6 + lsr r5,r4,#16 + uxth r4,r4 + .syntax unified + mov r1,r4 + .syntax divided + mul r1,r5 + mul r4,r4 + mul r5,r5 + lsr r6,r1,#15 + lsl r1,r1,#17 + add r4,r1 + adc r5,r6 + // End: sqr 32 + // Result in r4 ,r5 + .syntax unified + mov r1,r2 + .syntax divided + sub r1,r4 + sbc r2,r5 + .syntax unified + mov r5,r3 + .syntax divided + mov r6,#0 + sbc r3,r6 + add r1,r0 + adc r2,r5 + adc r3,r6 + // END: sqr 64 Refined Karatsuba + // Result in r0,r1,r2,r3 + // Leaves r6 zero. + .syntax unified + mov r6,r10 + mov r10,r0 + mov r11,r1 + mov r12,r2 + mov r1,r3 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r6,r7 + // Result in r2,r3,r4,r5 + // Clobbers: r0,r7,r6 + // START: sqr 32 + // Input operand in r6 + // Result in r2 ,r3 + // Clobbers: r4, r5 + uxth r2,r6 + lsr r3,r6,#16 + .syntax unified + mov r4,r2 + .syntax divided + mul r4,r3 + mul r2,r2 + mul r3,r3 + lsr r5,r4,#15 + lsl r4,r4,#17 + add r2,r4 + adc r3,r5 + // End: sqr 32 + // Result in r2 ,r3 + sub r6,r7 + sbc r4,r4 + eor r6,r4 + sub r6,r4 + // START: sqr 32 + // Input operand in r7 + // Result in r4 ,r5 + // Clobbers: r0, r7 + uxth r4,r7 + lsr r5,r7,#16 + .syntax unified + mov r0,r4 + .syntax divided + mul r0,r5 + mul r4,r4 + mul r5,r5 + lsr r7,r0,#15 + lsl r0,r0,#17 + add r4,r0 + adc r5,r7 + // End: sqr 32 + // Result in r4 ,r5 + mov r7,#0 + add r4,r3 + adc r5,r7 + // START: sqr 32 + // Input operand in r6 + // Result in r7 ,r0 + // Clobbers: r6, r3 + uxth r7,r6 + lsr r0,r6,#16 + .syntax unified + mov r6,r7 + .syntax divided + mul r6,r0 + mul r7,r7 + mul r0,r0 + lsr r3,r6,#15 + lsl r6,r6,#17 + add r7,r6 + adc r0,r3 + // End: sqr 32 + // Result in r7 ,r0 + .syntax unified + mov r3,r4 + .syntax divided + sub r3,r7 + sbc r4,r0 + .syntax unified + mov r0,r5 + .syntax divided + mov r6,#0 + sbc r5,r6 + add r3,r2 + adc r4,r0 + adc r5,r6 + // END: sqr 64 Refined Karatsuba + // Result in r2,r3,r4,r5 + // Leaves r6 zero. + .syntax unified + mov r0,r12 + .syntax divided + add r2,r0 + adc r3,r1 + adc r4,r6 + adc r5,r6 + .syntax unified + mov r12,r2 + mov r2,r8 + mov r8,r3 + mov r3,r9 + mov r9,r4 + .syntax divided + // START: sqr 64 Refined Karatsuba + // Input operands in r2,r3 + // Result in r6,r7,r0,r1 + // Clobbers: r2,r3,r4 + // START: sqr 32 + // Input operand in r2 + // Result in r6 ,r7 + // Clobbers: r0, r1 + uxth r6,r2 + lsr r7,r2,#16 + .syntax unified + mov r0,r6 + .syntax divided + mul r0,r7 + mul r6,r6 + mul r7,r7 + lsr r1,r0,#15 + lsl r0,r0,#17 + add r6,r0 + adc r7,r1 + // End: sqr 32 + // Result in r6 ,r7 + sub r2,r3 + sbc r4,r4 + eor r2,r4 + sub r2,r4 + // START: sqr 32 + // Input operand in r3 + // Result in r0 ,r1 + // Clobbers: r3, r4 + uxth r0,r3 + lsr r1,r3,#16 + .syntax unified + mov r3,r0 + .syntax divided + mul r3,r1 + mul r0,r0 + mul r1,r1 + lsr r4,r3,#15 + lsl r3,r3,#17 + add r0,r3 + adc r1,r4 + // End: sqr 32 + // Result in r0 ,r1 + mov r4,#0 + add r0,r7 + adc r1,r4 + // START: sqr 32 + // Input operand in r2 + // Result in r3 ,r4 + // Clobbers: r2, r7 + uxth r3,r2 + lsr r4,r2,#16 + .syntax unified + mov r2,r3 + .syntax divided + mul r2,r4 + mul r3,r3 + mul r4,r4 + lsr r7,r2,#15 + lsl r2,r2,#17 + add r3,r2 + adc r4,r7 + // End: sqr 32 + // Result in r3 ,r4 + .syntax unified + mov r7,r0 + .syntax divided + sub r7,r3 + sbc r0,r4 + .syntax unified + mov r2,r1 + .syntax divided + mov r4,#0 + sbc r1,r4 + add r7,r6 + adc r0,r2 + adc r1,r4 + // END: sqr 64 Refined Karatsuba + // Result in r6,r7,r0,r1 + // Returns r4 as zero. + .syntax unified + mov r2,r12 + mov r3,r8 + mov r4,r9 + .syntax divided + sub r2,r6 + sbc r3,r7 + .syntax unified + mov r6,r4 + mov r7,r5 + .syntax divided + sbc r4,r0 + sbc r5,r1 + mov r0,#0 + sbc r6,r0 + sbc r7,r0 + .syntax unified + mov r0,r10 + .syntax divided + add r2,r0 + .syntax unified + mov r1,r11 + .syntax divided + adc r3,r1 + .syntax unified + mov r0,r12 + .syntax divided + adc r4,r0 + .syntax unified + mov r0,r8 + .syntax divided + adc r5,r0 + mov r0,#0 + adc r6,r0 + adc r7,r0 + .syntax unified + mov r0,r10 + .syntax divided + // END: sqr 128 Refined Karatsuba + // Result in r0 ... r7 + mvn r0,r0 + mvn r1,r1 + mvn r2,r2 + mvn r3,r3 + mvn r4,r4 + mvn r5,r5 + mvn r6,r6 + mvn r7,r7 + .syntax unified + mov r8,r4 + mov r9,r5 + mov r10,r6 + mov r11,r7 + .syntax divided + mov r4,#143 + asr r4,r4,#1 + pop {r4,r5,r6,r7} + adc r0,r4 + adc r1,r5 + adc r2,r6 + adc r3,r7 + .syntax unified + mov r12,r4 + .syntax divided + mov r4,#16 + add r4,r14 + stm r4!,{r0,r1,r2,r3} + .syntax unified + mov r4,r12 + mov r0,r8 + .syntax divided + adc r0,r4 + .syntax unified + mov r8,r0 + mov r1,r9 + .syntax divided + adc r1,r5 + .syntax unified + mov r9,r1 + mov r2,r10 + .syntax divided + adc r2,r6 + .syntax unified + mov r10,r2 + mov r3,r11 + .syntax divided + adc r3,r7 + .syntax unified + mov r11,r3 + .syntax divided + mov r0,#0 + adc r0,r0 + .syntax unified + mov r12,r0 + mov r0,r14 + .syntax divided + ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} + add r0,r4 + adc r1,r5 + adc r2,r6 + adc r3,r7 + mov r4,#16 + add r4,r14 + stm r4!,{r0,r1,r2,r3} + .syntax unified + mov r14,r4 + mov r0,r13 + .syntax divided + ldm r0!,{r4,r5,r6,r7} + .syntax unified + mov r1,r8 + .syntax divided + adc r4,r1 + .syntax unified + mov r1,r9 + .syntax divided + adc r5,r1 + .syntax unified + mov r1,r10 + .syntax divided + adc r6,r1 + .syntax unified + mov r1,r11 + .syntax divided + adc r7,r1 + .syntax unified + mov r0,r14 + .syntax divided + stm r0!,{r4,r5,r6,r7} + pop {r4,r5,r6,r7} + .syntax unified + mov r1,r12 + .syntax divided + mov r2,#0 + mvn r2,r2 + adc r1,r2 + asr r2,r1,#4 + add r4,r1 + adc r5,r2 + adc r6,r2 + adc r7,r2 + stm r0!,{r4,r5,r6,r7} + pop {r3,r4,r5,r6,r7} + .syntax unified + mov r8,r3 + mov r9,r4 + mov r10,r5 + mov r11,r6 + mov r12,r7 + .syntax divided + pop {r0,r4,r5,r6,r7,r15} +//Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions). + .size square256_asm, .-square256_asm |