summary refs log tree commit diff
path: root/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S')
-rw-r--r-- third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S 1164
1 files changed, 1164 insertions, 0 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
new file mode 100644
index 0000000000..b62121adb7
--- /dev/null
+++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
@@ -0,0 +1,1164 @@
+// Author: Ana Helena Sánchez, Björn Haase (second implementation).
+//
+// public domain
+//
+
+ .align 2
+ .global square256_asm
+ .type square256_asm, %function
+square256_asm:
+// ######################
+// ASM Square 256 refined karatsuba: reads 8 words at pInput, writes 16 words at pResult
+// ######################
+ // sqr 256 Refined Karatsuba: three 128-bit squarings (low half, high half, |high - low|) are combined
+ // pInput in r1 (re-read twice from its stack slot)
+ // pResult in r0 (result words 0..3 are stored before pInput is read again, so pResult[0..3] must not overlap pInput)
+ // adheres to arm eabi calling convention: r4-r12 and the return address are saved and restored.
+ push {r1,r4,r5,r6,r7,r14} // save pInput, r4-r7 and the return address
+ .syntax unified // NOTE(review): register moves are bracketed by unified syntax, presumably to get the MOV encoding that leaves the flags alone - confirm with the assembler in use
+ mov r3,r8 // copy r8-r12 into low registers so they can be pushed
+ mov r4,r9
+ mov r5,r10
+ mov r6,r11
+ mov r7,r12
+ .syntax divided
+ push {r3,r4,r5,r6,r7} // save r8-r12
+ .syntax unified
+ mov r14,r0 // r14 holds pResult from here on
+ .syntax divided
+ ldm r1!,{r4,r5,r6,r7} // r4..r7 = input words 0..3 (low 128 bits)
+ // sqr 128 Refined Karatsuba: squares the 128-bit operand held in r4 ... r7
+ // Input in r4 ... r7
+ // Result in r0 ... r7
+ // clobbers all registers except for r14
+ .syntax unified
+ mov r0,r4 // r1:r0 = copy of the low 64 bits (r5:r4)
+ mov r1,r5
+ .syntax divided
+ sub r0,r6 // r1:r0 = low 64 bits - high 64 bits (r7:r6)
+ sbc r1,r7
+ sbc r2,r2 // r2 = 0 without borrow, 0xFFFFFFFF with borrow
+ eor r0,r2 // xor with the mask, then subtract it: conditional negate
+ eor r1,r2
+ sub r0,r2
+ sbc r1,r2 // r1:r0 = |low - high|
+ .syntax unified
+ mov r8,r0 // park |low - high| in r8,r9 for the third 64-bit squaring
+ mov r9,r1
+ mov r10,r6 // keep operand word 2, r6 is clobbered by the squaring below
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r4,r5
+ // Result in r0,r1,r2,r3
+ // Clobbers: r4-r6
+ // START: sqr 32
+ // Input operand in r4
+ // Result in r0 ,r1
+ // Clobbers: r2, r3
+ uxth r0,r4 // r0 = low 16 bits
+ lsr r1,r4,#16 // r1 = high 16 bits
+ .syntax unified
+ mov r2,r0
+ .syntax divided
+ mul r2,r1 // r2 = low * high (cross product)
+ mul r0,r0 // r0 = low * low
+ mul r1,r1 // r1 = high * high
+ lsr r3,r2,#15 // r3:r2 = cross product shifted left by 17 (twice the cross term at bit 16)
+ lsl r2,r2,#17
+ add r0,r2
+ adc r1,r3 // r1:r0 = 64-bit square of r4
+ // End: sqr 32
+ // Result in r0 ,r1
+ sub r4,r5 // r4 = |r4 - r5| via the borrow mask in r6
+ sbc r6,r6
+ eor r4,r6
+ sub r4,r6
+ // START: sqr 32
+ // Input operand in r5
+ // Result in r2 ,r3
+ // Clobbers: r5, r6
+ uxth r2,r5
+ lsr r3,r5,#16
+ .syntax unified
+ mov r5,r2
+ .syntax divided
+ mul r5,r3
+ mul r2,r2
+ mul r3,r3
+ lsr r6,r5,#15
+ lsl r5,r5,#17
+ add r2,r5
+ adc r3,r6
+ // End: sqr 32
+ // Result in r2 ,r3
+ mov r6,#0
+ add r2,r1 // fold the high word of the first square into the second square (folded sum in r3:r2)
+ adc r3,r6
+ // START: sqr 32
+ // Input operand in r4
+ // Result in r4 ,r5
+ // Clobbers: r1, r6
+ lsr r5,r4,#16
+ uxth r4,r4
+ .syntax unified
+ mov r1,r4
+ .syntax divided
+ mul r1,r5
+ mul r4,r4
+ mul r5,r5
+ lsr r6,r1,#15
+ lsl r1,r1,#17
+ add r4,r1
+ adc r5,r6
+ // End: sqr 32
+ // Result in r4 ,r5
+ .syntax unified
+ mov r1,r2 // duplicate the low word of the folded sum into column 1
+ .syntax divided
+ sub r1,r4 // subtract the squared difference r5:r4 from columns 1 and 2
+ sbc r2,r5
+ .syntax unified
+ mov r5,r3 // keep a copy of the high word of the folded sum
+ .syntax divided
+ mov r6,#0
+ sbc r3,r6 // propagate the borrow into the top word
+ add r1,r0 // add the lowest square word into column 1
+ adc r2,r5 // add the saved high word into column 2
+ adc r3,r6 // propagate the carry into the top word
+ // END: sqr 64 Refined Karatsuba
+ // Result in r0,r1,r2,r3
+ // Leaves r6 zero.
+ .syntax unified
+ mov r6,r10 // bring back operand word 2
+ mov r10,r0 // stash the square of the low 64 bits: words 0..2 in r10..r12
+ mov r11,r1
+ mov r12,r2
+ mov r1,r3 // word 3 stays in r1
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r6,r7
+ // Result in r2,r3,r4,r5
+ // Clobbers: r0,r7,r6
+ // START: sqr 32
+ // Input operand in r6
+ // Result in r2 ,r3
+ // Clobbers: r4, r5
+ uxth r2,r6
+ lsr r3,r6,#16
+ .syntax unified
+ mov r4,r2
+ .syntax divided
+ mul r4,r3
+ mul r2,r2
+ mul r3,r3
+ lsr r5,r4,#15
+ lsl r4,r4,#17
+ add r2,r4
+ adc r3,r5
+ // End: sqr 32
+ // Result in r2 ,r3
+ sub r6,r7
+ sbc r4,r4
+ eor r6,r4
+ sub r6,r4
+ // START: sqr 32
+ // Input operand in r7
+ // Result in r4 ,r5
+ // Clobbers: r0, r7
+ uxth r4,r7
+ lsr r5,r7,#16
+ .syntax unified
+ mov r0,r4
+ .syntax divided
+ mul r0,r5
+ mul r4,r4
+ mul r5,r5
+ lsr r7,r0,#15
+ lsl r0,r0,#17
+ add r4,r0
+ adc r5,r7
+ // End: sqr 32
+ // Result in r4 ,r5
+ mov r7,#0
+ add r4,r3
+ adc r5,r7
+ // START: sqr 32
+ // Input operand in r6
+ // Result in r7 ,r0
+ // Clobbers: r6, r3
+ uxth r7,r6
+ lsr r0,r6,#16
+ .syntax unified
+ mov r6,r7
+ .syntax divided
+ mul r6,r0
+ mul r7,r7
+ mul r0,r0
+ lsr r3,r6,#15
+ lsl r6,r6,#17
+ add r7,r6
+ adc r0,r3
+ // End: sqr 32
+ // Result in r7 ,r0
+ .syntax unified
+ mov r3,r4
+ .syntax divided
+ sub r3,r7
+ sbc r4,r0
+ .syntax unified
+ mov r0,r5
+ .syntax divided
+ mov r6,#0
+ sbc r5,r6
+ add r3,r2
+ adc r4,r0
+ adc r5,r6
+ // END: sqr 64 Refined Karatsuba
+ // Result in r2,r3,r4,r5
+ // Leaves r6 zero.
+ .syntax unified
+ mov r0,r12
+ .syntax divided
+ add r2,r0 // add the upper two words of the low-64 square (r12, r1) to the high-64 square: the 128-bit folded sum
+ adc r3,r1
+ adc r4,r6 // r6 is zero here, only the carry moves up
+ adc r5,r6
+ .syntax unified
+ mov r12,r2 // the folded sum now lives in r12, r8, r9, r5 ...
+ mov r2,r8 // ... while |low - high| moves from r8,r9 into r2,r3
+ mov r8,r3
+ mov r3,r9
+ mov r9,r4
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r2,r3
+ // Result in r6,r7,r0,r1
+ // Clobbers: r2,r3,r4
+ // START: sqr 32
+ // Input operand in r2
+ // Result in r6 ,r7
+ // Clobbers: r0, r1
+ uxth r6,r2
+ lsr r7,r2,#16
+ .syntax unified
+ mov r0,r6
+ .syntax divided
+ mul r0,r7
+ mul r6,r6
+ mul r7,r7
+ lsr r1,r0,#15
+ lsl r0,r0,#17
+ add r6,r0
+ adc r7,r1
+ // End: sqr 32
+ // Result in r6 ,r7
+ sub r2,r3
+ sbc r4,r4
+ eor r2,r4
+ sub r2,r4
+ // START: sqr 32
+ // Input operand in r3
+ // Result in r0 ,r1
+ // Clobbers: r3, r4
+ uxth r0,r3
+ lsr r1,r3,#16
+ .syntax unified
+ mov r3,r0
+ .syntax divided
+ mul r3,r1
+ mul r0,r0
+ mul r1,r1
+ lsr r4,r3,#15
+ lsl r3,r3,#17
+ add r0,r3
+ adc r1,r4
+ // End: sqr 32
+ // Result in r0 ,r1
+ mov r4,#0
+ add r0,r7
+ adc r1,r4
+ // START: sqr 32
+ // Input operand in r2
+ // Result in r3 ,r4
+ // Clobbers: r2, r7
+ uxth r3,r2
+ lsr r4,r2,#16
+ .syntax unified
+ mov r2,r3
+ .syntax divided
+ mul r2,r4
+ mul r3,r3
+ mul r4,r4
+ lsr r7,r2,#15
+ lsl r2,r2,#17
+ add r3,r2
+ adc r4,r7
+ // End: sqr 32
+ // Result in r3 ,r4
+ .syntax unified
+ mov r7,r0
+ .syntax divided
+ sub r7,r3
+ sbc r0,r4
+ .syntax unified
+ mov r2,r1
+ .syntax divided
+ mov r4,#0
+ sbc r1,r4
+ add r7,r6
+ adc r0,r2
+ adc r1,r4
+ // END: sqr 64 Refined Karatsuba
+ // Result in r6,r7,r0,r1
+ // Returns r4 as zero.
+ .syntax unified
+ mov r2,r12 // r5:r4:r3:r2 = the folded sum again
+ mov r3,r8
+ mov r4,r9
+ .syntax divided
+ sub r2,r6 // subtract the squared difference (r1:r0:r7:r6)
+ sbc r3,r7
+ .syntax unified
+ mov r6,r4 // copy the upper sum words before they are modified
+ mov r7,r5
+ .syntax divided
+ sbc r4,r0
+ sbc r5,r1
+ mov r0,#0
+ sbc r6,r0 // borrow propagation only
+ sbc r7,r0
+ .syntax unified
+ mov r0,r10
+ .syntax divided
+ add r2,r0 // add words 0 and 1 of the low-64 square (r10, r11)
+ .syntax unified
+ mov r1,r11
+ .syntax divided
+ adc r3,r1
+ .syntax unified
+ mov r0,r12
+ .syntax divided
+ adc r4,r0 // add words 0 and 1 of the folded sum (r12, r8)
+ .syntax unified
+ mov r0,r8
+ .syntax divided
+ adc r5,r0
+ mov r0,#0
+ adc r6,r0 // carry propagation only
+ adc r7,r0
+ .syntax unified
+ mov r0,r10 // r0,r1 = lowest two result words (r1 still holds r11)
+ .syntax divided
+ // END: sqr 128 Refined Karatsuba
+ // Result in r0 ... r7
+ push {r4,r5,r6,r7} // keep words 4..7 of the low-half square on the stack
+ .syntax unified
+ mov r4,r14
+ .syntax divided
+ stm r4!,{r0,r1,r2,r3} // pResult[0..3] = words 0..3 of the low-half square (not written again)
+ ldr r4,[SP,#36] // reload pInput: 16 bytes just pushed plus 20 bytes of saved r8-r12 lie below its slot
+ add r4,#16
+ ldm r4,{r4,r5,r6,r7} // r4..r7 = input words 4..7 (high 128 bits)
+ // sqr 128 Refined Karatsuba: squares the 128-bit operand held in r4 ... r7
+ // Input in r4 ... r7
+ // Result in r0 ... r7
+ // clobbers all registers except for r14
+ .syntax unified
+ mov r0,r4 // r1:r0 = copy of the low 64 bits (r5:r4)
+ mov r1,r5
+ .syntax divided
+ sub r0,r6 // r1:r0 = low 64 bits - high 64 bits (r7:r6)
+ sbc r1,r7
+ sbc r2,r2 // r2 = 0 without borrow, 0xFFFFFFFF with borrow
+ eor r0,r2 // xor with the mask, then subtract it: conditional negate
+ eor r1,r2
+ sub r0,r2
+ sbc r1,r2 // r1:r0 = |low - high|
+ .syntax unified
+ mov r8,r0 // park |low - high| in r8,r9 for the third 64-bit squaring
+ mov r9,r1
+ mov r10,r6 // keep operand word 2, r6 is clobbered by the squaring below
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r4,r5
+ // Result in r0,r1,r2,r3
+ // Clobbers: r4-r6
+ // START: sqr 32
+ // Input operand in r4
+ // Result in r0 ,r1
+ // Clobbers: r2, r3
+ uxth r0,r4 // r0 = low 16 bits
+ lsr r1,r4,#16 // r1 = high 16 bits
+ .syntax unified
+ mov r2,r0
+ .syntax divided
+ mul r2,r1 // r2 = low * high (cross product)
+ mul r0,r0 // r0 = low * low
+ mul r1,r1 // r1 = high * high
+ lsr r3,r2,#15 // r3:r2 = cross product shifted left by 17 (twice the cross term at bit 16)
+ lsl r2,r2,#17
+ add r0,r2
+ adc r1,r3 // r1:r0 = 64-bit square of r4
+ // End: sqr 32
+ // Result in r0 ,r1
+ sub r4,r5 // r4 = |r4 - r5| via the borrow mask in r6
+ sbc r6,r6
+ eor r4,r6
+ sub r4,r6
+ // START: sqr 32
+ // Input operand in r5
+ // Result in r2 ,r3
+ // Clobbers: r5, r6
+ uxth r2,r5
+ lsr r3,r5,#16
+ .syntax unified
+ mov r5,r2
+ .syntax divided
+ mul r5,r3
+ mul r2,r2
+ mul r3,r3
+ lsr r6,r5,#15
+ lsl r5,r5,#17
+ add r2,r5
+ adc r3,r6
+ // End: sqr 32
+ // Result in r2 ,r3
+ mov r6,#0
+ add r2,r1 // fold the high word of the first square into the second square (folded sum in r3:r2)
+ adc r3,r6
+ // START: sqr 32
+ // Input operand in r4
+ // Result in r4 ,r5
+ // Clobbers: r1, r6
+ lsr r5,r4,#16
+ uxth r4,r4
+ .syntax unified
+ mov r1,r4
+ .syntax divided
+ mul r1,r5
+ mul r4,r4
+ mul r5,r5
+ lsr r6,r1,#15
+ lsl r1,r1,#17
+ add r4,r1
+ adc r5,r6
+ // End: sqr 32
+ // Result in r4 ,r5
+ .syntax unified
+ mov r1,r2 // duplicate the low word of the folded sum into column 1
+ .syntax divided
+ sub r1,r4 // subtract the squared difference r5:r4 from columns 1 and 2
+ sbc r2,r5
+ .syntax unified
+ mov r5,r3 // keep a copy of the high word of the folded sum
+ .syntax divided
+ mov r6,#0
+ sbc r3,r6 // propagate the borrow into the top word
+ add r1,r0 // add the lowest square word into column 1
+ adc r2,r5 // add the saved high word into column 2
+ adc r3,r6 // propagate the carry into the top word
+ // END: sqr 64 Refined Karatsuba
+ // Result in r0,r1,r2,r3
+ // Leaves r6 zero.
+ .syntax unified
+ mov r6,r10 // bring back operand word 2
+ mov r10,r0 // stash the square of the low 64 bits: words 0..2 in r10..r12
+ mov r11,r1
+ mov r12,r2
+ mov r1,r3 // word 3 stays in r1
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r6,r7
+ // Result in r2,r3,r4,r5
+ // Clobbers: r0,r7,r6
+ // START: sqr 32
+ // Input operand in r6
+ // Result in r2 ,r3
+ // Clobbers: r4, r5
+ uxth r2,r6
+ lsr r3,r6,#16
+ .syntax unified
+ mov r4,r2
+ .syntax divided
+ mul r4,r3
+ mul r2,r2
+ mul r3,r3
+ lsr r5,r4,#15
+ lsl r4,r4,#17
+ add r2,r4
+ adc r3,r5
+ // End: sqr 32
+ // Result in r2 ,r3
+ sub r6,r7
+ sbc r4,r4
+ eor r6,r4
+ sub r6,r4
+ // START: sqr 32
+ // Input operand in r7
+ // Result in r4 ,r5
+ // Clobbers: r0, r7
+ uxth r4,r7
+ lsr r5,r7,#16
+ .syntax unified
+ mov r0,r4
+ .syntax divided
+ mul r0,r5
+ mul r4,r4
+ mul r5,r5
+ lsr r7,r0,#15
+ lsl r0,r0,#17
+ add r4,r0
+ adc r5,r7
+ // End: sqr 32
+ // Result in r4 ,r5
+ mov r7,#0
+ add r4,r3
+ adc r5,r7
+ // START: sqr 32
+ // Input operand in r6
+ // Result in r7 ,r0
+ // Clobbers: r6, r3
+ uxth r7,r6
+ lsr r0,r6,#16
+ .syntax unified
+ mov r6,r7
+ .syntax divided
+ mul r6,r0
+ mul r7,r7
+ mul r0,r0
+ lsr r3,r6,#15
+ lsl r6,r6,#17
+ add r7,r6
+ adc r0,r3
+ // End: sqr 32
+ // Result in r7 ,r0
+ .syntax unified
+ mov r3,r4
+ .syntax divided
+ sub r3,r7
+ sbc r4,r0
+ .syntax unified
+ mov r0,r5
+ .syntax divided
+ mov r6,#0
+ sbc r5,r6
+ add r3,r2
+ adc r4,r0
+ adc r5,r6
+ // END: sqr 64 Refined Karatsuba
+ // Result in r2,r3,r4,r5
+ // Leaves r6 zero.
+ .syntax unified
+ mov r0,r12
+ .syntax divided
+ add r2,r0 // add the upper two words of the low-64 square (r12, r1) to the high-64 square: the 128-bit folded sum
+ adc r3,r1
+ adc r4,r6 // r6 is zero here, only the carry moves up
+ adc r5,r6
+ .syntax unified
+ mov r12,r2 // the folded sum now lives in r12, r8, r9, r5 ...
+ mov r2,r8 // ... while |low - high| moves from r8,r9 into r2,r3
+ mov r8,r3
+ mov r3,r9
+ mov r9,r4
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r2,r3
+ // Result in r6,r7,r0,r1
+ // Clobbers: r2,r3,r4
+ // START: sqr 32
+ // Input operand in r2
+ // Result in r6 ,r7
+ // Clobbers: r0, r1
+ uxth r6,r2
+ lsr r7,r2,#16
+ .syntax unified
+ mov r0,r6
+ .syntax divided
+ mul r0,r7
+ mul r6,r6
+ mul r7,r7
+ lsr r1,r0,#15
+ lsl r0,r0,#17
+ add r6,r0
+ adc r7,r1
+ // End: sqr 32
+ // Result in r6 ,r7
+ sub r2,r3
+ sbc r4,r4
+ eor r2,r4
+ sub r2,r4
+ // START: sqr 32
+ // Input operand in r3
+ // Result in r0 ,r1
+ // Clobbers: r3, r4
+ uxth r0,r3
+ lsr r1,r3,#16
+ .syntax unified
+ mov r3,r0
+ .syntax divided
+ mul r3,r1
+ mul r0,r0
+ mul r1,r1
+ lsr r4,r3,#15
+ lsl r3,r3,#17
+ add r0,r3
+ adc r1,r4
+ // End: sqr 32
+ // Result in r0 ,r1
+ mov r4,#0
+ add r0,r7
+ adc r1,r4
+ // START: sqr 32
+ // Input operand in r2
+ // Result in r3 ,r4
+ // Clobbers: r2, r7
+ uxth r3,r2
+ lsr r4,r2,#16
+ .syntax unified
+ mov r2,r3
+ .syntax divided
+ mul r2,r4
+ mul r3,r3
+ mul r4,r4
+ lsr r7,r2,#15
+ lsl r2,r2,#17
+ add r3,r2
+ adc r4,r7
+ // End: sqr 32
+ // Result in r3 ,r4
+ .syntax unified
+ mov r7,r0
+ .syntax divided
+ sub r7,r3
+ sbc r0,r4
+ .syntax unified
+ mov r2,r1
+ .syntax divided
+ mov r4,#0
+ sbc r1,r4
+ add r7,r6
+ adc r0,r2
+ adc r1,r4
+ // END: sqr 64 Refined Karatsuba
+ // Result in r6,r7,r0,r1
+ // Returns r4 as zero.
+ .syntax unified
+ mov r2,r12 // r5:r4:r3:r2 = the folded sum again
+ mov r3,r8
+ mov r4,r9
+ .syntax divided
+ sub r2,r6 // subtract the squared difference (r1:r0:r7:r6)
+ sbc r3,r7
+ .syntax unified
+ mov r6,r4 // copy the upper sum words before they are modified
+ mov r7,r5
+ .syntax divided
+ sbc r4,r0
+ sbc r5,r1
+ mov r0,#0
+ sbc r6,r0 // borrow propagation only
+ sbc r7,r0
+ .syntax unified
+ mov r0,r10
+ .syntax divided
+ add r2,r0 // add words 0 and 1 of the low-64 square (r10, r11)
+ .syntax unified
+ mov r1,r11
+ .syntax divided
+ adc r3,r1
+ .syntax unified
+ mov r0,r12
+ .syntax divided
+ adc r4,r0 // add words 0 and 1 of the folded sum (r12, r8)
+ .syntax unified
+ mov r0,r8
+ .syntax divided
+ adc r5,r0
+ mov r0,#0
+ adc r6,r0 // carry propagation only
+ adc r7,r0
+ .syntax unified
+ mov r0,r10 // r0,r1 = lowest two result words (r1 still holds r11)
+ .syntax divided
+ // END: sqr 128 Refined Karatsuba
+ // Result in r0 ... r7
+ .syntax unified
+ mov r8,r4 // park words 4..7 of the high-half square
+ mov r9,r5
+ mov r10,r6
+ mov r11,r7
+ .syntax divided
+ pop {r4,r5,r6,r7} // words 4..7 of the low-half square
+ add r0,r4 // add them to words 0..3 of the high-half square
+ adc r1,r5
+ adc r2,r6
+ adc r3,r7
+ .syntax unified
+ mov r4,r8 // bring words 4..7 of the high-half square back
+ mov r5,r9
+ mov r6,r10
+ mov r7,r11
+ mov r8,r0 // free r0 for the zero constant
+ .syntax divided
+ mov r0,#0
+ adc r4,r0 // propagate the carry through words 4..7
+ adc r5,r0
+ adc r6,r0
+ adc r7,r0
+ .syntax unified
+ mov r0,r8
+ .syntax divided
+ push {r0,r1,r2,r3,r4,r5,r6,r7} // stack the 256-bit sum S = high-half square + upper half of the low-half square
+ ldr r4,[SP,#52] // pInput again: 32 bytes of S plus 20 bytes of saved r8-r12 lie below its slot
+ ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7} // all eight input words
+ sub r4,r0 // r7..r4 = high half - low half
+ sbc r5,r1
+ sbc r6,r2
+ sbc r7,r3
+ sbc r0,r0 // r0 = borrow mask (0 or 0xFFFFFFFF)
+ eor r4,r0 // conditional negate with the mask
+ eor r5,r0
+ eor r6,r0
+ eor r7,r0
+ sub r4,r0
+ sbc r5,r0
+ sbc r6,r0
+ sbc r7,r0 // r7..r4 = |high half - low half|
+ // sqr 128 Refined Karatsuba: squares the 128-bit operand held in r4 ... r7
+ // Input in r4 ... r7
+ // Result in r0 ... r7
+ // clobbers all registers except for r14
+ .syntax unified
+ mov r0,r4 // r1:r0 = copy of the low 64 bits (r5:r4)
+ mov r1,r5
+ .syntax divided
+ sub r0,r6 // r1:r0 = low 64 bits - high 64 bits (r7:r6)
+ sbc r1,r7
+ sbc r2,r2 // r2 = 0 without borrow, 0xFFFFFFFF with borrow
+ eor r0,r2 // xor with the mask, then subtract it: conditional negate
+ eor r1,r2
+ sub r0,r2
+ sbc r1,r2 // r1:r0 = |low - high|
+ .syntax unified
+ mov r8,r0 // park |low - high| in r8,r9 for the third 64-bit squaring
+ mov r9,r1
+ mov r10,r6 // keep operand word 2, r6 is clobbered by the squaring below
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r4,r5
+ // Result in r0,r1,r2,r3
+ // Clobbers: r4-r6
+ // START: sqr 32
+ // Input operand in r4
+ // Result in r0 ,r1
+ // Clobbers: r2, r3
+ uxth r0,r4 // r0 = low 16 bits
+ lsr r1,r4,#16 // r1 = high 16 bits
+ .syntax unified
+ mov r2,r0
+ .syntax divided
+ mul r2,r1 // r2 = low * high (cross product)
+ mul r0,r0 // r0 = low * low
+ mul r1,r1 // r1 = high * high
+ lsr r3,r2,#15 // r3:r2 = cross product shifted left by 17 (twice the cross term at bit 16)
+ lsl r2,r2,#17
+ add r0,r2
+ adc r1,r3 // r1:r0 = 64-bit square of r4
+ // End: sqr 32
+ // Result in r0 ,r1
+ sub r4,r5 // r4 = |r4 - r5| via the borrow mask in r6
+ sbc r6,r6
+ eor r4,r6
+ sub r4,r6
+ // START: sqr 32
+ // Input operand in r5
+ // Result in r2 ,r3
+ // Clobbers: r5, r6
+ uxth r2,r5
+ lsr r3,r5,#16
+ .syntax unified
+ mov r5,r2
+ .syntax divided
+ mul r5,r3
+ mul r2,r2
+ mul r3,r3
+ lsr r6,r5,#15
+ lsl r5,r5,#17
+ add r2,r5
+ adc r3,r6
+ // End: sqr 32
+ // Result in r2 ,r3
+ mov r6,#0
+ add r2,r1 // fold the high word of the first square into the second square (folded sum in r3:r2)
+ adc r3,r6
+ // START: sqr 32
+ // Input operand in r4
+ // Result in r4 ,r5
+ // Clobbers: r1, r6
+ lsr r5,r4,#16
+ uxth r4,r4
+ .syntax unified
+ mov r1,r4
+ .syntax divided
+ mul r1,r5
+ mul r4,r4
+ mul r5,r5
+ lsr r6,r1,#15
+ lsl r1,r1,#17
+ add r4,r1
+ adc r5,r6
+ // End: sqr 32
+ // Result in r4 ,r5
+ .syntax unified
+ mov r1,r2 // duplicate the low word of the folded sum into column 1
+ .syntax divided
+ sub r1,r4 // subtract the squared difference r5:r4 from columns 1 and 2
+ sbc r2,r5
+ .syntax unified
+ mov r5,r3 // keep a copy of the high word of the folded sum
+ .syntax divided
+ mov r6,#0
+ sbc r3,r6 // propagate the borrow into the top word
+ add r1,r0 // add the lowest square word into column 1
+ adc r2,r5 // add the saved high word into column 2
+ adc r3,r6 // propagate the carry into the top word
+ // END: sqr 64 Refined Karatsuba
+ // Result in r0,r1,r2,r3
+ // Leaves r6 zero.
+ .syntax unified
+ mov r6,r10 // bring back operand word 2
+ mov r10,r0 // stash the square of the low 64 bits: words 0..2 in r10..r12
+ mov r11,r1
+ mov r12,r2
+ mov r1,r3 // word 3 stays in r1
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r6,r7
+ // Result in r2,r3,r4,r5
+ // Clobbers: r0,r7,r6
+ // START: sqr 32
+ // Input operand in r6
+ // Result in r2 ,r3
+ // Clobbers: r4, r5
+ uxth r2,r6
+ lsr r3,r6,#16
+ .syntax unified
+ mov r4,r2
+ .syntax divided
+ mul r4,r3
+ mul r2,r2
+ mul r3,r3
+ lsr r5,r4,#15
+ lsl r4,r4,#17
+ add r2,r4
+ adc r3,r5
+ // End: sqr 32
+ // Result in r2 ,r3
+ sub r6,r7
+ sbc r4,r4
+ eor r6,r4
+ sub r6,r4
+ // START: sqr 32
+ // Input operand in r7
+ // Result in r4 ,r5
+ // Clobbers: r0, r7
+ uxth r4,r7
+ lsr r5,r7,#16
+ .syntax unified
+ mov r0,r4
+ .syntax divided
+ mul r0,r5
+ mul r4,r4
+ mul r5,r5
+ lsr r7,r0,#15
+ lsl r0,r0,#17
+ add r4,r0
+ adc r5,r7
+ // End: sqr 32
+ // Result in r4 ,r5
+ mov r7,#0
+ add r4,r3
+ adc r5,r7
+ // START: sqr 32
+ // Input operand in r6
+ // Result in r7 ,r0
+ // Clobbers: r6, r3
+ uxth r7,r6
+ lsr r0,r6,#16
+ .syntax unified
+ mov r6,r7
+ .syntax divided
+ mul r6,r0
+ mul r7,r7
+ mul r0,r0
+ lsr r3,r6,#15
+ lsl r6,r6,#17
+ add r7,r6
+ adc r0,r3
+ // End: sqr 32
+ // Result in r7 ,r0
+ .syntax unified
+ mov r3,r4
+ .syntax divided
+ sub r3,r7
+ sbc r4,r0
+ .syntax unified
+ mov r0,r5
+ .syntax divided
+ mov r6,#0
+ sbc r5,r6
+ add r3,r2
+ adc r4,r0
+ adc r5,r6
+ // END: sqr 64 Refined Karatsuba
+ // Result in r2,r3,r4,r5
+ // Leaves r6 zero.
+ .syntax unified
+ mov r0,r12
+ .syntax divided
+ add r2,r0 // add the upper two words of the low-64 square (r12, r1) to the high-64 square: the 128-bit folded sum
+ adc r3,r1
+ adc r4,r6 // r6 is zero here, only the carry moves up
+ adc r5,r6
+ .syntax unified
+ mov r12,r2 // the folded sum now lives in r12, r8, r9, r5 ...
+ mov r2,r8 // ... while |low - high| moves from r8,r9 into r2,r3
+ mov r8,r3
+ mov r3,r9
+ mov r9,r4
+ .syntax divided
+ // START: sqr 64 Refined Karatsuba
+ // Input operands in r2,r3
+ // Result in r6,r7,r0,r1
+ // Clobbers: r2,r3,r4
+ // START: sqr 32
+ // Input operand in r2
+ // Result in r6 ,r7
+ // Clobbers: r0, r1
+ uxth r6,r2
+ lsr r7,r2,#16
+ .syntax unified
+ mov r0,r6
+ .syntax divided
+ mul r0,r7
+ mul r6,r6
+ mul r7,r7
+ lsr r1,r0,#15
+ lsl r0,r0,#17
+ add r6,r0
+ adc r7,r1
+ // End: sqr 32
+ // Result in r6 ,r7
+ sub r2,r3
+ sbc r4,r4
+ eor r2,r4
+ sub r2,r4
+ // START: sqr 32
+ // Input operand in r3
+ // Result in r0 ,r1
+ // Clobbers: r3, r4
+ uxth r0,r3
+ lsr r1,r3,#16
+ .syntax unified
+ mov r3,r0
+ .syntax divided
+ mul r3,r1
+ mul r0,r0
+ mul r1,r1
+ lsr r4,r3,#15
+ lsl r3,r3,#17
+ add r0,r3
+ adc r1,r4
+ // End: sqr 32
+ // Result in r0 ,r1
+ mov r4,#0
+ add r0,r7
+ adc r1,r4
+ // START: sqr 32
+ // Input operand in r2
+ // Result in r3 ,r4
+ // Clobbers: r2, r7
+ uxth r3,r2
+ lsr r4,r2,#16
+ .syntax unified
+ mov r2,r3
+ .syntax divided
+ mul r2,r4
+ mul r3,r3
+ mul r4,r4
+ lsr r7,r2,#15
+ lsl r2,r2,#17
+ add r3,r2
+ adc r4,r7
+ // End: sqr 32
+ // Result in r3 ,r4
+ .syntax unified
+ mov r7,r0
+ .syntax divided
+ sub r7,r3
+ sbc r0,r4
+ .syntax unified
+ mov r2,r1
+ .syntax divided
+ mov r4,#0
+ sbc r1,r4
+ add r7,r6
+ adc r0,r2
+ adc r1,r4
+ // END: sqr 64 Refined Karatsuba
+ // Result in r6,r7,r0,r1
+ // Returns r4 as zero.
+ .syntax unified
+ mov r2,r12 // r5:r4:r3:r2 = the folded sum again
+ mov r3,r8
+ mov r4,r9
+ .syntax divided
+ sub r2,r6 // subtract the squared difference (r1:r0:r7:r6)
+ sbc r3,r7
+ .syntax unified
+ mov r6,r4 // copy the upper sum words before they are modified
+ mov r7,r5
+ .syntax divided
+ sbc r4,r0
+ sbc r5,r1
+ mov r0,#0
+ sbc r6,r0 // borrow propagation only
+ sbc r7,r0
+ .syntax unified
+ mov r0,r10
+ .syntax divided
+ add r2,r0 // add words 0 and 1 of the low-64 square (r10, r11)
+ .syntax unified
+ mov r1,r11
+ .syntax divided
+ adc r3,r1
+ .syntax unified
+ mov r0,r12
+ .syntax divided
+ adc r4,r0 // add words 0 and 1 of the folded sum (r12, r8)
+ .syntax unified
+ mov r0,r8
+ .syntax divided
+ adc r5,r0
+ mov r0,#0
+ adc r6,r0 // carry propagation only
+ adc r7,r0
+ .syntax unified
+ mov r0,r10 // r0,r1 = lowest two result words (r1 still holds r11)
+ .syntax divided
+ // END: sqr 128 Refined Karatsuba
+ // Result in r0 ... r7
+ mvn r0,r0 // complement all eight words of the squared difference D (adding the complement with carry-in 1 subtracts D)
+ mvn r1,r1
+ mvn r2,r2
+ mvn r3,r3
+ mvn r4,r4
+ mvn r5,r5
+ mvn r6,r6
+ mvn r7,r7
+ .syntax unified
+ mov r8,r4 // park complemented words 4..7 in r8..r11
+ mov r9,r5
+ mov r10,r6
+ mov r11,r7
+ .syntax divided
+ mov r4,#143
+ asr r4,r4,#1 // shifts bit 0 of the odd constant out, leaving the carry flag set
+ pop {r4,r5,r6,r7} // S words 0..3
+ adc r0,r4 // r3..r0 = S[0..3] + complemented D[0..3] + 1
+ adc r1,r5
+ adc r2,r6
+ adc r3,r7
+ .syntax unified
+ mov r12,r4 // keep S word 0, r4 is needed as the store pointer
+ .syntax divided
+ mov r4,#16
+ add r4,r14
+ stm r4!,{r0,r1,r2,r3} // park the partial words in pResult[4..7]
+ .syntax unified
+ mov r4,r12
+ mov r0,r8
+ .syntax divided
+ adc r0,r4 // carry chain continues: r8..r11 = complemented D[4..7] + S[0..3]
+ .syntax unified
+ mov r8,r0
+ mov r1,r9
+ .syntax divided
+ adc r1,r5
+ .syntax unified
+ mov r9,r1
+ mov r2,r10
+ .syntax divided
+ adc r2,r6
+ .syntax unified
+ mov r10,r2
+ mov r3,r11
+ .syntax divided
+ adc r3,r7
+ .syntax unified
+ mov r11,r3
+ .syntax divided
+ mov r0,#0
+ adc r0,r0 // r0 = carry out of the chain (0 or 1)
+ .syntax unified
+ mov r12,r0 // remembered in r12
+ mov r0,r14
+ .syntax divided
+ ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} // r0..r3 = pResult[0..3], r4..r7 = partial words parked in pResult[4..7]
+ add r0,r4 // add words 0..3 of the low-half square to the partial words
+ adc r1,r5
+ adc r2,r6
+ adc r3,r7
+ mov r4,#16
+ add r4,r14
+ stm r4!,{r0,r1,r2,r3} // pResult[4..7], not written again
+ .syntax unified
+ mov r14,r4 // r14 = pResult + 32 after the write-back
+ mov r0,r13
+ .syntax divided
+ ldm r0!,{r4,r5,r6,r7} // read S words 4..7 from the top of the stack without popping them
+ .syntax unified
+ mov r1,r8
+ .syntax divided
+ adc r4,r1 // r7..r4 = S[4..7] + r8..r11 + carry
+ .syntax unified
+ mov r1,r9
+ .syntax divided
+ adc r5,r1
+ .syntax unified
+ mov r1,r10
+ .syntax divided
+ adc r6,r1
+ .syntax unified
+ mov r1,r11
+ .syntax divided
+ adc r7,r1
+ .syntax unified
+ mov r0,r14
+ .syntax divided
+ stm r0!,{r4,r5,r6,r7} // pResult[8..11]
+ pop {r4,r5,r6,r7} // S words 4..7 once more, now removed from the stack
+ .syntax unified
+ mov r1,r12
+ .syntax divided
+ mov r2,#0
+ mvn r2,r2 // r2 = 0xFFFFFFFF
+ adc r1,r2 // r1 = saved carry + current carry - 1, a value in -1..1
+ asr r2,r1,#4 // r2 = sign extension of r1
+ add r4,r1 // apply the signed correction to S[4..7]
+ adc r5,r2
+ adc r6,r2
+ adc r7,r2
+ stm r0!,{r4,r5,r6,r7} // pResult[12..15]
+ pop {r3,r4,r5,r6,r7} // saved r8-r12
+ .syntax unified
+ mov r8,r3
+ mov r9,r4
+ mov r10,r5
+ mov r11,r6
+ mov r12,r7
+ .syntax divided
+ pop {r0,r4,r5,r6,r7,r15} // saved pInput lands in r0, r4-r7 are restored, loading pc returns
+//Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions).
+ .size square256_asm, .-square256_asm