summaryrefslogtreecommitdiff
path: root/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S')
-rw-r--r--third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S1164
1 files changed, 0 insertions, 1164 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
deleted file mode 100644
index b62121adb7..0000000000
--- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
+++ /dev/null
@@ -1,1164 +0,0 @@
-// Author: Ana Helena Sánchez, Björn Haase (second implementation).
-//
-// public domain
-//
-
- .align 2
- .global square256_asm
- .type square256_asm, %function
-square256_asm:
-// ######################
-// ASM Square 256 refined karatsuba: reads 8 input words and writes 16 result words.
-// ######################
- // sqr 256 Refined Karatsuba: three inlined 128-bit squares (low half, high half, |high half - low half|) plus a recombination
- // pInput in r1 (8 words; the word at the lowest address is treated as least significant)
- // pResult in r0 (16 words are stored, lowest address first)
- // adheres to arm eabi calling convention.
- push {r1,r4,r5,r6,r7,r14} // save pInput, r4-r7 and the return address
- .syntax unified
- mov r3,r8 // copy r8..r12 into low registers so the push below can save them
- mov r4,r9
- mov r5,r10
- mov r6,r11
- mov r7,r12
- .syntax divided
- push {r3,r4,r5,r6,r7}
- .syntax unified
- mov r14,r0 // r14 holds pResult from here on
- .syntax divided
- ldm r1!,{r4,r5,r6,r7} // load the low four input words
- // sqr 128 Refined Karatsuba: squares via three 64-bit squares (low pair, high pair, |low pair - high pair|)
- // Input in r4 ... r7
- // Result in r0 ... r7
- // clobbers all registers except for r14
- .syntax unified
- mov r0,r4
- mov r1,r5
- .syntax divided
- sub r0,r6 // r1:r0 = (r5:r4) - (r7:r6)
- sbc r1,r7
- sbc r2,r2 // r2 - r2 - borrow: zero or all-ones mask
- eor r0,r2 // xor with the mask, then subtract the mask: negates r1:r0 when the mask is all-ones
- eor r1,r2
- sub r0,r2
- sbc r1,r2
- .syntax unified
- mov r8,r0 // r9:r8 = |(r5:r4) - (r7:r6)|, operand of the third 64-bit square below
- mov r9,r1
- mov r10,r6 // park input word r6; the first 64-bit square overwrites r6
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r4,r5
- // Result in r0,r1,r2,r3
- // Clobbers: r4-r6
- // START: sqr 32
- // Input operand in r4
- // Result in r0 ,r1
- // Clobbers: r2, r3
- uxth r0,r4 // r0 = low halfword of r4
- lsr r1,r4,#16 // r1 = high halfword of r4
- .syntax unified
- mov r2,r0
- .syntax divided
- mul r2,r1 // r2 = low * high
- mul r0,r0 // r0 = low * low
- mul r1,r1 // r1 = high * high
- lsr r3,r2,#15 // r3:r2 = cross product << 17 (doubled and aligned to bit 16)
- lsl r2,r2,#17
- add r0,r2 // r1:r0 = 64-bit square of r4
- adc r1,r3
- // End: sqr 32
- // Result in r0 ,r1
- sub r4,r5 // r4 = |r4 - r5| using a borrow mask in r6
- sbc r6,r6
- eor r4,r6
- sub r4,r6
- // START: sqr 32
- // Input operand in r5
- // Result in r2 ,r3
- // Clobbers: r5, r6
- uxth r2,r5
- lsr r3,r5,#16
- .syntax unified
- mov r5,r2
- .syntax divided
- mul r5,r3
- mul r2,r2
- mul r3,r3
- lsr r6,r5,#15
- lsl r5,r5,#17
- add r2,r5
- adc r3,r6
- // End: sqr 32
- // Result in r2 ,r3
- mov r6,#0
- add r2,r1 // r3:r2 += upper word (r1) of the first 32-bit square
- adc r3,r6
- // START: sqr 32
- // Input operand in r4
- // Result in r4 ,r5
- // Clobbers: r1, r6
- lsr r5,r4,#16
- uxth r4,r4
- .syntax unified
- mov r1,r4
- .syntax divided
- mul r1,r5
- mul r4,r4
- mul r5,r5
- lsr r6,r1,#15
- lsl r1,r1,#17
- add r4,r1
- adc r5,r6
- // End: sqr 32
- // Result in r4 ,r5
- .syntax unified
- mov r1,r2 // result word 1 starts as a copy of the sum's low word
- .syntax divided
- sub r1,r4 // subtract the square of the difference (r5:r4) from r2:r1
- sbc r2,r5
- .syntax unified
- mov r5,r3 // keep the sum's high word before the borrow touches r3
- .syntax divided
- mov r6,#0
- sbc r3,r6 // borrow into the top word
- add r1,r0 // add result word 0 and the saved high word, then ripple the carry
- adc r2,r5
- adc r3,r6
- // END: sqr 64 Refined Karatsuba
- // Result in r0,r1,r2,r3
- // Leaves r6 zero.
- .syntax unified
- mov r6,r10 // bring the parked input word back into r6
- mov r10,r0 // stash words 0..2 of the first 64-bit square in r10..r12
- mov r11,r1
- mov r12,r2
- mov r1,r3 // word 3 is kept in r1
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r6,r7
- // Result in r2,r3,r4,r5
- // Clobbers: r0,r7,r6
- // START: sqr 32
- // Input operand in r6
- // Result in r2 ,r3
- // Clobbers: r4, r5
- uxth r2,r6
- lsr r3,r6,#16
- .syntax unified
- mov r4,r2
- .syntax divided
- mul r4,r3
- mul r2,r2
- mul r3,r3
- lsr r5,r4,#15
- lsl r4,r4,#17
- add r2,r4
- adc r3,r5
- // End: sqr 32
- // Result in r2 ,r3
- sub r6,r7 // r6 = |r6 - r7| using a borrow mask in r4
- sbc r4,r4
- eor r6,r4
- sub r6,r4
- // START: sqr 32
- // Input operand in r7
- // Result in r4 ,r5
- // Clobbers: r0, r7
- uxth r4,r7
- lsr r5,r7,#16
- .syntax unified
- mov r0,r4
- .syntax divided
- mul r0,r5
- mul r4,r4
- mul r5,r5
- lsr r7,r0,#15
- lsl r0,r0,#17
- add r4,r0
- adc r5,r7
- // End: sqr 32
- // Result in r4 ,r5
- mov r7,#0
- add r4,r3 // r5:r4 += upper word (r3) of the previous 32-bit square
- adc r5,r7
- // START: sqr 32
- // Input operand in r6
- // Result in r7 ,r0
- // Clobbers: r6, r3
- uxth r7,r6
- lsr r0,r6,#16
- .syntax unified
- mov r6,r7
- .syntax divided
- mul r6,r0
- mul r7,r7
- mul r0,r0
- lsr r3,r6,#15
- lsl r6,r6,#17
- add r7,r6
- adc r0,r3
- // End: sqr 32
- // Result in r7 ,r0
- .syntax unified
- mov r3,r4
- .syntax divided
- sub r3,r7 // same recombination as in the first 64-bit square, on r2..r5
- sbc r4,r0
- .syntax unified
- mov r0,r5
- .syntax divided
- mov r6,#0
- sbc r5,r6
- add r3,r2
- adc r4,r0
- adc r5,r6
- // END: sqr 64 Refined Karatsuba
- // Result in r2,r3,r4,r5
- // Leaves r6 zero.
- .syntax unified
- mov r0,r12
- .syntax divided
- add r2,r0 // r5..r2 += upper two words (r12, r1) of the first 64-bit square
- adc r3,r1
- adc r4,r6 // r6 is zero: carry propagation only
- adc r5,r6
- .syntax unified
- mov r12,r2 // park sum words 0..2 in r12, r8, r9 (word 3 stays in r5) ...
- mov r2,r8 // ... while moving the absolute difference r9:r8 into r3:r2
- mov r8,r3
- mov r3,r9
- mov r9,r4
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r2,r3
- // Result in r6,r7,r0,r1
- // Clobbers: r2,r3,r4
- // START: sqr 32
- // Input operand in r2
- // Result in r6 ,r7
- // Clobbers: r0, r1
- uxth r6,r2
- lsr r7,r2,#16
- .syntax unified
- mov r0,r6
- .syntax divided
- mul r0,r7
- mul r6,r6
- mul r7,r7
- lsr r1,r0,#15
- lsl r0,r0,#17
- add r6,r0
- adc r7,r1
- // End: sqr 32
- // Result in r6 ,r7
- sub r2,r3 // r2 = |r2 - r3| using a borrow mask in r4
- sbc r4,r4
- eor r2,r4
- sub r2,r4
- // START: sqr 32
- // Input operand in r3
- // Result in r0 ,r1
- // Clobbers: r3, r4
- uxth r0,r3
- lsr r1,r3,#16
- .syntax unified
- mov r3,r0
- .syntax divided
- mul r3,r1
- mul r0,r0
- mul r1,r1
- lsr r4,r3,#15
- lsl r3,r3,#17
- add r0,r3
- adc r1,r4
- // End: sqr 32
- // Result in r0 ,r1
- mov r4,#0
- add r0,r7 // r1:r0 += upper word (r7) of the previous 32-bit square
- adc r1,r4
- // START: sqr 32
- // Input operand in r2
- // Result in r3 ,r4
- // Clobbers: r2, r7
- uxth r3,r2
- lsr r4,r2,#16
- .syntax unified
- mov r2,r3
- .syntax divided
- mul r2,r4
- mul r3,r3
- mul r4,r4
- lsr r7,r2,#15
- lsl r2,r2,#17
- add r3,r2
- adc r4,r7
- // End: sqr 32
- // Result in r3 ,r4
- .syntax unified
- mov r7,r0
- .syntax divided
- sub r7,r3 // same recombination again, on r6, r7, r0, r1
- sbc r0,r4
- .syntax unified
- mov r2,r1
- .syntax divided
- mov r4,#0
- sbc r1,r4
- add r7,r6
- adc r0,r2
- adc r1,r4
- // END: sqr 64 Refined Karatsuba
- // Result in r6,r7,r0,r1
- // Returns r4 as zero.
- .syntax unified
- mov r2,r12 // reload sum words 0..2 into r2..r4; word 3 is still in r5
- mov r3,r8
- mov r4,r9
- .syntax divided
- sub r2,r6 // subtract the four-word square of the difference (r6, r7, r0, r1)
- sbc r3,r7
- .syntax unified
- mov r6,r4 // r7:r6 = copy of sum words 2..3, the basis of result words 6..7
- mov r7,r5
- .syntax divided
- sbc r4,r0
- sbc r5,r1
- mov r0,#0
- sbc r6,r0 // borrow ripples into the top two words
- sbc r7,r0
- .syntax unified
- mov r0,r10
- .syntax divided
- add r2,r0 // add words 0..1 of the first 64-bit square (r10, r11) ...
- .syntax unified
- mov r1,r11
- .syntax divided
- adc r3,r1
- .syntax unified
- mov r0,r12
- .syntax divided
- adc r4,r0 // ... then sum words 0..1 (r12, r8) ...
- .syntax unified
- mov r0,r8
- .syntax divided
- adc r5,r0
- mov r0,#0
- adc r6,r0 // ... and let the carry ripple through the top two words
- adc r7,r0
- .syntax unified
- mov r0,r10 // result word 0 = word 0 of the first 64-bit square; r1 still holds word 1 (r11)
- .syntax divided
- // END: sqr 128 Refined Karatsuba
- // Result in r0 ... r7
- push {r4,r5,r6,r7} // park the upper four words of the low-half square on the stack
- .syntax unified
- mov r4,r14
- .syntax divided
- stm r4!,{r0,r1,r2,r3} // result words 0..3 = lower four words of the low-half square
- ldr r4,[SP,#36] // reload the saved pInput: it sits above the 4 words just pushed and the 5 saved high registers
- add r4,#16
- ldm r4,{r4,r5,r6,r7} // load the high four input words
- // sqr 128 Refined Karatsuba: squares via three 64-bit squares (low pair, high pair, |low pair - high pair|)
- // Input in r4 ... r7
- // Result in r0 ... r7
- // clobbers all registers except for r14
- .syntax unified
- mov r0,r4
- mov r1,r5
- .syntax divided
- sub r0,r6 // r1:r0 = (r5:r4) - (r7:r6)
- sbc r1,r7
- sbc r2,r2 // r2 - r2 - borrow: zero or all-ones mask
- eor r0,r2 // xor with the mask, then subtract the mask: negates r1:r0 when the mask is all-ones
- eor r1,r2
- sub r0,r2
- sbc r1,r2
- .syntax unified
- mov r8,r0 // r9:r8 = |(r5:r4) - (r7:r6)|, operand of the third 64-bit square below
- mov r9,r1
- mov r10,r6 // park input word r6; the first 64-bit square overwrites r6
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r4,r5
- // Result in r0,r1,r2,r3
- // Clobbers: r4-r6
- // START: sqr 32
- // Input operand in r4
- // Result in r0 ,r1
- // Clobbers: r2, r3
- uxth r0,r4 // r0 = low halfword of r4
- lsr r1,r4,#16 // r1 = high halfword of r4
- .syntax unified
- mov r2,r0
- .syntax divided
- mul r2,r1 // r2 = low * high
- mul r0,r0 // r0 = low * low
- mul r1,r1 // r1 = high * high
- lsr r3,r2,#15 // r3:r2 = cross product << 17 (doubled and aligned to bit 16)
- lsl r2,r2,#17
- add r0,r2 // r1:r0 = 64-bit square of r4
- adc r1,r3
- // End: sqr 32
- // Result in r0 ,r1
- sub r4,r5 // r4 = |r4 - r5| using a borrow mask in r6
- sbc r6,r6
- eor r4,r6
- sub r4,r6
- // START: sqr 32
- // Input operand in r5
- // Result in r2 ,r3
- // Clobbers: r5, r6
- uxth r2,r5
- lsr r3,r5,#16
- .syntax unified
- mov r5,r2
- .syntax divided
- mul r5,r3
- mul r2,r2
- mul r3,r3
- lsr r6,r5,#15
- lsl r5,r5,#17
- add r2,r5
- adc r3,r6
- // End: sqr 32
- // Result in r2 ,r3
- mov r6,#0
- add r2,r1 // r3:r2 += upper word (r1) of the first 32-bit square
- adc r3,r6
- // START: sqr 32
- // Input operand in r4
- // Result in r4 ,r5
- // Clobbers: r1, r6
- lsr r5,r4,#16
- uxth r4,r4
- .syntax unified
- mov r1,r4
- .syntax divided
- mul r1,r5
- mul r4,r4
- mul r5,r5
- lsr r6,r1,#15
- lsl r1,r1,#17
- add r4,r1
- adc r5,r6
- // End: sqr 32
- // Result in r4 ,r5
- .syntax unified
- mov r1,r2 // result word 1 starts as a copy of the sum's low word
- .syntax divided
- sub r1,r4 // subtract the square of the difference (r5:r4) from r2:r1
- sbc r2,r5
- .syntax unified
- mov r5,r3 // keep the sum's high word before the borrow touches r3
- .syntax divided
- mov r6,#0
- sbc r3,r6 // borrow into the top word
- add r1,r0 // add result word 0 and the saved high word, then ripple the carry
- adc r2,r5
- adc r3,r6
- // END: sqr 64 Refined Karatsuba
- // Result in r0,r1,r2,r3
- // Leaves r6 zero.
- .syntax unified
- mov r6,r10 // bring the parked input word back into r6
- mov r10,r0 // stash words 0..2 of the first 64-bit square in r10..r12
- mov r11,r1
- mov r12,r2
- mov r1,r3 // word 3 is kept in r1
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r6,r7
- // Result in r2,r3,r4,r5
- // Clobbers: r0,r7,r6
- // START: sqr 32
- // Input operand in r6
- // Result in r2 ,r3
- // Clobbers: r4, r5
- uxth r2,r6
- lsr r3,r6,#16
- .syntax unified
- mov r4,r2
- .syntax divided
- mul r4,r3
- mul r2,r2
- mul r3,r3
- lsr r5,r4,#15
- lsl r4,r4,#17
- add r2,r4
- adc r3,r5
- // End: sqr 32
- // Result in r2 ,r3
- sub r6,r7 // r6 = |r6 - r7| using a borrow mask in r4
- sbc r4,r4
- eor r6,r4
- sub r6,r4
- // START: sqr 32
- // Input operand in r7
- // Result in r4 ,r5
- // Clobbers: r0, r7
- uxth r4,r7
- lsr r5,r7,#16
- .syntax unified
- mov r0,r4
- .syntax divided
- mul r0,r5
- mul r4,r4
- mul r5,r5
- lsr r7,r0,#15
- lsl r0,r0,#17
- add r4,r0
- adc r5,r7
- // End: sqr 32
- // Result in r4 ,r5
- mov r7,#0
- add r4,r3 // r5:r4 += upper word (r3) of the previous 32-bit square
- adc r5,r7
- // START: sqr 32
- // Input operand in r6
- // Result in r7 ,r0
- // Clobbers: r6, r3
- uxth r7,r6
- lsr r0,r6,#16
- .syntax unified
- mov r6,r7
- .syntax divided
- mul r6,r0
- mul r7,r7
- mul r0,r0
- lsr r3,r6,#15
- lsl r6,r6,#17
- add r7,r6
- adc r0,r3
- // End: sqr 32
- // Result in r7 ,r0
- .syntax unified
- mov r3,r4
- .syntax divided
- sub r3,r7 // same recombination as in the first 64-bit square, on r2..r5
- sbc r4,r0
- .syntax unified
- mov r0,r5
- .syntax divided
- mov r6,#0
- sbc r5,r6
- add r3,r2
- adc r4,r0
- adc r5,r6
- // END: sqr 64 Refined Karatsuba
- // Result in r2,r3,r4,r5
- // Leaves r6 zero.
- .syntax unified
- mov r0,r12
- .syntax divided
- add r2,r0 // r5..r2 += upper two words (r12, r1) of the first 64-bit square
- adc r3,r1
- adc r4,r6 // r6 is zero: carry propagation only
- adc r5,r6
- .syntax unified
- mov r12,r2 // park sum words 0..2 in r12, r8, r9 (word 3 stays in r5) ...
- mov r2,r8 // ... while moving the absolute difference r9:r8 into r3:r2
- mov r8,r3
- mov r3,r9
- mov r9,r4
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r2,r3
- // Result in r6,r7,r0,r1
- // Clobbers: r2,r3,r4
- // START: sqr 32
- // Input operand in r2
- // Result in r6 ,r7
- // Clobbers: r0, r1
- uxth r6,r2
- lsr r7,r2,#16
- .syntax unified
- mov r0,r6
- .syntax divided
- mul r0,r7
- mul r6,r6
- mul r7,r7
- lsr r1,r0,#15
- lsl r0,r0,#17
- add r6,r0
- adc r7,r1
- // End: sqr 32
- // Result in r6 ,r7
- sub r2,r3 // r2 = |r2 - r3| using a borrow mask in r4
- sbc r4,r4
- eor r2,r4
- sub r2,r4
- // START: sqr 32
- // Input operand in r3
- // Result in r0 ,r1
- // Clobbers: r3, r4
- uxth r0,r3
- lsr r1,r3,#16
- .syntax unified
- mov r3,r0
- .syntax divided
- mul r3,r1
- mul r0,r0
- mul r1,r1
- lsr r4,r3,#15
- lsl r3,r3,#17
- add r0,r3
- adc r1,r4
- // End: sqr 32
- // Result in r0 ,r1
- mov r4,#0
- add r0,r7 // r1:r0 += upper word (r7) of the previous 32-bit square
- adc r1,r4
- // START: sqr 32
- // Input operand in r2
- // Result in r3 ,r4
- // Clobbers: r2, r7
- uxth r3,r2
- lsr r4,r2,#16
- .syntax unified
- mov r2,r3
- .syntax divided
- mul r2,r4
- mul r3,r3
- mul r4,r4
- lsr r7,r2,#15
- lsl r2,r2,#17
- add r3,r2
- adc r4,r7
- // End: sqr 32
- // Result in r3 ,r4
- .syntax unified
- mov r7,r0
- .syntax divided
- sub r7,r3 // same recombination again, on r6, r7, r0, r1
- sbc r0,r4
- .syntax unified
- mov r2,r1
- .syntax divided
- mov r4,#0
- sbc r1,r4
- add r7,r6
- adc r0,r2
- adc r1,r4
- // END: sqr 64 Refined Karatsuba
- // Result in r6,r7,r0,r1
- // Returns r4 as zero.
- .syntax unified
- mov r2,r12 // reload sum words 0..2 into r2..r4; word 3 is still in r5
- mov r3,r8
- mov r4,r9
- .syntax divided
- sub r2,r6 // subtract the four-word square of the difference (r6, r7, r0, r1)
- sbc r3,r7
- .syntax unified
- mov r6,r4 // r7:r6 = copy of sum words 2..3, the basis of result words 6..7
- mov r7,r5
- .syntax divided
- sbc r4,r0
- sbc r5,r1
- mov r0,#0
- sbc r6,r0 // borrow ripples into the top two words
- sbc r7,r0
- .syntax unified
- mov r0,r10
- .syntax divided
- add r2,r0 // add words 0..1 of the first 64-bit square (r10, r11) ...
- .syntax unified
- mov r1,r11
- .syntax divided
- adc r3,r1
- .syntax unified
- mov r0,r12
- .syntax divided
- adc r4,r0 // ... then sum words 0..1 (r12, r8) ...
- .syntax unified
- mov r0,r8
- .syntax divided
- adc r5,r0
- mov r0,#0
- adc r6,r0 // ... and let the carry ripple through the top two words
- adc r7,r0
- .syntax unified
- mov r0,r10 // result word 0 = word 0 of the first 64-bit square; r1 still holds word 1 (r11)
- .syntax divided
- // END: sqr 128 Refined Karatsuba
- // Result in r0 ... r7
- .syntax unified
- mov r8,r4 // set the upper four words of the high-half square aside in r8..r11
- mov r9,r5
- mov r10,r6
- mov r11,r7
- .syntax divided
- pop {r4,r5,r6,r7} // upper four words of the low-half square (pushed after the first 128-bit square)
- add r0,r4 // r3..r0 = lower words of the high-half square + upper words of the low-half square
- adc r1,r5
- adc r2,r6
- adc r3,r7
- .syntax unified
- mov r4,r8 // bring the upper four words of the high-half square back
- mov r5,r9
- mov r6,r10
- mov r7,r11
- mov r8,r0 // free r0 so it can serve as a zero operand
- .syntax divided
- mov r0,#0
- adc r4,r0 // ripple the carry of the addition above through the upper four words
- adc r5,r0
- adc r6,r0
- adc r7,r0
- .syntax unified
- mov r0,r8
- .syntax divided
- push {r0,r1,r2,r3,r4,r5,r6,r7} // keep this 8-word sum (high-half square + upper half of low-half square) on the stack
- ldr r4,[SP,#52] // reload the saved pInput: it sits above the 8 words just pushed and the 5 saved high registers
- ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7} // all eight input words: low half in r0-r3, high half in r4-r7
- sub r4,r0 // r7..r4 = high half - low half
- sbc r5,r1
- sbc r6,r2
- sbc r7,r3
- sbc r0,r0 // r0 = zero or all-ones borrow mask
- eor r4,r0 // xor with the mask and subtract it: r7..r4 = |high half - low half|
- eor r5,r0
- eor r6,r0
- eor r7,r0
- sub r4,r0
- sbc r5,r0
- sbc r6,r0
- sbc r7,r0
- // sqr 128 Refined Karatsuba: squares via three 64-bit squares (low pair, high pair, |low pair - high pair|)
- // Input in r4 ... r7
- // Result in r0 ... r7
- // clobbers all registers except for r14
- .syntax unified
- mov r0,r4
- mov r1,r5
- .syntax divided
- sub r0,r6 // r1:r0 = (r5:r4) - (r7:r6)
- sbc r1,r7
- sbc r2,r2 // r2 - r2 - borrow: zero or all-ones mask
- eor r0,r2 // xor with the mask, then subtract the mask: negates r1:r0 when the mask is all-ones
- eor r1,r2
- sub r0,r2
- sbc r1,r2
- .syntax unified
- mov r8,r0 // r9:r8 = |(r5:r4) - (r7:r6)|, operand of the third 64-bit square below
- mov r9,r1
- mov r10,r6 // park input word r6; the first 64-bit square overwrites r6
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r4,r5
- // Result in r0,r1,r2,r3
- // Clobbers: r4-r6
- // START: sqr 32
- // Input operand in r4
- // Result in r0 ,r1
- // Clobbers: r2, r3
- uxth r0,r4 // r0 = low halfword of r4
- lsr r1,r4,#16 // r1 = high halfword of r4
- .syntax unified
- mov r2,r0
- .syntax divided
- mul r2,r1 // r2 = low * high
- mul r0,r0 // r0 = low * low
- mul r1,r1 // r1 = high * high
- lsr r3,r2,#15 // r3:r2 = cross product << 17 (doubled and aligned to bit 16)
- lsl r2,r2,#17
- add r0,r2 // r1:r0 = 64-bit square of r4
- adc r1,r3
- // End: sqr 32
- // Result in r0 ,r1
- sub r4,r5 // r4 = |r4 - r5| using a borrow mask in r6
- sbc r6,r6
- eor r4,r6
- sub r4,r6
- // START: sqr 32
- // Input operand in r5
- // Result in r2 ,r3
- // Clobbers: r5, r6
- uxth r2,r5
- lsr r3,r5,#16
- .syntax unified
- mov r5,r2
- .syntax divided
- mul r5,r3
- mul r2,r2
- mul r3,r3
- lsr r6,r5,#15
- lsl r5,r5,#17
- add r2,r5
- adc r3,r6
- // End: sqr 32
- // Result in r2 ,r3
- mov r6,#0
- add r2,r1 // r3:r2 += upper word (r1) of the first 32-bit square
- adc r3,r6
- // START: sqr 32
- // Input operand in r4
- // Result in r4 ,r5
- // Clobbers: r1, r6
- lsr r5,r4,#16
- uxth r4,r4
- .syntax unified
- mov r1,r4
- .syntax divided
- mul r1,r5
- mul r4,r4
- mul r5,r5
- lsr r6,r1,#15
- lsl r1,r1,#17
- add r4,r1
- adc r5,r6
- // End: sqr 32
- // Result in r4 ,r5
- .syntax unified
- mov r1,r2 // result word 1 starts as a copy of the sum's low word
- .syntax divided
- sub r1,r4 // subtract the square of the difference (r5:r4) from r2:r1
- sbc r2,r5
- .syntax unified
- mov r5,r3 // keep the sum's high word before the borrow touches r3
- .syntax divided
- mov r6,#0
- sbc r3,r6 // borrow into the top word
- add r1,r0 // add result word 0 and the saved high word, then ripple the carry
- adc r2,r5
- adc r3,r6
- // END: sqr 64 Refined Karatsuba
- // Result in r0,r1,r2,r3
- // Leaves r6 zero.
- .syntax unified
- mov r6,r10 // bring the parked input word back into r6
- mov r10,r0 // stash words 0..2 of the first 64-bit square in r10..r12
- mov r11,r1
- mov r12,r2
- mov r1,r3 // word 3 is kept in r1
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r6,r7
- // Result in r2,r3,r4,r5
- // Clobbers: r0,r7,r6
- // START: sqr 32
- // Input operand in r6
- // Result in r2 ,r3
- // Clobbers: r4, r5
- uxth r2,r6
- lsr r3,r6,#16
- .syntax unified
- mov r4,r2
- .syntax divided
- mul r4,r3
- mul r2,r2
- mul r3,r3
- lsr r5,r4,#15
- lsl r4,r4,#17
- add r2,r4
- adc r3,r5
- // End: sqr 32
- // Result in r2 ,r3
- sub r6,r7 // r6 = |r6 - r7| using a borrow mask in r4
- sbc r4,r4
- eor r6,r4
- sub r6,r4
- // START: sqr 32
- // Input operand in r7
- // Result in r4 ,r5
- // Clobbers: r0, r7
- uxth r4,r7
- lsr r5,r7,#16
- .syntax unified
- mov r0,r4
- .syntax divided
- mul r0,r5
- mul r4,r4
- mul r5,r5
- lsr r7,r0,#15
- lsl r0,r0,#17
- add r4,r0
- adc r5,r7
- // End: sqr 32
- // Result in r4 ,r5
- mov r7,#0
- add r4,r3 // r5:r4 += upper word (r3) of the previous 32-bit square
- adc r5,r7
- // START: sqr 32
- // Input operand in r6
- // Result in r7 ,r0
- // Clobbers: r6, r3
- uxth r7,r6
- lsr r0,r6,#16
- .syntax unified
- mov r6,r7
- .syntax divided
- mul r6,r0
- mul r7,r7
- mul r0,r0
- lsr r3,r6,#15
- lsl r6,r6,#17
- add r7,r6
- adc r0,r3
- // End: sqr 32
- // Result in r7 ,r0
- .syntax unified
- mov r3,r4
- .syntax divided
- sub r3,r7 // same recombination as in the first 64-bit square, on r2..r5
- sbc r4,r0
- .syntax unified
- mov r0,r5
- .syntax divided
- mov r6,#0
- sbc r5,r6
- add r3,r2
- adc r4,r0
- adc r5,r6
- // END: sqr 64 Refined Karatsuba
- // Result in r2,r3,r4,r5
- // Leaves r6 zero.
- .syntax unified
- mov r0,r12
- .syntax divided
- add r2,r0 // r5..r2 += upper two words (r12, r1) of the first 64-bit square
- adc r3,r1
- adc r4,r6 // r6 is zero: carry propagation only
- adc r5,r6
- .syntax unified
- mov r12,r2 // park sum words 0..2 in r12, r8, r9 (word 3 stays in r5) ...
- mov r2,r8 // ... while moving the absolute difference r9:r8 into r3:r2
- mov r8,r3
- mov r3,r9
- mov r9,r4
- .syntax divided
- // START: sqr 64 Refined Karatsuba
- // Input operands in r2,r3
- // Result in r6,r7,r0,r1
- // Clobbers: r2,r3,r4
- // START: sqr 32
- // Input operand in r2
- // Result in r6 ,r7
- // Clobbers: r0, r1
- uxth r6,r2
- lsr r7,r2,#16
- .syntax unified
- mov r0,r6
- .syntax divided
- mul r0,r7
- mul r6,r6
- mul r7,r7
- lsr r1,r0,#15
- lsl r0,r0,#17
- add r6,r0
- adc r7,r1
- // End: sqr 32
- // Result in r6 ,r7
- sub r2,r3 // r2 = |r2 - r3| using a borrow mask in r4
- sbc r4,r4
- eor r2,r4
- sub r2,r4
- // START: sqr 32
- // Input operand in r3
- // Result in r0 ,r1
- // Clobbers: r3, r4
- uxth r0,r3
- lsr r1,r3,#16
- .syntax unified
- mov r3,r0
- .syntax divided
- mul r3,r1
- mul r0,r0
- mul r1,r1
- lsr r4,r3,#15
- lsl r3,r3,#17
- add r0,r3
- adc r1,r4
- // End: sqr 32
- // Result in r0 ,r1
- mov r4,#0
- add r0,r7 // r1:r0 += upper word (r7) of the previous 32-bit square
- adc r1,r4
- // START: sqr 32
- // Input operand in r2
- // Result in r3 ,r4
- // Clobbers: r2, r7
- uxth r3,r2
- lsr r4,r2,#16
- .syntax unified
- mov r2,r3
- .syntax divided
- mul r2,r4
- mul r3,r3
- mul r4,r4
- lsr r7,r2,#15
- lsl r2,r2,#17
- add r3,r2
- adc r4,r7
- // End: sqr 32
- // Result in r3 ,r4
- .syntax unified
- mov r7,r0
- .syntax divided
- sub r7,r3 // same recombination again, on r6, r7, r0, r1
- sbc r0,r4
- .syntax unified
- mov r2,r1
- .syntax divided
- mov r4,#0
- sbc r1,r4
- add r7,r6
- adc r0,r2
- adc r1,r4
- // END: sqr 64 Refined Karatsuba
- // Result in r6,r7,r0,r1
- // Returns r4 as zero.
- .syntax unified
- mov r2,r12 // reload sum words 0..2 into r2..r4; word 3 is still in r5
- mov r3,r8
- mov r4,r9
- .syntax divided
- sub r2,r6 // subtract the four-word square of the difference (r6, r7, r0, r1)
- sbc r3,r7
- .syntax unified
- mov r6,r4 // r7:r6 = copy of sum words 2..3, the basis of result words 6..7
- mov r7,r5
- .syntax divided
- sbc r4,r0
- sbc r5,r1
- mov r0,#0
- sbc r6,r0 // borrow ripples into the top two words
- sbc r7,r0
- .syntax unified
- mov r0,r10
- .syntax divided
- add r2,r0 // add words 0..1 of the first 64-bit square (r10, r11) ...
- .syntax unified
- mov r1,r11
- .syntax divided
- adc r3,r1
- .syntax unified
- mov r0,r12
- .syntax divided
- adc r4,r0 // ... then sum words 0..1 (r12, r8) ...
- .syntax unified
- mov r0,r8
- .syntax divided
- adc r5,r0
- mov r0,#0
- adc r6,r0 // ... and let the carry ripple through the top two words
- adc r7,r0
- .syntax unified
- mov r0,r10 // result word 0 = word 0 of the first 64-bit square; r1 still holds word 1 (r11)
- .syntax divided
- // END: sqr 128 Refined Karatsuba
- // Result in r0 ... r7
- mvn r0,r0 // bitwise-complement all eight words of the difference square
- mvn r1,r1
- mvn r2,r2
- mvn r3,r3
- mvn r4,r4
- mvn r5,r5
- mvn r6,r6
- mvn r7,r7
- .syntax unified
- mov r8,r4 // the upper four complemented words wait in r8..r11
- mov r9,r5
- mov r10,r6
- mov r11,r7
- .syntax divided
- mov r4,#143
- asr r4,r4,#1 // NOTE(review): r4 is overwritten by the pop below, so this presumably exists only to set the carry flag (bit 0 of 143 is shifted out), making complement + carry act as a subtraction - confirm
- pop {r4,r5,r6,r7} // lower four words of the 8-word sum pushed before the third 128-bit square
- adc r0,r4 // r3..r0 = lower sum words + complemented lower difference-square words + carry-in
- adc r1,r5
- adc r2,r6
- adc r3,r7
- .syntax unified
- mov r12,r4 // r4 is needed as a pointer next; keep sum word 0 in r12
- .syntax divided
- mov r4,#16
- add r4,r14 // r4 = pResult + 16 bytes
- stm r4!,{r0,r1,r2,r3} // park the partial middle words in result words 4..7
- .syntax unified
- mov r4,r12
- mov r0,r8
- .syntax divided
- adc r0,r4 // r11..r8 += lower four sum words; NOTE(review): relies on the carry of the adc chain above surviving the mov/add/stm in between - confirm
- .syntax unified
- mov r8,r0
- mov r1,r9
- .syntax divided
- adc r1,r5
- .syntax unified
- mov r9,r1
- mov r2,r10
- .syntax divided
- adc r2,r6
- .syntax unified
- mov r10,r2
- mov r3,r11
- .syntax divided
- adc r3,r7
- .syntax unified
- mov r11,r3
- .syntax divided
- mov r0,#0
- adc r0,r0 // r0 = carry out of that chain
- .syntax unified
- mov r12,r0 // saved in r12 for the final correction
- mov r0,r14
- .syntax divided
- ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} // read back result words 0..7 (words 4..7 are the partial values stored above)
- add r0,r4 // r3..r0 = result words 0..3 + partial words 4..7
- adc r1,r5
- adc r2,r6
- adc r3,r7
- mov r4,#16
- add r4,r14
- stm r4!,{r0,r1,r2,r3} // final result words 4..7; the writeback leaves r4 pointing at word 8
- .syntax unified
- mov r14,r4 // r14 now points at result word 8
- mov r0,r13 // r0 = stack pointer
- .syntax divided
- ldm r0!,{r4,r5,r6,r7} // read the upper four sum words from the top of the stack without popping them
- .syntax unified
- mov r1,r8
- .syntax divided
- adc r4,r1 // add r11..r8, continuing the carry chain of the addition above
- .syntax unified
- mov r1,r9
- .syntax divided
- adc r5,r1
- .syntax unified
- mov r1,r10
- .syntax divided
- adc r6,r1
- .syntax unified
- mov r1,r11
- .syntax divided
- adc r7,r1
- .syntax unified
- mov r0,r14
- .syntax divided
- stm r0!,{r4,r5,r6,r7} // result words 8..11
- pop {r4,r5,r6,r7} // the upper four sum words again, this time removed from the stack
- .syntax unified
- mov r1,r12 // carry saved from the earlier chain
- .syntax divided
- mov r2,#0
- mvn r2,r2 // r2 = all ones (-1)
- adc r1,r2 // r1 = saved carry + live carry - 1
- asr r2,r1,#4 // r2 = arithmetic shift of r1; presumably the sign extension of the small correction in r1
- add r4,r1 // apply the signed correction to the top four words
- adc r5,r2
- adc r6,r2
- adc r7,r2
- stm r0!,{r4,r5,r6,r7} // result words 12..15
- pop {r3,r4,r5,r6,r7} // the values of r8..r12 saved in the prologue
- .syntax unified
- mov r8,r3
- mov r9,r4
- mov r10,r5
- mov r11,r6
- mov r12,r7
- .syntax divided
- pop {r0,r4,r5,r6,r7,r15} // restore r4-r7 and return by popping the saved r14 into pc; the saved r1 (pInput) lands in r0
-//Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions).
- .size square256_asm, .-square256_asm