summaryrefslogtreecommitdiff
path: root/third_party
diff options
context:
space:
mode:
author: Tom Hughes <tomhughes@chromium.org> 2022-10-18 16:27:08 -0700
committer: Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-10-24 17:33:18 +0000
commit: 1afeeef4f4e69960199e32f4de50094c43625b27 (patch)
tree: 5810fa3c6af9c97304b2d60c39ca17b20a10e0c5 /third_party
parent: 4b47b808d52d602d86a7ec745019b72433469bdf (diff)
download: chrome-ec-1afeeef4f4e69960199e32f4de50094c43625b27.tar.gz
third_party/unacl-curve25519: Fix assembly
When building with clang it complains that when compiling for Thumb there are no flag-preserving variants of many of the instructions in the assembly:

core/cortex-m0/curve25519/mul.S:1099:2: error: no flag-preserving variant of this instruction available
        adc r2, r6
        ^

Using "arm-none-eabi-objdump -d" to disassemble the object files, we can see that gcc is less strict and just silently generates the version of the instruction that sets the flags (e.g., "adc" -> "adcs").

This change fixes up the assembly so that it compiles with clang. Most of the changes were done programmatically with the following script (followed by some manual cleanup):

sed -i 's/adc/adcs/g' ${FILE}
sed -i 's/asr/asrs/g' ${FILE}
sed -i 's/mvn/mvns/g' ${FILE}
sed -i 's/sbc/sbcs/g' ${FILE}
sed -i 's/sub/subs/g' ${FILE}
sed -i 's/lsr/lsrs/g' ${FILE}
sed -i 's/lsl/lsls/g' ${FILE}
sed -i 's/mul/muls/g' ${FILE}
sed -i 's/eor/eors/g' ${FILE}
sed -i 's/orr/orrs/g' ${FILE}
sed -i 's/mov r\([[:digit:]]\+\),#\([[:digit:]]\+\)/movs r\1,#\2/g' ${FILE}
sed -i 's/add r\([[:digit:]]\+\),#\([[:digit:]]\+\)/adds r\1,#\2/g' ${FILE}
sed -i 's/add r\([[:digit:]]\+\),r\([[:digit:]]\+\)/adds r\1,r\1,r\2/g' ${FILE}

The binary generated by gcc before and after this change is bitwise exact as seen by the use of the compare_build.sh script (see TEST line).

BRANCH=none
BUG=b:172020503
TEST=CC=clang make BOARD=hammer
TEST=./util/compare_build.sh -b all -j 120 => MATCH

Signed-off-by: Tom Hughes <tomhughes@chromium.org>
Change-Id: Ice602c1996ef3b48c46e69f0d6770828cf21c15d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/3968441
Reviewed-by: Eric Yilun Lin <yllin@google.com>
Diffstat (limited to 'third_party')
-rw-r--r-- third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S 254
-rw-r--r-- third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S 1637
-rw-r--r-- third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S 207
-rw-r--r-- third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S 1165
4 files changed, 1530 insertions, 1733 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S
index 0f847a2484..5236dff64c 100644
--- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S
+++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S
@@ -10,7 +10,7 @@
// ATTENTION:
// Not yet tested on target hardware.
-
+ .syntax unified
.code 16
.text
.align 2
@@ -24,156 +24,156 @@ fe25519_mpyWith121666_asm:
push {r4,r5,r6,r7,r14}
ldr r7,=56130
ldr r2,[r1,#28]
- lsl r5,r2,#16
- lsr r6,r2,#16
- lsr r3,r2,#16
+ lsls r5,r2,#16
+ lsrs r6,r2,#16
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r5,r2
- mov r2,#0
- adc r6,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r5,r2
- adc r6,r3
- lsl r2,r5,#1
- lsr r2,r2,#1
+ muls r2,r7
+ muls r3,r7
+ adds r5,r5,r2
+ movs r2,#0
+ adcs r6,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r5,r5,r2
+ adcs r6,r3
+ lsls r2,r5,#1
+ lsrs r2,r2,#1
str r2,[r0,#28]
- lsr r5,r5,#31
- lsl r6,r6,#1
- orr r5,r6
- mov r6,#19
- mul r5,r6
- mov r6,#0
+ lsrs r5,r5,#31
+ lsls r6,r6,#1
+ orrs r5,r6
+ movs r6,#19
+ muls r5,r6
+ movs r6,#0
ldr r2,[r1,#0]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r5,r3
- adc r6,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r5,r5,r3
+ adcs r6,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r5,r2
- mov r2,#0
- adc r6,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r5,r2
- adc r6,r3
+ muls r2,r7
+ muls r3,r7
+ adds r5,r5,r2
+ movs r2,#0
+ adcs r6,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r5,r5,r2
+ adcs r6,r3
str r5,[r0,#0]
- mov r5,#0
+ movs r5,#0
ldr r2,[r1,#4]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r6,r3
- adc r5,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r6,r6,r3
+ adcs r5,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r6,r2
- mov r2,#0
- adc r5,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r6,r2
- adc r5,r3
+ muls r2,r7
+ muls r3,r7
+ adds r6,r6,r2
+ movs r2,#0
+ adcs r5,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r6,r6,r2
+ adcs r5,r3
str r6,[r0,#4]
- mov r6,#0
+ movs r6,#0
ldr r2,[r1,#8]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r5,r3
- adc r6,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r5,r5,r3
+ adcs r6,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r5,r2
- mov r2,#0
- adc r6,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r5,r2
- adc r6,r3
+ muls r2,r7
+ muls r3,r7
+ adds r5,r5,r2
+ movs r2,#0
+ adcs r6,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r5,r5,r2
+ adcs r6,r3
str r5,[r0,#8]
- mov r5,#0
+ movs r5,#0
ldr r2,[r1,#12]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r6,r3
- adc r5,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r6,r6,r3
+ adcs r5,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r6,r2
- mov r2,#0
- adc r5,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r6,r2
- adc r5,r3
+ muls r2,r7
+ muls r3,r7
+ adds r6,r6,r2
+ movs r2,#0
+ adcs r5,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r6,r6,r2
+ adcs r5,r3
str r6,[r0,#12]
- mov r6,#0
+ movs r6,#0
ldr r2,[r1,#16]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r5,r3
- adc r6,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r5,r5,r3
+ adcs r6,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r5,r2
- mov r2,#0
- adc r6,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r5,r2
- adc r6,r3
+ muls r2,r7
+ muls r3,r7
+ adds r5,r5,r2
+ movs r2,#0
+ adcs r6,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r5,r5,r2
+ adcs r6,r3
str r5,[r0,#16]
- mov r5,#0
+ movs r5,#0
ldr r2,[r1,#20]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r6,r3
- adc r5,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r6,r6,r3
+ adcs r5,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r6,r2
- mov r2,#0
- adc r5,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r6,r2
- adc r5,r3
+ muls r2,r7
+ muls r3,r7
+ adds r6,r6,r2
+ movs r2,#0
+ adcs r5,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r6,r6,r2
+ adcs r5,r3
str r6,[r0,#20]
- mov r6,#0
+ movs r6,#0
ldr r2,[r1,#24]
- lsl r3,r2,#16
- lsr r4,r2,#16
- add r5,r3
- adc r6,r4
- lsr r3,r2,#16
+ lsls r3,r2,#16
+ lsrs r4,r2,#16
+ adds r5,r5,r3
+ adcs r6,r4
+ lsrs r3,r2,#16
uxth r2,r2
- mul r2,r7
- mul r3,r7
- add r5,r2
- mov r2,#0
- adc r6,r2
- lsl r2,r3,#16
- lsr r3,r3,#16
- add r5,r2
- adc r6,r3
+ muls r2,r7
+ muls r3,r7
+ adds r5,r5,r2
+ movs r2,#0
+ adcs r6,r2
+ lsls r2,r3,#16
+ lsrs r3,r3,#16
+ adds r5,r5,r2
+ adcs r6,r3
str r5,[r0,#24]
- mov r5,#0
+ movs r5,#0
ldr r2,[r0,#28]
- add r6,r2
+ adds r6,r6,r2
str r6,[r0,#28]
pop {r4,r5,r6,r7,r15}
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S
index cb272b9393..2980e03364 100644
--- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S
+++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S
@@ -1,3 +1,4 @@
+ .syntax unified
.align 2
.global multiply256x256_asm
.type multiply256x256_asm, %function
@@ -11,96 +12,96 @@ multiply256x256_asm:
mov r12, r0
mov r10, r2
mov r11, r1
- mov r0,r2
+ adds r0, r2, #0
//ldm r0!, {r4,r5,r6,r7}
ldm r0!, {r4,r5}
- add r0,#8
+ adds r0,#8
ldm r1!, {r2,r3,r6,r7}
push {r0,r1}
/////////BEGIN LOW PART //////////////////////
/////////MUL128/////////////
//MUL64
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
//////////////////////////
mov r4, r12
stm r4!, {r0,r1}
@@ -109,229 +110,229 @@ multiply256x256_asm:
mov r1, r10
mov r10, r2
ldm r1, {r0, r1, r4, r5}
- mov r2, r4
- mov r7, r5
- sub r2, r0
- sbc r7, r1
- sbc r6, r6
- eor r2, r6
- eor r7, r6
- sub r2, r6
- sbc r7, r6
+ adds r2, r4, #0
+ adds r7, r5, #0
+ subs r2, r0
+ sbcs r7, r1
+ sbcs r6, r6
+ eors r2, r6
+ eors r7, r6
+ subs r2, r6
+ sbcs r7, r6
push {r2, r7}
mov r2, r11
mov r11, r3
ldm r2, {r0, r1, r2, r3}
- sub r0, r2
- sbc r1, r3
- sbc r7, r7
- eor r0, r7
- eor r1, r7
- sub r0, r7
- sbc r1, r7
- eor r7, r6
+ subs r0, r2
+ sbcs r1, r3
+ sbcs r7, r7
+ eors r0, r7
+ eors r1, r7
+ subs r0, r7
+ sbcs r1, r7
+ eors r7, r6
mov r12, r7
push {r0, r1}
//MUL64
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
mov r4, r10
mov r5, r11
- eor r6, r6
- add r0, r4
- adc r1, r5
- adc r2, r6
- adc r3, r6
+ eors r6, r6
+ adds r0, r0, r4
+ adcs r1, r5
+ adcs r2, r6
+ adcs r3, r6
mov r10, r2
mov r11, r3
pop {r2-r5}
push {r0, r1}
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
pop {r4, r5}
mov r6, r12
mov r7, r12
- eor r0, r6
- eor r1, r6
- eor r2, r6
- eor r3, r6
- asr r6, r6, #1
- adc r0, r4
- adc r1, r5
- adc r4, r2
- adc r5, r3
- eor r2, r2
- adc r6,r2
- adc r7,r2
+ eors r0, r6
+ eors r1, r6
+ eors r2, r6
+ eors r3, r6
+ asrs r6, r6, #1
+ adcs r0, r4
+ adcs r1, r5
+ adcs r4, r2
+ adcs r5, r3
+ eors r2, r2
+ adcs r6,r2
+ adcs r7,r2
pop {r2, r3}
mov r8, r2
mov r9, r3
- add r2, r0
- adc r3, r1
+ adds r2, r2, r0
+ adcs r3, r1
mov r0, r10
mov r1, r11
- adc r4, r0
- adc r5, r1
- adc r6, r0
- adc r7, r1
+ adcs r4, r0
+ adcs r5, r1
+ adcs r6, r0
+ adcs r7, r1
////////END LOW PART/////////////////////
pop {r0}
stm r0!, {r2,r3}
@@ -345,353 +346,353 @@ multiply256x256_asm:
/////////BEGIN HIGH PART////////////////
/////////MUL128/////////////
//MUL64
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
push {r0,r1}
mov r1, r10
mov r10, r2
ldm r1, {r0, r1, r4, r5}
- mov r2, r4
- mov r7, r5
- sub r2, r0
- sbc r7, r1
- sbc r6, r6
- eor r2, r6
- eor r7, r6
- sub r2, r6
- sbc r7, r6
+ adds r2, r4, #0
+ adds r7, r5, #0
+ subs r2, r0
+ sbcs r7, r1
+ sbcs r6, r6
+ eors r2, r6
+ eors r7, r6
+ subs r2, r6
+ sbcs r7, r6
push {r2, r7}
mov r2, r11
mov r11, r3
ldm r2, {r0, r1, r2, r3}
- sub r0, r2
- sbc r1, r3
- sbc r7, r7
- eor r0, r7
- eor r1, r7
- sub r0, r7
- sbc r1, r7
- eor r7, r6
+ subs r0, r2
+ sbcs r1, r3
+ sbcs r7, r7
+ eors r0, r7
+ eors r1, r7
+ subs r0, r7
+ sbcs r1, r7
+ eors r7, r6
mov r12, r7
push {r0, r1}
//MUL64
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
mov r4, r10
mov r5, r11
- eor r6, r6
- add r0, r4
- adc r1, r5
- adc r2, r6
- adc r3, r6
+ eors r6, r6
+ adds r0, r0, r4
+ adcs r1, r5
+ adcs r2, r6
+ adcs r3, r6
mov r10, r2
mov r11, r3
pop {r2-r5}
push {r0, r1}
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
pop {r4, r5}
mov r6, r12
mov r7, r12
- eor r0, r6
- eor r1, r6
- eor r2, r6
- eor r3, r6
- asr r6, r6, #1
- adc r0, r4
- adc r1, r5
- adc r4, r2
- adc r5, r3
- eor r2, r2
- adc r6,r2 //0,1
- adc r7,r2
+ eors r0, r6
+ eors r1, r6
+ eors r2, r6
+ eors r3, r6
+ asrs r6, r6, #1
+ adcs r0, r4
+ adcs r1, r5
+ adcs r4, r2
+ adcs r5, r3
+ eors r2, r2
+ adcs r6,r2 //0,1
+ adcs r7,r2
pop {r2, r3}
mov r8, r2
mov r9, r3
- add r2, r0
- adc r3, r1
+ adds r2, r2, r0
+ adcs r3, r1
mov r0, r10
mov r1, r11
- adc r4, r0
- adc r5, r1
- adc r6, r0
- adc r7, r1
+ adcs r4, r0
+ adcs r5, r1
+ adcs r6, r0
+ adcs r7, r1
////////END HIGH PART/////////////////////
mov r0, r8
mov r1, r9
mov r8, r6
mov r9, r7
pop {r6, r7}
- add r0, r6
- adc r1, r7
+ adds r0, r0, r6
+ adcs r1, r7
pop {r6, r7}
- adc r2, r6
- adc r3, r7
+ adcs r2, r6
+ adcs r3, r7
pop {r7}
stm r7!, {r0-r3}
mov r10, r7
- eor r0,r0
+ eors r0,r0
mov r6, r8
mov r7, r9
- adc r4, r0
- adc r5, r0
- adc r6, r0
- adc r7, r0
+ adcs r4, r0
+ adcs r5, r0
+ adcs r6, r0
+ adcs r7, r0
pop {r0,r1,r2}
mov r12, r2
push {r0, r4-r7}
ldm r1, {r0-r7}
- sub r0, r4
- sbc r1, r5
- sbc r2, r6
- sbc r3, r7
- eor r4, r4
- sbc r4, r4
- eor r0, r4
- eor r1, r4
- eor r2, r4
- eor r3, r4
- sub r0, r4
- sbc r1, r4
- sbc r2, r4
- sbc r3, r4
+ subs r0, r4
+ sbcs r1, r5
+ sbcs r2, r6
+ sbcs r3, r7
+ eors r4, r4
+ sbcs r4, r4
+ eors r0, r4
+ eors r1, r4
+ eors r2, r4
+ eors r3, r4
+ subs r0, r4
+ sbcs r1, r4
+ sbcs r2, r4
+ sbcs r3, r4
mov r6, r12
mov r12, r4 //carry
mov r5, r10
@@ -700,22 +701,22 @@ multiply256x256_asm:
mov r8, r0
mov r9, r1
ldm r6, {r0-r7}
- sub r4, r0
- sbc r5, r1
- sbc r6, r2
- sbc r7, r3
- eor r0, r0
- sbc r0, r0
- eor r4, r0
- eor r5, r0
- eor r6, r0
- eor r7, r0
- sub r4, r0
- sbc r5, r0
- sbc r6, r0
- sbc r7, r0
+ subs r4, r0
+ sbcs r5, r1
+ sbcs r6, r2
+ sbcs r7, r3
+ eors r0, r0
+ sbcs r0, r0
+ eors r4, r0
+ eors r5, r0
+ eors r6, r0
+ eors r7, r0
+ subs r4, r0
+ sbcs r5, r0
+ sbcs r6, r0
+ sbcs r7, r0
mov r1, r12
- eor r0, r1
+ eors r0, r1
mov r1, r11
stm r1!, {r4-r7}
push {r0}
@@ -724,366 +725,366 @@ multiply256x256_asm:
/////////BEGIN MIDDLE PART////////////////
/////////MUL128/////////////
//MUL64
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
push {r0,r1}
mov r1, r10
mov r10, r2
ldm r1, {r0, r1, r4, r5}
- mov r2, r4
- mov r7, r5
- sub r2, r0
- sbc r7, r1
- sbc r6, r6
- eor r2, r6
- eor r7, r6
- sub r2, r6
- sbc r7, r6
+ adds r2, r4, #0
+ adds r7, r5, #0
+ subs r2, r0
+ sbcs r7, r1
+ sbcs r6, r6
+ eors r2, r6
+ eors r7, r6
+ subs r2, r6
+ sbcs r7, r6
push {r2, r7}
mov r2, r11
mov r11, r3
ldm r2, {r0, r1, r2, r3}
- sub r0, r2
- sbc r1, r3
- sbc r7, r7
- eor r0, r7
- eor r1, r7
- sub r0, r7
- sbc r1, r7
- eor r7, r6
+ subs r0, r2
+ sbcs r1, r3
+ sbcs r7, r7
+ eors r0, r7
+ eors r1, r7
+ subs r0, r7
+ sbcs r1, r7
+ eors r7, r6
mov r12, r7
push {r0, r1}
//MUL64
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
mov r4, r10
mov r5, r11
- eor r6, r6
- add r0, r4
- adc r1, r5
- adc r2, r6
- adc r3, r6
+ eors r6, r6
+ adds r0, r0, r4
+ adcs r1, r5
+ adcs r2, r6
+ adcs r3, r6
mov r10, r2
mov r11, r3
pop {r2-r5}
push {r0, r1}
- mov r6, r5
- mov r1, r2
- sub r5, r4
- sbc r0, r0
- eor r5, r0
- sub r5, r0
- sub r1, r3
- sbc r7, r7
- eor r1, r7
- sub r1, r7
- eor r7, r0
+ adds r6, r5, #0
+ adds r1, r2, #0
+ subs r5, r4
+ sbcs r0, r0
+ eors r5, r0
+ subs r5, r0
+ subs r1, r3
+ sbcs r7, r7
+ eors r1, r7
+ subs r1, r7
+ eors r7, r0
mov r9, r1
mov r8, r5
- lsr r1,r4,#16
+ lsrs r1,r4,#16
uxth r4,r4
- mov r0,r4
+ adds r0, r4, #0
uxth r5,r2
- lsr r2,#16
- mul r0,r5//00
- mul r5,r1//10
- mul r4,r2//01
- mul r1,r2//11
- lsl r2,r4,#16
- lsr r4,r4,#16
- add r0,r2
- adc r1,r4
- lsl r2,r5,#16
- lsr r4,r5,#16
- add r0,r2
- adc r1,r4
- lsr r4, r6,#16
+ lsrs r2,#16
+ muls r0,r5//00
+ muls r5,r1//10
+ muls r4,r2//01
+ muls r1,r2//11
+ lsls r2,r4,#16
+ lsrs r4,r4,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsls r2,r5,#16
+ lsrs r4,r5,#16
+ adds r0, r0, r2
+ adcs r1,r4
+ lsrs r4, r6,#16
uxth r6, r6
uxth r5, r3
- lsr r3, r3, #16
- mov r2, r6
- mul r2, r5
- mul r5, r4
- mul r6, r3
- mul r3, r4
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
- lsl r4,r6,#16
- lsr r5,r6,#16
- add r2,r4
- adc r3,r5
- eor r6, r6
- add r2, r1
- adc r3, r6
+ lsrs r3, r3, #16
+ adds r2, r6, #0
+ muls r2, r5
+ muls r5, r4
+ muls r6, r3
+ muls r3, r4
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ lsls r4,r6,#16
+ lsrs r5,r6,#16
+ adds r2, r2, r4
+ adcs r3,r5
+ eors r6, r6
+ adds r2, r2, r1
+ adcs r3, r6
mov r1, r9
mov r5, r8
mov r8, r0
- lsr r0, r1,#16
+ lsrs r0, r1,#16
uxth r1,r1
- mov r4,r1
- lsr r6,r5,#16
+ adds r4, r1, #0
+ lsrs r6,r5,#16
uxth r5,r5
- mul r1,r5
- mul r4,r6
- mul r5,r0
- mul r0,r6
- lsl r6,r4,#16
- lsr r4,#16
- add r1,r6
- adc r0,r4
- lsl r6,r5,#16
- lsr r5,#16
- add r1,r6
- adc r0,r5
- eor r1,r7
- eor r0,r7
- eor r4, r4
- asr r7, r7, #1
- adc r1, r2
- adc r2, r0
- adc r7, r4
+ muls r1,r5
+ muls r4,r6
+ muls r5,r0
+ muls r0,r6
+ lsls r6,r4,#16
+ lsrs r4,#16
+ adds r1, r1, r6
+ adcs r0,r4
+ lsls r6,r5,#16
+ lsrs r5,#16
+ adds r1, r1, r6
+ adcs r0,r5
+ eors r1,r7
+ eors r0,r7
+ eors r4, r4
+ asrs r7, r7, #1
+ adcs r1, r2
+ adcs r2, r0
+ adcs r7, r4
mov r0, r8
- add r1, r0
- adc r2, r3
- adc r3, r7
+ adds r1, r1, r0
+ adcs r2, r3
+ adcs r3, r7
pop {r4, r5}
mov r6, r12
mov r7, r12
- eor r0, r6
- eor r1, r6
- eor r2, r6
- eor r3, r6
- asr r6, r6, #1
- adc r0, r4
- adc r1, r5
- adc r4, r2
- adc r5, r3
- eor r2, r2
- adc r6,r2 //0,1
- adc r7,r2
+ eors r0, r6
+ eors r1, r6
+ eors r2, r6
+ eors r3, r6
+ asrs r6, r6, #1
+ adcs r0, r4
+ adcs r1, r5
+ adcs r4, r2
+ adcs r5, r3
+ eors r2, r2
+ adcs r6,r2 //0,1
+ adcs r7,r2
pop {r2, r3}
mov r8, r2
mov r9, r3
- add r2, r0
- adc r3, r1
+ adds r2, r2, r0
+ adcs r3, r1
mov r0, r10
mov r1, r11
- adc r4, r0
- adc r5, r1
- adc r6, r0
- adc r7, r1
+ adcs r4, r0
+ adcs r5, r1
+ adcs r6, r0
+ adcs r7, r1
//////////END MIDDLE PART////////////////
pop {r0,r1} //r0,r1
mov r12, r0 //negative
- eor r2, r0
- eor r3, r0
- eor r4, r0
- eor r5, r0
- eor r6, r0
- eor r7, r0
+ eors r2, r0
+ eors r3, r0
+ eors r4, r0
+ eors r5, r0
+ eors r6, r0
+ eors r7, r0
push {r4-r7}
ldm r1!, {r4-r7}
mov r11, r1 //reference
mov r1, r9
- eor r1, r0
+ eors r1, r0
mov r10, r4
mov r4, r8
- asr r0, #1
- eor r0, r4
+ asrs r0, #1
+ eors r0, r4
mov r4, r10
- adc r0, r4
- adc r1, r5
- adc r2, r6
- adc r3, r7
- eor r4, r4
- adc r4, r4
+ adcs r0, r4
+ adcs r1, r5
+ adcs r2, r6
+ adcs r3, r7
+ eors r4, r4
+ adcs r4, r4
mov r10, r4 //carry
mov r4, r11
ldm r4, {r4-r7}
- add r0, r4
- adc r1, r5
- adc r2, r6
- adc r3, r7
+ adds r0, r0, r4
+ adcs r1, r5
+ adcs r2, r6
+ adcs r3, r7
mov r9, r4
mov r4, r11
stm r4!, {r0-r3}
mov r11, r4
pop {r0-r3}
mov r4, r9
- adc r4, r0
- adc r5, r1
- adc r6, r2
- adc r7, r3
- mov r1, #0
- adc r1, r1
+ adcs r4, r0
+ adcs r5, r1
+ adcs r6, r2
+ adcs r7, r3
+ movs r1, #0
+ adcs r1, r1
mov r0, r10
mov r10, r1 //carry
- asr r0, #1
+ asrs r0, #1
pop {r0-r3}
- adc r4, r0
- adc r5, r1
- adc r6, r2
- adc r7, r3
+ adcs r4, r0
+ adcs r5, r1
+ adcs r6, r2
+ adcs r7, r3
mov r8, r0
mov r0, r11
stm r0!, {r4-r7}
@@ -1091,13 +1092,13 @@ multiply256x256_asm:
mov r0, r8
mov r6, r12
mov r5, r10
- eor r4, r4
- adc r5, r6
- adc r6, r4
- add r0, r5
- adc r1, r6
- adc r2, r6
- adc r3, r6
+ eors r4, r4
+ adcs r5, r6
+ adcs r6, r4
+ adds r0, r0, r5
+ adcs r1, r6
+ adcs r2, r6
+ adcs r3, r6
mov r7, r11
stm r7!, {r0-r3}
pop {r3-r6}
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S
index 73e613330f..15e68373ea 100644
--- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S
+++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S
@@ -8,6 +8,7 @@
// Generated and tested with C++ functions in the test subdirectory and on the target.
//
+ .syntax unified
.code 16
.text
@@ -21,141 +22,141 @@
fe25519_reduceTo256Bits_asm:
push {r4,r5,r6,r7,r14}
ldr r2,[r1,#60]
- lsr r3,r2,#16
+ lsrs r3,r2,#16 // Routine overview: reads 16 words at r1, writes 8 words at r0 = in[0..7] + 38*in[8..15], with everything at or above bit 255 folded back in times 19
uxth r2,r2
- mov r7,#38
- mul r2,r7
- mul r3,r7
+ movs r7,#38 // r7 = 38 for the whole routine (presumably 2^256 mod 2^255-19, going by the file name - confirm)
+ muls r2,r7 // r2 = 38 * low 16 bits of in[15]
+ muls r3,r7 // r3 = 38 * high 16 bits of in[15]
ldr r4,[r1,#28]
- lsr r5,r3,#16
- lsl r3,r3,#16
- mov r6,#0
- add r4,r2
- adc r5,r6
- add r4,r3
- adc r5,r6
- lsl r2,r4,#1
- lsr r2,r2,#1
+ lsrs r5,r3,#16
+ lsls r3,r3,#16 // r5:r3 = (38 * high half) << 16 as a two-word value
+ movs r6,#0 // r6 is never written again: every later "adcs rX,r6" adds only the carry
+ adds r4,r4,r2
+ adcs r5,r6
+ adds r4,r4,r3
+ adcs r5,r6 // r5:r4 = in[7] + 38*in[15]
+ lsls r2,r4,#1
+ lsrs r2,r2,#1 // r2 = r4 with bit 31 cleared; stored below as a provisional out[7]
str r2,[r0,#28]
- lsr r4,r4,#31
- lsl r5,r5,#1
- orr r4,r5
- mov r2,#19
- mul r2,r4
+ lsrs r4,r4,#31
+ lsls r5,r5,#1
+ orrs r4,r5 // r4 = (r5:r4) >> 31: the part of word 7 at or above bit 31 (bit 255 of the whole number)
+ movs r2,#19
+ muls r2,r4 // r2 = 19 * that overflow; it becomes the starting value of the word-0 sum
ldr r4,[r1,#0]
- add r2,r4
- mov r3,#0
- adc r3,r6
+ adds r2,r2,r4
+ movs r3,#0 // MOVS with an immediate leaves the carry flag alone, so the adcs below still sees the carry from adds
+ adcs r3,r6 // r3:r2 = in[0] + 19*overflow
ldr r4,[r1,#32]
- lsr r5,r4,#16
+ lsrs r5,r4,#16 // split in[8] into 16-bit halves so each product with 38 fits in 32 bits
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r2,r4
- adc r3,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
+ muls r5,r7
+ muls r4,r7
+ adds r2,r2,r4
+ adcs r3,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2,r2,r4
+ adcs r3,r5 // r3:r2 = in[0] + 19*overflow + 38*in[8]; r2 is stored as out[0], r3 carries into word 1
str r2,[r0,#0]
ldr r4,[r1,#4]
- add r3,r4
- mov r2,#0
- adc r2,r6
+ adds r3,r3,r4
+ movs r2,#0
+ adcs r2,r6 // accumulator/carry roles of r2 and r3 swap on every word from here on
ldr r4,[r1,#36]
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r3,r4
- adc r2,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r3,r4
- adc r2,r5
+ muls r5,r7
+ muls r4,r7
+ adds r3,r3,r4
+ adcs r2,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r3,r3,r4
+ adcs r2,r5 // r2:r3 = carry-in + in[1] + 38*in[9]; r3 is stored as out[1]
str r3,[r0,#4]
ldr r4,[r1,#8]
- add r2,r4
- mov r3,#0
- adc r3,r6
+ adds r2,r2,r4
+ movs r3,#0
+ adcs r3,r6
ldr r4,[r1,#40]
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r2,r4
- adc r3,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
+ muls r5,r7
+ muls r4,r7
+ adds r2,r2,r4
+ adcs r3,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2,r2,r4
+ adcs r3,r5 // r3:r2 = carry-in + in[2] + 38*in[10]; r2 is stored as out[2]
str r2,[r0,#8]
ldr r4,[r1,#12]
- add r3,r4
- mov r2,#0
- adc r2,r6
+ adds r3,r3,r4
+ movs r2,#0
+ adcs r2,r6
ldr r4,[r1,#44]
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r3,r4
- adc r2,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r3,r4
- adc r2,r5
+ muls r5,r7
+ muls r4,r7
+ adds r3,r3,r4
+ adcs r2,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r3,r3,r4
+ adcs r2,r5 // r2:r3 = carry-in + in[3] + 38*in[11]; r3 is stored as out[3]
str r3,[r0,#12]
ldr r4,[r1,#16]
- add r2,r4
- mov r3,#0
- adc r3,r6
+ adds r2,r2,r4
+ movs r3,#0
+ adcs r3,r6
ldr r4,[r1,#48]
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r2,r4
- adc r3,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
+ muls r5,r7
+ muls r4,r7
+ adds r2,r2,r4
+ adcs r3,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2,r2,r4
+ adcs r3,r5 // r3:r2 = carry-in + in[4] + 38*in[12]; r2 is stored as out[4]
str r2,[r0,#16]
ldr r4,[r1,#20]
- add r3,r4
- mov r2,#0
- adc r2,r6
+ adds r3,r3,r4
+ movs r2,#0
+ adcs r2,r6
ldr r4,[r1,#52]
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r3,r4
- adc r2,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r3,r4
- adc r2,r5
+ muls r5,r7
+ muls r4,r7
+ adds r3,r3,r4
+ adcs r2,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r3,r3,r4
+ adcs r2,r5 // r2:r3 = carry-in + in[5] + 38*in[13]; r3 is stored as out[5]
str r3,[r0,#20]
ldr r4,[r1,#24]
- add r2,r4
- mov r3,#0
- adc r3,r6
+ adds r2,r2,r4
+ movs r3,#0
+ adcs r3,r6
ldr r4,[r1,#56]
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- mul r5,r7
- mul r4,r7
- add r2,r4
- adc r3,r6
- lsl r4,r5,#16
- lsr r5,r5,#16
- add r2,r4
- adc r3,r5
+ muls r5,r7
+ muls r4,r7
+ adds r2,r2,r4
+ adcs r3,r6
+ lsls r4,r5,#16
+ lsrs r5,r5,#16
+ adds r2,r2,r4
+ adcs r3,r5 // r3:r2 = carry-in + in[6] + 38*in[14]; r2 is stored as out[6]
str r2,[r0,#24]
ldr r4,[r0,#28]
- add r4,r3
+ adds r4,r4,r3 // out[7] = provisional 31-bit word 7 + carry out of word 6
str r4,[r0,#28]
pop {r4,r5,r6,r7,r15}
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
index b62121adb7..340564e87b 100644
--- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
+++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S
@@ -3,6 +3,7 @@
// public domain
//
+ .syntax unified
.align 2
.global square256_asm
.type square256_asm, %function
@@ -15,38 +16,30 @@ square256_asm:
// pResult in r0
// adheres to arm eabi calling convention.
push {r1,r4,r5,r6,r7,r14}
- .syntax unified
mov r3,r8
mov r4,r9
mov r5,r10
mov r6,r11
mov r7,r12
- .syntax divided
push {r3,r4,r5,r6,r7}
- .syntax unified
mov r14,r0
- .syntax divided
- ldm r1!,{r4,r5,r6,r7}
+ ldmia r1!,{r4,r5,r6,r7}
// sqr 128 Refined Karatsuba
// Input in r4 ... r7
// Result in r0 ... r7
// clobbers all registers except for r14
- .syntax unified
mov r0,r4
mov r1,r5
- .syntax divided
- sub r0,r6
- sbc r1,r7
- sbc r2,r2
- eor r0,r2
- eor r1,r2
- sub r0,r2
- sbc r1,r2
- .syntax unified
+ subs r0,r6
+ sbcs r1,r7
+ sbcs r2,r2
+ eors r0,r2
+ eors r1,r2
+ subs r0,r2
+ sbcs r1,r2
mov r8,r0
mov r9,r1
mov r10,r6
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r4,r5
// Result in r0,r1,r2,r3
@@ -56,85 +49,73 @@ square256_asm:
// Result in r0 ,r1
// Clobbers: r2, r3
uxth r0,r4
- lsr r1,r4,#16
- .syntax unified
+ lsrs r1,r4,#16
mov r2,r0
- .syntax divided
- mul r2,r1
- mul r0,r0
- mul r1,r1
- lsr r3,r2,#15
- lsl r2,r2,#17
- add r0,r2
- adc r1,r3
+ muls r2,r1
+ muls r0,r0
+ muls r1,r1
+ lsrs r3,r2,#15
+ lsls r2,r2,#17
+ adds r0,r0,r2
+ adcs r1,r3
// End: sqr 32
// Result in r0 ,r1
- sub r4,r5
- sbc r6,r6
- eor r4,r6
- sub r4,r6
+ subs r4,r5
+ sbcs r6,r6
+ eors r4,r6
+ subs r4,r6
// START: sqr 32
// Input operand in r5
// Result in r2 ,r3
// Clobbers: r5, r6
uxth r2,r5
- lsr r3,r5,#16
- .syntax unified
+ lsrs r3,r5,#16
mov r5,r2
- .syntax divided
- mul r5,r3
- mul r2,r2
- mul r3,r3
- lsr r6,r5,#15
- lsl r5,r5,#17
- add r2,r5
- adc r3,r6
+ muls r5,r3
+ muls r2,r2
+ muls r3,r3
+ lsrs r6,r5,#15
+ lsls r5,r5,#17
+ adds r2,r2,r5
+ adcs r3,r6
// End: sqr 32
// Result in r2 ,r3
- mov r6,#0
- add r2,r1
- adc r3,r6
+ movs r6,#0
+ adds r2,r2,r1
+ adcs r3,r6
// START: sqr 32
// Input operand in r4
// Result in r4 ,r5
// Clobbers: r1, r6
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- .syntax unified
mov r1,r4
- .syntax divided
- mul r1,r5
- mul r4,r4
- mul r5,r5
- lsr r6,r1,#15
- lsl r1,r1,#17
- add r4,r1
- adc r5,r6
+ muls r1,r5
+ muls r4,r4
+ muls r5,r5
+ lsrs r6,r1,#15
+ lsls r1,r1,#17
+ adds r4,r4,r1
+ adcs r5,r6
// End: sqr 32
// Result in r4 ,r5
- .syntax unified
mov r1,r2
- .syntax divided
- sub r1,r4
- sbc r2,r5
- .syntax unified
+ subs r1,r4
+ sbcs r2,r5
mov r5,r3
- .syntax divided
- mov r6,#0
- sbc r3,r6
- add r1,r0
- adc r2,r5
- adc r3,r6
+ movs r6,#0
+ sbcs r3,r6
+ adds r1,r1,r0
+ adcs r2,r5
+ adcs r3,r6
// END: sqr 64 Refined Karatsuba
// Result in r0,r1,r2,r3
// Leaves r6 zero.
- .syntax unified
mov r6,r10
mov r10,r0
mov r11,r1
mov r12,r2
mov r1,r3
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r6,r7
// Result in r2,r3,r4,r5
@@ -144,92 +125,78 @@ square256_asm:
// Result in r2 ,r3
// Clobbers: r4, r5
uxth r2,r6
- lsr r3,r6,#16
- .syntax unified
+ lsrs r3,r6,#16
mov r4,r2
- .syntax divided
- mul r4,r3
- mul r2,r2
- mul r3,r3
- lsr r5,r4,#15
- lsl r4,r4,#17
- add r2,r4
- adc r3,r5
+ muls r4,r3
+ muls r2,r2
+ muls r3,r3
+ lsrs r5,r4,#15
+ lsls r4,r4,#17
+ adds r2,r2,r4
+ adcs r3,r5
// End: sqr 32
// Result in r2 ,r3
- sub r6,r7
- sbc r4,r4
- eor r6,r4
- sub r6,r4
+ subs r6,r7
+ sbcs r4,r4
+ eors r6,r4
+ subs r6,r4
// START: sqr 32
// Input operand in r7
// Result in r4 ,r5
// Clobbers: r0, r7
uxth r4,r7
- lsr r5,r7,#16
- .syntax unified
+ lsrs r5,r7,#16
mov r0,r4
- .syntax divided
- mul r0,r5
- mul r4,r4
- mul r5,r5
- lsr r7,r0,#15
- lsl r0,r0,#17
- add r4,r0
- adc r5,r7
+ muls r0,r5
+ muls r4,r4
+ muls r5,r5
+ lsrs r7,r0,#15
+ lsls r0,r0,#17
+ adds r4,r4,r0
+ adcs r5,r7
// End: sqr 32
// Result in r4 ,r5
- mov r7,#0
- add r4,r3
- adc r5,r7
+ movs r7,#0
+ adds r4,r4,r3
+ adcs r5,r7
// START: sqr 32
// Input operand in r6
// Result in r7 ,r0
// Clobbers: r6, r3
uxth r7,r6
- lsr r0,r6,#16
- .syntax unified
+ lsrs r0,r6,#16
mov r6,r7
- .syntax divided
- mul r6,r0
- mul r7,r7
- mul r0,r0
- lsr r3,r6,#15
- lsl r6,r6,#17
- add r7,r6
- adc r0,r3
+ muls r6,r0
+ muls r7,r7
+ muls r0,r0
+ lsrs r3,r6,#15
+ lsls r6,r6,#17
+ adds r7,r7,r6
+ adcs r0,r3
// End: sqr 32
// Result in r7 ,r0
- .syntax unified
mov r3,r4
- .syntax divided
- sub r3,r7
- sbc r4,r0
- .syntax unified
+ subs r3,r7
+ sbcs r4,r0
mov r0,r5
- .syntax divided
- mov r6,#0
- sbc r5,r6
- add r3,r2
- adc r4,r0
- adc r5,r6
+ movs r6,#0
+ sbcs r5,r6
+ adds r3,r3,r2
+ adcs r4,r0
+ adcs r5,r6
// END: sqr 64 Refined Karatsuba
// Result in r2,r3,r4,r5
// Leaves r6 zero.
- .syntax unified
mov r0,r12
- .syntax divided
- add r2,r0
- adc r3,r1
- adc r4,r6
- adc r5,r6
- .syntax unified
+ adds r2,r2,r0
+ adcs r3,r1
+ adcs r4,r6
+ adcs r5,r6
mov r12,r2
mov r2,r8
mov r8,r3
mov r3,r9
mov r9,r4
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r2,r3
// Result in r6,r7,r0,r1
@@ -239,146 +206,116 @@ square256_asm:
// Result in r6 ,r7
// Clobbers: r0, r1
uxth r6,r2
- lsr r7,r2,#16
- .syntax unified
+ lsrs r7,r2,#16
mov r0,r6
- .syntax divided
- mul r0,r7
- mul r6,r6
- mul r7,r7
- lsr r1,r0,#15
- lsl r0,r0,#17
- add r6,r0
- adc r7,r1
+ muls r0,r7
+ muls r6,r6
+ muls r7,r7
+ lsrs r1,r0,#15
+ lsls r0,r0,#17
+ adds r6,r6,r0
+ adcs r7,r1
// End: sqr 32
// Result in r6 ,r7
- sub r2,r3
- sbc r4,r4
- eor r2,r4
- sub r2,r4
+ subs r2,r3
+ sbcs r4,r4
+ eors r2,r4
+ subs r2,r4
// START: sqr 32
// Input operand in r3
// Result in r0 ,r1
// Clobbers: r3, r4
uxth r0,r3
- lsr r1,r3,#16
- .syntax unified
+ lsrs r1,r3,#16
mov r3,r0
- .syntax divided
- mul r3,r1
- mul r0,r0
- mul r1,r1
- lsr r4,r3,#15
- lsl r3,r3,#17
- add r0,r3
- adc r1,r4
+ muls r3,r1
+ muls r0,r0
+ muls r1,r1
+ lsrs r4,r3,#15
+ lsls r3,r3,#17
+ adds r0,r0,r3
+ adcs r1,r4
// End: sqr 32
// Result in r0 ,r1
- mov r4,#0
- add r0,r7
- adc r1,r4
+ movs r4,#0
+ adds r0,r0,r7
+ adcs r1,r4
// START: sqr 32
// Input operand in r2
// Result in r3 ,r4
// Clobbers: r2, r7
uxth r3,r2
- lsr r4,r2,#16
- .syntax unified
+ lsrs r4,r2,#16
mov r2,r3
- .syntax divided
- mul r2,r4
- mul r3,r3
- mul r4,r4
- lsr r7,r2,#15
- lsl r2,r2,#17
- add r3,r2
- adc r4,r7
+ muls r2,r4
+ muls r3,r3
+ muls r4,r4
+ lsrs r7,r2,#15
+ lsls r2,r2,#17
+ adds r3,r3,r2
+ adcs r4,r7
// End: sqr 32
// Result in r3 ,r4
- .syntax unified
mov r7,r0
- .syntax divided
- sub r7,r3
- sbc r0,r4
- .syntax unified
+ subs r7,r3
+ sbcs r0,r4
mov r2,r1
- .syntax divided
- mov r4,#0
- sbc r1,r4
- add r7,r6
- adc r0,r2
- adc r1,r4
+ movs r4,#0
+ sbcs r1,r4
+ adds r7,r7,r6
+ adcs r0,r2
+ adcs r1,r4
// END: sqr 64 Refined Karatsuba
// Result in r6,r7,r0,r1
// Returns r4 as zero.
- .syntax unified
mov r2,r12
mov r3,r8
mov r4,r9
- .syntax divided
- sub r2,r6
- sbc r3,r7
- .syntax unified
+ subs r2,r6
+ sbcs r3,r7
mov r6,r4
mov r7,r5
- .syntax divided
- sbc r4,r0
- sbc r5,r1
- mov r0,#0
- sbc r6,r0
- sbc r7,r0
- .syntax unified
+ sbcs r4,r0
+ sbcs r5,r1
+ movs r0,#0
+ sbcs r6,r0
+ sbcs r7,r0
mov r0,r10
- .syntax divided
- add r2,r0
- .syntax unified
+ adds r2,r2,r0
mov r1,r11
- .syntax divided
- adc r3,r1
- .syntax unified
+ adcs r3,r1
mov r0,r12
- .syntax divided
- adc r4,r0
- .syntax unified
+ adcs r4,r0
mov r0,r8
- .syntax divided
- adc r5,r0
- mov r0,#0
- adc r6,r0
- adc r7,r0
- .syntax unified
+ adcs r5,r0
+ movs r0,#0
+ adcs r6,r0
+ adcs r7,r0
mov r0,r10
- .syntax divided
// END: sqr 128 Refined Karatsuba
// Result in r0 ... r7
push {r4,r5,r6,r7}
- .syntax unified
mov r4,r14
- .syntax divided
stm r4!,{r0,r1,r2,r3}
ldr r4,[SP,#36]
- add r4,#16
+ adds r4,#16
ldm r4,{r4,r5,r6,r7}
// sqr 128 Refined Karatsuba
// Input in r4 ... r7
// Result in r0 ... r7
// clobbers all registers except for r14
- .syntax unified
mov r0,r4
mov r1,r5
- .syntax divided
- sub r0,r6
- sbc r1,r7
- sbc r2,r2
- eor r0,r2
- eor r1,r2
- sub r0,r2
- sbc r1,r2
- .syntax unified
+ subs r0,r6
+ sbcs r1,r7
+ sbcs r2,r2
+ eors r0,r2
+ eors r1,r2
+ subs r0,r2
+ sbcs r1,r2
mov r8,r0
mov r9,r1
mov r10,r6
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r4,r5
// Result in r0,r1,r2,r3
@@ -388,85 +325,73 @@ square256_asm:
// Result in r0 ,r1
// Clobbers: r2, r3
uxth r0,r4
- lsr r1,r4,#16
- .syntax unified
+ lsrs r1,r4,#16
mov r2,r0
- .syntax divided
- mul r2,r1
- mul r0,r0
- mul r1,r1
- lsr r3,r2,#15
- lsl r2,r2,#17
- add r0,r2
- adc r1,r3
+ muls r2,r1
+ muls r0,r0
+ muls r1,r1
+ lsrs r3,r2,#15
+ lsls r2,r2,#17
+ adds r0,r0,r2
+ adcs r1,r3
// End: sqr 32
// Result in r0 ,r1
- sub r4,r5
- sbc r6,r6
- eor r4,r6
- sub r4,r6
+ subs r4,r5
+ sbcs r6,r6
+ eors r4,r6
+ subs r4,r6
// START: sqr 32
// Input operand in r5
// Result in r2 ,r3
// Clobbers: r5, r6
uxth r2,r5
- lsr r3,r5,#16
- .syntax unified
+ lsrs r3,r5,#16
mov r5,r2
- .syntax divided
- mul r5,r3
- mul r2,r2
- mul r3,r3
- lsr r6,r5,#15
- lsl r5,r5,#17
- add r2,r5
- adc r3,r6
+ muls r5,r3
+ muls r2,r2
+ muls r3,r3
+ lsrs r6,r5,#15
+ lsls r5,r5,#17
+ adds r2,r2,r5
+ adcs r3,r6
// End: sqr 32
// Result in r2 ,r3
- mov r6,#0
- add r2,r1
- adc r3,r6
+ movs r6,#0
+ adds r2,r2,r1
+ adcs r3,r6
// START: sqr 32
// Input operand in r4
// Result in r4 ,r5
// Clobbers: r1, r6
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- .syntax unified
mov r1,r4
- .syntax divided
- mul r1,r5
- mul r4,r4
- mul r5,r5
- lsr r6,r1,#15
- lsl r1,r1,#17
- add r4,r1
- adc r5,r6
+ muls r1,r5
+ muls r4,r4
+ muls r5,r5
+ lsrs r6,r1,#15
+ lsls r1,r1,#17
+ adds r4,r4,r1
+ adcs r5,r6
// End: sqr 32
// Result in r4 ,r5
- .syntax unified
mov r1,r2
- .syntax divided
- sub r1,r4
- sbc r2,r5
- .syntax unified
+ subs r1,r4
+ sbcs r2,r5
mov r5,r3
- .syntax divided
- mov r6,#0
- sbc r3,r6
- add r1,r0
- adc r2,r5
- adc r3,r6
+ movs r6,#0
+ sbcs r3,r6
+ adds r1,r1,r0
+ adcs r2,r5
+ adcs r3,r6
// END: sqr 64 Refined Karatsuba
// Result in r0,r1,r2,r3
// Leaves r6 zero.
- .syntax unified
mov r6,r10
mov r10,r0
mov r11,r1
mov r12,r2
mov r1,r3
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r6,r7
// Result in r2,r3,r4,r5
@@ -476,92 +401,78 @@ square256_asm:
// Result in r2 ,r3
// Clobbers: r4, r5
uxth r2,r6
- lsr r3,r6,#16
- .syntax unified
+ lsrs r3,r6,#16
mov r4,r2
- .syntax divided
- mul r4,r3
- mul r2,r2
- mul r3,r3
- lsr r5,r4,#15
- lsl r4,r4,#17
- add r2,r4
- adc r3,r5
+ muls r4,r3
+ muls r2,r2
+ muls r3,r3
+ lsrs r5,r4,#15
+ lsls r4,r4,#17
+ adds r2,r2,r4
+ adcs r3,r5
// End: sqr 32
// Result in r2 ,r3
- sub r6,r7
- sbc r4,r4
- eor r6,r4
- sub r6,r4
+ subs r6,r7
+ sbcs r4,r4
+ eors r6,r4
+ subs r6,r4
// START: sqr 32
// Input operand in r7
// Result in r4 ,r5
// Clobbers: r0, r7
uxth r4,r7
- lsr r5,r7,#16
- .syntax unified
+ lsrs r5,r7,#16
mov r0,r4
- .syntax divided
- mul r0,r5
- mul r4,r4
- mul r5,r5
- lsr r7,r0,#15
- lsl r0,r0,#17
- add r4,r0
- adc r5,r7
+ muls r0,r5
+ muls r4,r4
+ muls r5,r5
+ lsrs r7,r0,#15
+ lsls r0,r0,#17
+ adds r4,r4,r0
+ adcs r5,r7
// End: sqr 32
// Result in r4 ,r5
- mov r7,#0
- add r4,r3
- adc r5,r7
+ movs r7,#0
+ adds r4,r4,r3
+ adcs r5,r7
// START: sqr 32
// Input operand in r6
// Result in r7 ,r0
// Clobbers: r6, r3
uxth r7,r6
- lsr r0,r6,#16
- .syntax unified
+ lsrs r0,r6,#16
mov r6,r7
- .syntax divided
- mul r6,r0
- mul r7,r7
- mul r0,r0
- lsr r3,r6,#15
- lsl r6,r6,#17
- add r7,r6
- adc r0,r3
+ muls r6,r0
+ muls r7,r7
+ muls r0,r0
+ lsrs r3,r6,#15
+ lsls r6,r6,#17
+ adds r7,r7,r6
+ adcs r0,r3
// End: sqr 32
// Result in r7 ,r0
- .syntax unified
mov r3,r4
- .syntax divided
- sub r3,r7
- sbc r4,r0
- .syntax unified
+ subs r3,r7
+ sbcs r4,r0
mov r0,r5
- .syntax divided
- mov r6,#0
- sbc r5,r6
- add r3,r2
- adc r4,r0
- adc r5,r6
+ movs r6,#0
+ sbcs r5,r6
+ adds r3,r3,r2
+ adcs r4,r0
+ adcs r5,r6
// END: sqr 64 Refined Karatsuba
// Result in r2,r3,r4,r5
// Leaves r6 zero.
- .syntax unified
mov r0,r12
- .syntax divided
- add r2,r0
- adc r3,r1
- adc r4,r6
- adc r5,r6
- .syntax unified
+ adds r2,r2,r0
+ adcs r3,r1
+ adcs r4,r6
+ adcs r5,r6
mov r12,r2
mov r2,r8
mov r8,r3
mov r3,r9
mov r9,r4
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r2,r3
// Result in r6,r7,r0,r1
@@ -571,180 +482,146 @@ square256_asm:
// Result in r6 ,r7
// Clobbers: r0, r1
uxth r6,r2
- lsr r7,r2,#16
- .syntax unified
+ lsrs r7,r2,#16
mov r0,r6
- .syntax divided
- mul r0,r7
- mul r6,r6
- mul r7,r7
- lsr r1,r0,#15
- lsl r0,r0,#17
- add r6,r0
- adc r7,r1
+ muls r0,r7
+ muls r6,r6
+ muls r7,r7
+ lsrs r1,r0,#15
+ lsls r0,r0,#17
+ adds r6,r6,r0
+ adcs r7,r1
// End: sqr 32
// Result in r6 ,r7
- sub r2,r3
- sbc r4,r4
- eor r2,r4
- sub r2,r4
+ subs r2,r3
+ sbcs r4,r4
+ eors r2,r4
+ subs r2,r4
// START: sqr 32
// Input operand in r3
// Result in r0 ,r1
// Clobbers: r3, r4
uxth r0,r3
- lsr r1,r3,#16
- .syntax unified
+ lsrs r1,r3,#16
mov r3,r0
- .syntax divided
- mul r3,r1
- mul r0,r0
- mul r1,r1
- lsr r4,r3,#15
- lsl r3,r3,#17
- add r0,r3
- adc r1,r4
+ muls r3,r1
+ muls r0,r0
+ muls r1,r1
+ lsrs r4,r3,#15
+ lsls r3,r3,#17
+ adds r0,r0,r3
+ adcs r1,r4
// End: sqr 32
// Result in r0 ,r1
- mov r4,#0
- add r0,r7
- adc r1,r4
+ movs r4,#0
+ adds r0,r0,r7
+ adcs r1,r4
// START: sqr 32
// Input operand in r2
// Result in r3 ,r4
// Clobbers: r2, r7
uxth r3,r2
- lsr r4,r2,#16
- .syntax unified
+ lsrs r4,r2,#16
mov r2,r3
- .syntax divided
- mul r2,r4
- mul r3,r3
- mul r4,r4
- lsr r7,r2,#15
- lsl r2,r2,#17
- add r3,r2
- adc r4,r7
+ muls r2,r4
+ muls r3,r3
+ muls r4,r4
+ lsrs r7,r2,#15
+ lsls r2,r2,#17
+ adds r3,r3,r2
+ adcs r4,r7
// End: sqr 32
// Result in r3 ,r4
- .syntax unified
mov r7,r0
- .syntax divided
- sub r7,r3
- sbc r0,r4
- .syntax unified
+ subs r7,r3
+ sbcs r0,r4
mov r2,r1
- .syntax divided
- mov r4,#0
- sbc r1,r4
- add r7,r6
- adc r0,r2
- adc r1,r4
+ movs r4,#0
+ sbcs r1,r4
+ adds r7,r7,r6
+ adcs r0,r2
+ adcs r1,r4
// END: sqr 64 Refined Karatsuba
// Result in r6,r7,r0,r1
// Returns r4 as zero.
- .syntax unified
mov r2,r12
mov r3,r8
mov r4,r9
- .syntax divided
- sub r2,r6
- sbc r3,r7
- .syntax unified
+ subs r2,r6
+ sbcs r3,r7
mov r6,r4
mov r7,r5
- .syntax divided
- sbc r4,r0
- sbc r5,r1
- mov r0,#0
- sbc r6,r0
- sbc r7,r0
- .syntax unified
+ sbcs r4,r0
+ sbcs r5,r1
+ movs r0,#0
+ sbcs r6,r0
+ sbcs r7,r0
mov r0,r10
- .syntax divided
- add r2,r0
- .syntax unified
+ adds r2,r2,r0
mov r1,r11
- .syntax divided
- adc r3,r1
- .syntax unified
+ adcs r3,r1
mov r0,r12
- .syntax divided
- adc r4,r0
- .syntax unified
+ adcs r4,r0
mov r0,r8
- .syntax divided
- adc r5,r0
- mov r0,#0
- adc r6,r0
- adc r7,r0
- .syntax unified
+ adcs r5,r0
+ movs r0,#0
+ adcs r6,r0
+ adcs r7,r0
mov r0,r10
- .syntax divided
// END: sqr 128 Refined Karatsuba
// Result in r0 ... r7
- .syntax unified
mov r8,r4
mov r9,r5
mov r10,r6
mov r11,r7
- .syntax divided
pop {r4,r5,r6,r7}
- add r0,r4
- adc r1,r5
- adc r2,r6
- adc r3,r7
- .syntax unified
+ adds r0,r0,r4
+ adcs r1,r5
+ adcs r2,r6
+ adcs r3,r7
mov r4,r8
mov r5,r9
mov r6,r10
mov r7,r11
mov r8,r0
- .syntax divided
- mov r0,#0
- adc r4,r0
- adc r5,r0
- adc r6,r0
- adc r7,r0
- .syntax unified
+ movs r0,#0
+ adcs r4,r0
+ adcs r5,r0
+ adcs r6,r0
+ adcs r7,r0
mov r0,r8
- .syntax divided
push {r0,r1,r2,r3,r4,r5,r6,r7}
ldr r4,[SP,#52]
ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7}
- sub r4,r0
- sbc r5,r1
- sbc r6,r2
- sbc r7,r3
- sbc r0,r0
- eor r4,r0
- eor r5,r0
- eor r6,r0
- eor r7,r0
- sub r4,r0
- sbc r5,r0
- sbc r6,r0
- sbc r7,r0
+ subs r4,r0
+ sbcs r5,r1
+ sbcs r6,r2
+ sbcs r7,r3
+ sbcs r0,r0
+ eors r4,r0
+ eors r5,r0
+ eors r6,r0
+ eors r7,r0
+ subs r4,r0
+ sbcs r5,r0
+ sbcs r6,r0
+ sbcs r7,r0
// sqr 128 Refined Karatsuba
// Input in r4 ... r7
// Result in r0 ... r7
// clobbers all registers except for r14
- .syntax unified
mov r0,r4
mov r1,r5
- .syntax divided
- sub r0,r6
- sbc r1,r7
- sbc r2,r2
- eor r0,r2
- eor r1,r2
- sub r0,r2
- sbc r1,r2
- .syntax unified
+ subs r0,r6
+ sbcs r1,r7
+ sbcs r2,r2
+ eors r0,r2
+ eors r1,r2
+ subs r0,r2
+ sbcs r1,r2
mov r8,r0
mov r9,r1
mov r10,r6
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r4,r5
// Result in r0,r1,r2,r3
@@ -754,85 +631,73 @@ square256_asm:
// Result in r0 ,r1
// Clobbers: r2, r3
uxth r0,r4
- lsr r1,r4,#16
- .syntax unified
+ lsrs r1,r4,#16
mov r2,r0
- .syntax divided
- mul r2,r1
- mul r0,r0
- mul r1,r1
- lsr r3,r2,#15
- lsl r2,r2,#17
- add r0,r2
- adc r1,r3
+ muls r2,r1
+ muls r0,r0
+ muls r1,r1
+ lsrs r3,r2,#15
+ lsls r2,r2,#17
+ adds r0,r0,r2
+ adcs r1,r3
// End: sqr 32
// Result in r0 ,r1
- sub r4,r5
- sbc r6,r6
- eor r4,r6
- sub r4,r6
+ subs r4,r5
+ sbcs r6,r6
+ eors r4,r6
+ subs r4,r6
// START: sqr 32
// Input operand in r5
// Result in r2 ,r3
// Clobbers: r5, r6
uxth r2,r5
- lsr r3,r5,#16
- .syntax unified
+ lsrs r3,r5,#16
mov r5,r2
- .syntax divided
- mul r5,r3
- mul r2,r2
- mul r3,r3
- lsr r6,r5,#15
- lsl r5,r5,#17
- add r2,r5
- adc r3,r6
+ muls r5,r3
+ muls r2,r2
+ muls r3,r3
+ lsrs r6,r5,#15
+ lsls r5,r5,#17
+ adds r2,r2,r5
+ adcs r3,r6
// End: sqr 32
// Result in r2 ,r3
- mov r6,#0
- add r2,r1
- adc r3,r6
+ movs r6,#0
+ adds r2,r2,r1
+ adcs r3,r6
// START: sqr 32
// Input operand in r4
// Result in r4 ,r5
// Clobbers: r1, r6
- lsr r5,r4,#16
+ lsrs r5,r4,#16
uxth r4,r4
- .syntax unified
mov r1,r4
- .syntax divided
- mul r1,r5
- mul r4,r4
- mul r5,r5
- lsr r6,r1,#15
- lsl r1,r1,#17
- add r4,r1
- adc r5,r6
+ muls r1,r5
+ muls r4,r4
+ muls r5,r5
+ lsrs r6,r1,#15
+ lsls r1,r1,#17
+ adds r4,r4,r1
+ adcs r5,r6
// End: sqr 32
// Result in r4 ,r5
- .syntax unified
mov r1,r2
- .syntax divided
- sub r1,r4
- sbc r2,r5
- .syntax unified
+ subs r1,r4
+ sbcs r2,r5
mov r5,r3
- .syntax divided
- mov r6,#0
- sbc r3,r6
- add r1,r0
- adc r2,r5
- adc r3,r6
+ movs r6,#0
+ sbcs r3,r6
+ adds r1,r1,r0
+ adcs r2,r5
+ adcs r3,r6
// END: sqr 64 Refined Karatsuba
// Result in r0,r1,r2,r3
// Leaves r6 zero.
- .syntax unified
mov r6,r10
mov r10,r0
mov r11,r1
mov r12,r2
mov r1,r3
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r6,r7
// Result in r2,r3,r4,r5
@@ -842,92 +707,78 @@ square256_asm:
// Result in r2 ,r3
// Clobbers: r4, r5
uxth r2,r6
- lsr r3,r6,#16
- .syntax unified
+ lsrs r3,r6,#16
mov r4,r2
- .syntax divided
- mul r4,r3
- mul r2,r2
- mul r3,r3
- lsr r5,r4,#15
- lsl r4,r4,#17
- add r2,r4
- adc r3,r5
+ muls r4,r3
+ muls r2,r2
+ muls r3,r3
+ lsrs r5,r4,#15
+ lsls r4,r4,#17
+ adds r2,r2,r4
+ adcs r3,r5
// End: sqr 32
// Result in r2 ,r3
- sub r6,r7
- sbc r4,r4
- eor r6,r4
- sub r6,r4
+ subs r6,r7
+ sbcs r4,r4
+ eors r6,r4
+ subs r6,r4
// START: sqr 32
// Input operand in r7
// Result in r4 ,r5
// Clobbers: r0, r7
uxth r4,r7
- lsr r5,r7,#16
- .syntax unified
+ lsrs r5,r7,#16
mov r0,r4
- .syntax divided
- mul r0,r5
- mul r4,r4
- mul r5,r5
- lsr r7,r0,#15
- lsl r0,r0,#17
- add r4,r0
- adc r5,r7
+ muls r0,r5
+ muls r4,r4
+ muls r5,r5
+ lsrs r7,r0,#15
+ lsls r0,r0,#17
+ adds r4,r4,r0
+ adcs r5,r7
// End: sqr 32
// Result in r4 ,r5
- mov r7,#0
- add r4,r3
- adc r5,r7
+ movs r7,#0
+ adds r4,r4,r3
+ adcs r5,r7
// START: sqr 32
// Input operand in r6
// Result in r7 ,r0
// Clobbers: r6, r3
uxth r7,r6
- lsr r0,r6,#16
- .syntax unified
+ lsrs r0,r6,#16
mov r6,r7
- .syntax divided
- mul r6,r0
- mul r7,r7
- mul r0,r0
- lsr r3,r6,#15
- lsl r6,r6,#17
- add r7,r6
- adc r0,r3
+ muls r6,r0
+ muls r7,r7
+ muls r0,r0
+ lsrs r3,r6,#15
+ lsls r6,r6,#17
+ adds r7,r7,r6
+ adcs r0,r3
// End: sqr 32
// Result in r7 ,r0
- .syntax unified
mov r3,r4
- .syntax divided
- sub r3,r7
- sbc r4,r0
- .syntax unified
+ subs r3,r7
+ sbcs r4,r0
mov r0,r5
- .syntax divided
- mov r6,#0
- sbc r5,r6
- add r3,r2
- adc r4,r0
- adc r5,r6
+ movs r6,#0
+ sbcs r5,r6
+ adds r3,r3,r2
+ adcs r4,r0
+ adcs r5,r6
// END: sqr 64 Refined Karatsuba
// Result in r2,r3,r4,r5
// Leaves r6 zero.
- .syntax unified
mov r0,r12
- .syntax divided
- add r2,r0
- adc r3,r1
- adc r4,r6
- adc r5,r6
- .syntax unified
+ adds r2,r2,r0
+ adcs r3,r1
+ adcs r4,r6
+ adcs r5,r6
mov r12,r2
mov r2,r8
mov r8,r3
mov r3,r9
mov r9,r4
- .syntax divided
// START: sqr 64 Refined Karatsuba
// Input operands in r2,r3
// Result in r6,r7,r0,r1
@@ -937,228 +788,172 @@ square256_asm:
// Result in r6 ,r7
// Clobbers: r0, r1
uxth r6,r2
- lsr r7,r2,#16
- .syntax unified
+ lsrs r7,r2,#16
mov r0,r6
- .syntax divided
- mul r0,r7
- mul r6,r6
- mul r7,r7
- lsr r1,r0,#15
- lsl r0,r0,#17
- add r6,r0
- adc r7,r1
+ muls r0,r7
+ muls r6,r6
+ muls r7,r7
+ lsrs r1,r0,#15
+ lsls r0,r0,#17
+ adds r6,r6,r0
+ adcs r7,r1
// End: sqr 32
// Result in r6 ,r7
- sub r2,r3
- sbc r4,r4
- eor r2,r4
- sub r2,r4
+ subs r2,r3
+ sbcs r4,r4
+ eors r2,r4
+ subs r2,r4
// START: sqr 32
// Input operand in r3
// Result in r0 ,r1
// Clobbers: r3, r4
uxth r0,r3
- lsr r1,r3,#16
- .syntax unified
+ lsrs r1,r3,#16
mov r3,r0
- .syntax divided
- mul r3,r1
- mul r0,r0
- mul r1,r1
- lsr r4,r3,#15
- lsl r3,r3,#17
- add r0,r3
- adc r1,r4
+ muls r3,r1
+ muls r0,r0
+ muls r1,r1
+ lsrs r4,r3,#15
+ lsls r3,r3,#17
+ adds r0,r0,r3
+ adcs r1,r4
// End: sqr 32
// Result in r0 ,r1
- mov r4,#0
- add r0,r7
- adc r1,r4
+ movs r4,#0
+ adds r0,r0,r7
+ adcs r1,r4
// START: sqr 32
// Input operand in r2
// Result in r3 ,r4
// Clobbers: r2, r7
uxth r3,r2
- lsr r4,r2,#16
- .syntax unified
+ lsrs r4,r2,#16
mov r2,r3
- .syntax divided
- mul r2,r4
- mul r3,r3
- mul r4,r4
- lsr r7,r2,#15
- lsl r2,r2,#17
- add r3,r2
- adc r4,r7
+ muls r2,r4
+ muls r3,r3
+ muls r4,r4
+ lsrs r7,r2,#15
+ lsls r2,r2,#17
+ adds r3,r3,r2
+ adcs r4,r7
// End: sqr 32
// Result in r3 ,r4
- .syntax unified
mov r7,r0
- .syntax divided
- sub r7,r3
- sbc r0,r4
- .syntax unified
+ subs r7,r3
+ sbcs r0,r4
mov r2,r1
- .syntax divided
- mov r4,#0
- sbc r1,r4
- add r7,r6
- adc r0,r2
- adc r1,r4
+ movs r4,#0
+ sbcs r1,r4
+ adds r7,r7,r6
+ adcs r0,r2
+ adcs r1,r4
// END: sqr 64 Refined Karatsuba
// Result in r6,r7,r0,r1
// Returns r4 as zero.
- .syntax unified
mov r2,r12
mov r3,r8
mov r4,r9
- .syntax divided
- sub r2,r6
- sbc r3,r7
- .syntax unified
+ subs r2,r6
+ sbcs r3,r7
mov r6,r4
mov r7,r5
- .syntax divided
- sbc r4,r0
- sbc r5,r1
- mov r0,#0
- sbc r6,r0
- sbc r7,r0
- .syntax unified
+ sbcs r4,r0
+ sbcs r5,r1
+ movs r0,#0
+ sbcs r6,r0
+ sbcs r7,r0
mov r0,r10
- .syntax divided
- add r2,r0
- .syntax unified
+ adds r2,r2,r0
mov r1,r11
- .syntax divided
- adc r3,r1
- .syntax unified
+ adcs r3,r1
mov r0,r12
- .syntax divided
- adc r4,r0
- .syntax unified
+ adcs r4,r0
mov r0,r8
- .syntax divided
- adc r5,r0
- mov r0,#0
- adc r6,r0
- adc r7,r0
- .syntax unified
+ adcs r5,r0
+ movs r0,#0
+ adcs r6,r0
+ adcs r7,r0
mov r0,r10
- .syntax divided
// END: sqr 128 Refined Karatsuba
// Result in r0 ... r7
- mvn r0,r0
- mvn r1,r1
- mvn r2,r2
- mvn r3,r3
- mvn r4,r4
- mvn r5,r5
- mvn r6,r6
- mvn r7,r7
- .syntax unified
+ mvns r0,r0
+ mvns r1,r1
+ mvns r2,r2
+ mvns r3,r3
+ mvns r4,r4
+ mvns r5,r5
+ mvns r6,r6
+ mvns r7,r7
mov r8,r4
mov r9,r5
mov r10,r6
mov r11,r7
- .syntax divided
- mov r4,#143
- asr r4,r4,#1
+ movs r4,#143
+ asrs r4,r4,#1
pop {r4,r5,r6,r7}
- adc r0,r4
- adc r1,r5
- adc r2,r6
- adc r3,r7
- .syntax unified
+ adcs r0,r4
+ adcs r1,r5
+ adcs r2,r6
+ adcs r3,r7
mov r12,r4
- .syntax divided
- mov r4,#16
+ movs r4,#16
add r4,r14
stm r4!,{r0,r1,r2,r3}
- .syntax unified
mov r4,r12
mov r0,r8
- .syntax divided
- adc r0,r4
- .syntax unified
+ adcs r0,r4
mov r8,r0
mov r1,r9
- .syntax divided
- adc r1,r5
- .syntax unified
+ adcs r1,r5
mov r9,r1
mov r2,r10
- .syntax divided
- adc r2,r6
- .syntax unified
+ adcs r2,r6
mov r10,r2
mov r3,r11
- .syntax divided
- adc r3,r7
- .syntax unified
+ adcs r3,r7
mov r11,r3
- .syntax divided
- mov r0,#0
- adc r0,r0
- .syntax unified
+ movs r0,#0
+ adcs r0,r0
mov r12,r0
mov r0,r14
- .syntax divided
ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7}
- add r0,r4
- adc r1,r5
- adc r2,r6
- adc r3,r7
- mov r4,#16
+ adds r0,r0,r4
+ adcs r1,r5
+ adcs r2,r6
+ adcs r3,r7
+ movs r4,#16
add r4,r14
stm r4!,{r0,r1,r2,r3}
- .syntax unified
mov r14,r4
mov r0,r13
- .syntax divided
ldm r0!,{r4,r5,r6,r7}
- .syntax unified
mov r1,r8
- .syntax divided
- adc r4,r1
- .syntax unified
+ adcs r4,r1
mov r1,r9
- .syntax divided
- adc r5,r1
- .syntax unified
+ adcs r5,r1
mov r1,r10
- .syntax divided
- adc r6,r1
- .syntax unified
+ adcs r6,r1
mov r1,r11
- .syntax divided
- adc r7,r1
- .syntax unified
+ adcs r7,r1
mov r0,r14
- .syntax divided
stm r0!,{r4,r5,r6,r7}
pop {r4,r5,r6,r7}
- .syntax unified
mov r1,r12
- .syntax divided
- mov r2,#0
- mvn r2,r2
- adc r1,r2
- asr r2,r1,#4
- add r4,r1
- adc r5,r2
- adc r6,r2
- adc r7,r2
+ movs r2,#0
+ mvns r2,r2
+ adcs r1,r2
+ asrs r2,r1,#4
+ adds r4,r4,r1
+ adcs r5,r2
+ adcs r6,r2
+ adcs r7,r2
stm r0!,{r4,r5,r6,r7}
pop {r3,r4,r5,r6,r7}
- .syntax unified
mov r8,r3
mov r9,r4
mov r10,r5
mov r11,r6
mov r12,r7
- .syntax divided
pop {r0,r4,r5,r6,r7,r15}
//Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions).
.size square256_asm, .-square256_asm