diff options
author | Tom Hughes <tomhughes@chromium.org> | 2022-10-18 16:27:08 -0700 |
---|---|---|
committer | Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-10-24 17:33:18 +0000 |
commit | 1afeeef4f4e69960199e32f4de50094c43625b27 (patch) | |
tree | 5810fa3c6af9c97304b2d60c39ca17b20a10e0c5 /third_party | |
parent | 4b47b808d52d602d86a7ec745019b72433469bdf (diff) | |
download | chrome-ec-1afeeef4f4e69960199e32f4de50094c43625b27.tar.gz |
third_party/unacl-curve25519: Fix assembly
When building with clang, compilation for Thumb fails because there are
no flag-preserving variants of many of the instructions used in the
assembly:
core/cortex-m0/curve25519/mul.S:1099:2: error: no flag-preserving
variant of this instruction available
adc r2, r6
^
Using "arm-none-eabi-objdump -d" to disassemble the object
files, we can see that gcc is less strict and just silently generates
the version of the instruction that sets the flags (e.g., "adc" ->
"adcs").
This change fixes up the assembly so that it compiles with clang. Most
of the changes were done programmatically with the following script
(followed by some manual cleanup):
sed -i 's/adc/adcs/g' ${FILE}
sed -i 's/asr/asrs/g' ${FILE}
sed -i 's/mvn/mvns/g' ${FILE}
sed -i 's/sbc/sbcs/g' ${FILE}
sed -i 's/sub/subs/g' ${FILE}
sed -i 's/lsr/lsrs/g' ${FILE}
sed -i 's/lsl/lsls/g' ${FILE}
sed -i 's/mul/muls/g' ${FILE}
sed -i 's/eor/eors/g' ${FILE}
sed -i 's/orr/orrs/g' ${FILE}
sed -i 's/mov r\([[:digit:]]\+\),#\([[:digit:]]\+\)/movs r\1,#\2/g' ${FILE}
sed -i 's/add r\([[:digit:]]\+\),#\([[:digit:]]\+\)/adds r\1,#\2/g' ${FILE}
sed -i 's/add r\([[:digit:]]\+\),r\([[:digit:]]\+\)/adds r\1,r\1,r\2/g' ${FILE}
The binary generated by gcc before and after this change is bitwise
identical, as verified with the compare_build.sh script (see TEST line).
BRANCH=none
BUG=b:172020503
TEST=CC=clang make BOARD=hammer
TEST=./util/compare_build.sh -b all -j 120
=> MATCH
Signed-off-by: Tom Hughes <tomhughes@chromium.org>
Change-Id: Ice602c1996ef3b48c46e69f0d6770828cf21c15d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/3968441
Reviewed-by: Eric Yilun Lin <yllin@google.com>
Diffstat (limited to 'third_party')
4 files changed, 1530 insertions, 1733 deletions
diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S index 0f847a2484..5236dff64c 100644 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S +++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mpy121666.S @@ -10,7 +10,7 @@ // ATTENTION: // Not yet tested on target hardware. - + .syntax unified .code 16 .text .align 2 @@ -24,156 +24,156 @@ fe25519_mpyWith121666_asm: push {r4,r5,r6,r7,r14} ldr r7,=56130 ldr r2,[r1,#28] - lsl r5,r2,#16 - lsr r6,r2,#16 - lsr r3,r2,#16 + lsls r5,r2,#16 + lsrs r6,r2,#16 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 - lsl r2,r5,#1 - lsr r2,r2,#1 + muls r2,r7 + muls r3,r7 + adds r5,r5,r2 + movs r2,#0 + adcs r6,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r5,r5,r2 + adcs r6,r3 + lsls r2,r5,#1 + lsrs r2,r2,#1 str r2,[r0,#28] - lsr r5,r5,#31 - lsl r6,r6,#1 - orr r5,r6 - mov r6,#19 - mul r5,r6 - mov r6,#0 + lsrs r5,r5,#31 + lsls r6,r6,#1 + orrs r5,r6 + movs r6,#19 + muls r5,r6 + movs r6,#0 ldr r2,[r1,#0] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r5,r5,r3 + adcs r6,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 + muls r2,r7 + muls r3,r7 + adds r5,r5,r2 + movs r2,#0 + adcs r6,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r5,r5,r2 + adcs r6,r3 str r5,[r0,#0] - mov r5,#0 + movs r5,#0 ldr r2,[r1,#4] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r6,r3 - adc r5,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r6,r6,r3 + adcs r5,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r6,r2 - mov r2,#0 - adc r5,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r6,r2 - adc r5,r3 + muls r2,r7 + muls r3,r7 + adds r6,r6,r2 + movs r2,#0 + adcs r5,r2 + lsls r2,r3,#16 + lsrs 
r3,r3,#16 + adds r6,r6,r2 + adcs r5,r3 str r6,[r0,#4] - mov r6,#0 + movs r6,#0 ldr r2,[r1,#8] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r5,r5,r3 + adcs r6,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 + muls r2,r7 + muls r3,r7 + adds r5,r5,r2 + movs r2,#0 + adcs r6,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r5,r5,r2 + adcs r6,r3 str r5,[r0,#8] - mov r5,#0 + movs r5,#0 ldr r2,[r1,#12] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r6,r3 - adc r5,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r6,r6,r3 + adcs r5,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r6,r2 - mov r2,#0 - adc r5,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r6,r2 - adc r5,r3 + muls r2,r7 + muls r3,r7 + adds r6,r6,r2 + movs r2,#0 + adcs r5,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r6,r6,r2 + adcs r5,r3 str r6,[r0,#12] - mov r6,#0 + movs r6,#0 ldr r2,[r1,#16] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r5,r3 - adc r6,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r5,r5,r3 + adcs r6,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 + muls r2,r7 + muls r3,r7 + adds r5,r5,r2 + movs r2,#0 + adcs r6,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r5,r5,r2 + adcs r6,r3 str r5,[r0,#16] - mov r5,#0 + movs r5,#0 ldr r2,[r1,#20] - lsl r3,r2,#16 - lsr r4,r2,#16 - add r6,r3 - adc r5,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r6,r6,r3 + adcs r5,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r6,r2 - mov r2,#0 - adc r5,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r6,r2 - adc r5,r3 + muls r2,r7 + muls r3,r7 + adds r6,r6,r2 + movs r2,#0 + adcs r5,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r6,r6,r2 + adcs r5,r3 str r6,[r0,#20] - mov r6,#0 + movs r6,#0 ldr r2,[r1,#24] - lsl r3,r2,#16 - lsr r4,r2,#16 - add 
r5,r3 - adc r6,r4 - lsr r3,r2,#16 + lsls r3,r2,#16 + lsrs r4,r2,#16 + adds r5,r5,r3 + adcs r6,r4 + lsrs r3,r2,#16 uxth r2,r2 - mul r2,r7 - mul r3,r7 - add r5,r2 - mov r2,#0 - adc r6,r2 - lsl r2,r3,#16 - lsr r3,r3,#16 - add r5,r2 - adc r6,r3 + muls r2,r7 + muls r3,r7 + adds r5,r5,r2 + movs r2,#0 + adcs r6,r2 + lsls r2,r3,#16 + lsrs r3,r3,#16 + adds r5,r5,r2 + adcs r6,r3 str r5,[r0,#24] - mov r5,#0 + movs r5,#0 ldr r2,[r0,#28] - add r6,r2 + adds r6,r6,r2 str r6,[r0,#28] pop {r4,r5,r6,r7,r15} diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S index cb272b9393..2980e03364 100644 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S +++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/mul.S @@ -1,3 +1,4 @@ + .syntax unified .align 2 .global multiply256x256_asm .type multiply256x256_asm, %function @@ -11,96 +12,96 @@ multiply256x256_asm: mov r12, r0 mov r10, r2 mov r11, r1 - mov r0,r2 + adds r0, r2, #0 //ldm r0!, {r4,r5,r6,r7} ldm r0!, {r4,r5} - add r0,#8 + adds r0,#8 ldm r1!, {r2,r3,r6,r7} push {r0,r1} /////////BEGIN LOW PART ////////////////////// /////////MUL128///////////// //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs 
r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 ////////////////////////// mov r4, r12 stm r4!, {r0,r1} @@ -109,229 +110,229 @@ multiply256x256_asm: mov r1, r10 mov r10, r2 ldm r1, {r0, r1, r4, r5} - mov r2, r4 - mov r7, r5 - sub r2, r0 - sbc r7, r1 - sbc r6, r6 - eor r2, r6 - eor r7, r6 - sub r2, r6 - sbc r7, r6 + adds r2, r4, #0 + adds r7, r5, #0 + subs r2, r0 + sbcs r7, r1 + sbcs r6, r6 + eors r2, r6 + eors r7, r6 + subs r2, r6 + sbcs r7, r6 push {r2, r7} mov r2, r11 mov r11, r3 ldm r2, {r0, r1, r2, r3} - sub r0, r2 - sbc r1, r3 - sbc r7, r7 - eor r0, r7 - eor r1, r7 - sub r0, r7 - sbc r1, r7 - eor r7, r6 + subs r0, r2 + sbcs r1, r3 + sbcs r7, r7 + eors r0, r7 + eors r1, r7 + 
subs r0, r7 + sbcs r1, r7 + eors r7, r6 mov r12, r7 push {r0, r1} //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + 
adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 mov r4, r10 mov r5, r11 - eor r6, r6 - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r6 + eors r6, r6 + adds r0, r0, r4 + adcs r1, r5 + adcs r2, r6 + adcs r3, r6 mov r10, r2 mov r11, r3 pop {r2-r5} push {r0, r1} - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul 
r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 pop {r4, r5} mov r6, r12 mov r7, r12 - eor r0, r6 - eor r1, r6 - eor r2, r6 - eor r3, r6 - asr r6, r6, #1 - adc r0, r4 - adc r1, r5 - adc r4, r2 - adc r5, r3 - eor r2, r2 - adc r6,r2 - adc r7,r2 + eors r0, r6 + eors r1, r6 + eors r2, r6 + eors r3, r6 + asrs r6, r6, #1 + adcs r0, r4 + adcs r1, r5 + adcs r4, r2 + adcs r5, r3 + eors r2, r2 + adcs r6,r2 + adcs r7,r2 pop {r2, r3} mov r8, r2 mov r9, r3 - add r2, r0 - adc r3, r1 + adds r2, r2, r0 + adcs r3, r1 mov r0, r10 mov r1, r11 - adc r4, r0 - adc r5, r1 - adc r6, r0 - adc r7, r1 + adcs r4, r0 + adcs r5, r1 + adcs r6, r0 + adcs r7, r1 ////////END LOW PART///////////////////// pop {r0} stm r0!, {r2,r3} @@ -345,353 +346,353 @@ multiply256x256_asm: /////////BEGIN HIGH PART//////////////// /////////MUL128///////////// //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs 
r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 push {r0,r1} mov r1, r10 mov r10, r2 ldm r1, {r0, r1, r4, r5} - mov r2, r4 - mov r7, r5 - sub r2, r0 - sbc r7, r1 - sbc r6, r6 - eor r2, r6 - eor r7, r6 - sub r2, r6 - sbc r7, r6 + adds r2, r4, #0 + adds r7, r5, #0 + subs r2, r0 + sbcs r7, r1 + sbcs r6, r6 + eors r2, r6 + eors r7, r6 + subs r2, r6 + sbcs r7, r6 push {r2, r7} mov r2, r11 mov r11, r3 ldm r2, {r0, r1, r2, r3} - sub r0, r2 - sbc r1, r3 - sbc r7, r7 - eor r0, r7 - eor r1, r7 - sub r0, r7 - sbc r1, r7 - eor r7, 
r6 + subs r0, r2 + sbcs r1, r3 + sbcs r7, r7 + eors r0, r7 + eors r1, r7 + subs r0, r7 + sbcs r1, r7 + eors r7, r6 mov r12, r7 push {r0, r1} //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs 
r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 mov r4, r10 mov r5, r11 - eor r6, r6 - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r6 + eors r6, r6 + adds r0, r0, r4 + adcs r1, r5 + adcs r2, r6 + adcs r3, r6 mov r10, r2 mov r11, r3 pop {r2-r5} push {r0, r1} - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds 
r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 pop {r4, r5} mov r6, r12 mov r7, r12 - eor r0, r6 - eor r1, r6 - eor r2, r6 - eor r3, r6 - asr r6, r6, #1 - adc r0, r4 - adc r1, r5 - adc r4, r2 - adc r5, r3 - eor r2, r2 - adc r6,r2 //0,1 - adc r7,r2 + eors r0, r6 + eors r1, r6 + eors r2, r6 + eors r3, r6 + asrs r6, r6, #1 + adcs r0, r4 + adcs r1, r5 + adcs r4, r2 + adcs r5, r3 + eors r2, r2 + adcs r6,r2 //0,1 + adcs r7,r2 pop {r2, r3} mov r8, r2 mov r9, r3 - add r2, r0 - adc r3, r1 + adds r2, r2, r0 + adcs r3, r1 mov r0, r10 mov r1, r11 - adc r4, r0 - adc r5, r1 - adc r6, r0 - adc r7, r1 + adcs r4, r0 + adcs r5, r1 + adcs r6, r0 + adcs r7, r1 ////////END HIGH PART///////////////////// mov r0, r8 mov r1, r9 mov r8, r6 mov r9, r7 pop {r6, r7} - add r0, r6 - adc r1, r7 + adds r0, r0, r6 + adcs r1, r7 pop {r6, r7} - adc r2, r6 - adc r3, r7 + adcs r2, r6 + adcs r3, r7 pop {r7} stm r7!, {r0-r3} mov r10, r7 - eor r0,r0 + eors r0,r0 mov r6, r8 mov r7, r9 - adc r4, r0 - adc r5, r0 - adc r6, r0 - adc r7, r0 + adcs r4, r0 + adcs r5, r0 + adcs r6, r0 + adcs r7, r0 pop {r0,r1,r2} mov r12, r2 push {r0, r4-r7} ldm r1, {r0-r7} - sub r0, r4 - sbc r1, r5 - sbc r2, r6 - sbc r3, r7 - eor r4, r4 - sbc r4, r4 - eor r0, r4 - eor r1, r4 - eor r2, r4 - eor r3, r4 - sub r0, r4 - sbc r1, r4 - sbc r2, r4 - sbc r3, r4 + subs r0, r4 + sbcs r1, r5 + sbcs r2, r6 + sbcs r3, r7 + eors r4, r4 + sbcs 
r4, r4 + eors r0, r4 + eors r1, r4 + eors r2, r4 + eors r3, r4 + subs r0, r4 + sbcs r1, r4 + sbcs r2, r4 + sbcs r3, r4 mov r6, r12 mov r12, r4 //carry mov r5, r10 @@ -700,22 +701,22 @@ multiply256x256_asm: mov r8, r0 mov r9, r1 ldm r6, {r0-r7} - sub r4, r0 - sbc r5, r1 - sbc r6, r2 - sbc r7, r3 - eor r0, r0 - sbc r0, r0 - eor r4, r0 - eor r5, r0 - eor r6, r0 - eor r7, r0 - sub r4, r0 - sbc r5, r0 - sbc r6, r0 - sbc r7, r0 + subs r4, r0 + sbcs r5, r1 + sbcs r6, r2 + sbcs r7, r3 + eors r0, r0 + sbcs r0, r0 + eors r4, r0 + eors r5, r0 + eors r6, r0 + eors r7, r0 + subs r4, r0 + sbcs r5, r0 + sbcs r6, r0 + sbcs r7, r0 mov r1, r12 - eor r0, r1 + eors r0, r1 mov r1, r11 stm r1!, {r4-r7} push {r0} @@ -724,366 +725,366 @@ multiply256x256_asm: /////////BEGIN MIDDLE PART//////////////// /////////MUL128///////////// //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + 
muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 push {r0,r1} mov r1, r10 mov r10, r2 ldm r1, {r0, r1, r4, r5} - mov r2, r4 - mov r7, r5 - sub r2, r0 - sbc r7, r1 - sbc r6, r6 - eor r2, r6 - eor r7, r6 - sub r2, r6 - sbc r7, r6 + adds r2, r4, #0 + adds r7, r5, #0 + subs r2, r0 + sbcs r7, r1 + sbcs r6, r6 + eors r2, r6 + eors r7, r6 + subs r2, r6 + sbcs r7, r6 push {r2, r7} mov r2, r11 mov r11, r3 ldm r2, {r0, r1, r2, r3} - sub r0, r2 - sbc r1, r3 - sbc r7, r7 - eor r0, r7 - eor r1, r7 - sub r0, r7 - sbc r1, r7 - eor r7, r6 + subs r0, r2 + sbcs r1, r3 + sbcs r7, r7 + eors r0, r7 + eors r1, r7 + subs r0, r7 + sbcs r1, r7 + eors r7, r6 mov r12, r7 push {r0, r1} //MUL64 - mov r6, r5 - mov r1, r2 - sub r5, r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, 
#0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 mov r4, r10 mov r5, r11 - eor r6, r6 - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r6 + eors r6, r6 + adds r0, r0, r4 + adcs r1, r5 + adcs r2, r6 + adcs r3, r6 mov r10, r2 mov r11, r3 pop {r2-r5} push {r0, r1} - mov r6, r5 - mov r1, r2 - sub r5, 
r4 - sbc r0, r0 - eor r5, r0 - sub r5, r0 - sub r1, r3 - sbc r7, r7 - eor r1, r7 - sub r1, r7 - eor r7, r0 + adds r6, r5, #0 + adds r1, r2, #0 + subs r5, r4 + sbcs r0, r0 + eors r5, r0 + subs r5, r0 + subs r1, r3 + sbcs r7, r7 + eors r1, r7 + subs r1, r7 + eors r7, r0 mov r9, r1 mov r8, r5 - lsr r1,r4,#16 + lsrs r1,r4,#16 uxth r4,r4 - mov r0,r4 + adds r0, r4, #0 uxth r5,r2 - lsr r2,#16 - mul r0,r5//00 - mul r5,r1//10 - mul r4,r2//01 - mul r1,r2//11 - lsl r2,r4,#16 - lsr r4,r4,#16 - add r0,r2 - adc r1,r4 - lsl r2,r5,#16 - lsr r4,r5,#16 - add r0,r2 - adc r1,r4 - lsr r4, r6,#16 + lsrs r2,#16 + muls r0,r5//00 + muls r5,r1//10 + muls r4,r2//01 + muls r1,r2//11 + lsls r2,r4,#16 + lsrs r4,r4,#16 + adds r0, r0, r2 + adcs r1,r4 + lsls r2,r5,#16 + lsrs r4,r5,#16 + adds r0, r0, r2 + adcs r1,r4 + lsrs r4, r6,#16 uxth r6, r6 uxth r5, r3 - lsr r3, r3, #16 - mov r2, r6 - mul r2, r5 - mul r5, r4 - mul r6, r3 - mul r3, r4 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 - lsl r4,r6,#16 - lsr r5,r6,#16 - add r2,r4 - adc r3,r5 - eor r6, r6 - add r2, r1 - adc r3, r6 + lsrs r3, r3, #16 + adds r2, r6, #0 + muls r2, r5 + muls r5, r4 + muls r6, r3 + muls r3, r4 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2, r2, r4 + adcs r3,r5 + lsls r4,r6,#16 + lsrs r5,r6,#16 + adds r2, r2, r4 + adcs r3,r5 + eors r6, r6 + adds r2, r2, r1 + adcs r3, r6 mov r1, r9 mov r5, r8 mov r8, r0 - lsr r0, r1,#16 + lsrs r0, r1,#16 uxth r1,r1 - mov r4,r1 - lsr r6,r5,#16 + adds r4, r1, #0 + lsrs r6,r5,#16 uxth r5,r5 - mul r1,r5 - mul r4,r6 - mul r5,r0 - mul r0,r6 - lsl r6,r4,#16 - lsr r4,#16 - add r1,r6 - adc r0,r4 - lsl r6,r5,#16 - lsr r5,#16 - add r1,r6 - adc r0,r5 - eor r1,r7 - eor r0,r7 - eor r4, r4 - asr r7, r7, #1 - adc r1, r2 - adc r2, r0 - adc r7, r4 + muls r1,r5 + muls r4,r6 + muls r5,r0 + muls r0,r6 + lsls r6,r4,#16 + lsrs r4,#16 + adds r1, r1, r6 + adcs r0,r4 + lsls r6,r5,#16 + lsrs r5,#16 + adds r1, r1, r6 + adcs r0,r5 + eors r1,r7 + eors r0,r7 + eors r4, r4 + asrs r7, r7, #1 + adcs r1, r2 + adcs r2, 
r0 + adcs r7, r4 mov r0, r8 - add r1, r0 - adc r2, r3 - adc r3, r7 + adds r1, r1, r0 + adcs r2, r3 + adcs r3, r7 pop {r4, r5} mov r6, r12 mov r7, r12 - eor r0, r6 - eor r1, r6 - eor r2, r6 - eor r3, r6 - asr r6, r6, #1 - adc r0, r4 - adc r1, r5 - adc r4, r2 - adc r5, r3 - eor r2, r2 - adc r6,r2 //0,1 - adc r7,r2 + eors r0, r6 + eors r1, r6 + eors r2, r6 + eors r3, r6 + asrs r6, r6, #1 + adcs r0, r4 + adcs r1, r5 + adcs r4, r2 + adcs r5, r3 + eors r2, r2 + adcs r6,r2 //0,1 + adcs r7,r2 pop {r2, r3} mov r8, r2 mov r9, r3 - add r2, r0 - adc r3, r1 + adds r2, r2, r0 + adcs r3, r1 mov r0, r10 mov r1, r11 - adc r4, r0 - adc r5, r1 - adc r6, r0 - adc r7, r1 + adcs r4, r0 + adcs r5, r1 + adcs r6, r0 + adcs r7, r1 //////////END MIDDLE PART//////////////// pop {r0,r1} //r0,r1 mov r12, r0 //negative - eor r2, r0 - eor r3, r0 - eor r4, r0 - eor r5, r0 - eor r6, r0 - eor r7, r0 + eors r2, r0 + eors r3, r0 + eors r4, r0 + eors r5, r0 + eors r6, r0 + eors r7, r0 push {r4-r7} ldm r1!, {r4-r7} mov r11, r1 //reference mov r1, r9 - eor r1, r0 + eors r1, r0 mov r10, r4 mov r4, r8 - asr r0, #1 - eor r0, r4 + asrs r0, #1 + eors r0, r4 mov r4, r10 - adc r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r7 - eor r4, r4 - adc r4, r4 + adcs r0, r4 + adcs r1, r5 + adcs r2, r6 + adcs r3, r7 + eors r4, r4 + adcs r4, r4 mov r10, r4 //carry mov r4, r11 ldm r4, {r4-r7} - add r0, r4 - adc r1, r5 - adc r2, r6 - adc r3, r7 + adds r0, r0, r4 + adcs r1, r5 + adcs r2, r6 + adcs r3, r7 mov r9, r4 mov r4, r11 stm r4!, {r0-r3} mov r11, r4 pop {r0-r3} mov r4, r9 - adc r4, r0 - adc r5, r1 - adc r6, r2 - adc r7, r3 - mov r1, #0 - adc r1, r1 + adcs r4, r0 + adcs r5, r1 + adcs r6, r2 + adcs r7, r3 + movs r1, #0 + adcs r1, r1 mov r0, r10 mov r10, r1 //carry - asr r0, #1 + asrs r0, #1 pop {r0-r3} - adc r4, r0 - adc r5, r1 - adc r6, r2 - adc r7, r3 + adcs r4, r0 + adcs r5, r1 + adcs r6, r2 + adcs r7, r3 mov r8, r0 mov r0, r11 stm r0!, {r4-r7} @@ -1091,13 +1092,13 @@ multiply256x256_asm: mov r0, r8 mov r6, r12 mov r5, 
r10 - eor r4, r4 - adc r5, r6 - adc r6, r4 - add r0, r5 - adc r1, r6 - adc r2, r6 - adc r3, r6 + eors r4, r4 + adcs r5, r6 + adcs r6, r4 + adds r0, r0, r5 + adcs r1, r6 + adcs r2, r6 + adcs r3, r6 mov r7, r11 stm r7!, {r0-r3} pop {r3-r6} diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S index 73e613330f..15e68373ea 100644 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S +++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/reduce25519.S @@ -8,6 +8,7 @@ // Generated and tested with C++ functions in the test subdirectory and on the target. // + .syntax unified .code 16 .text @@ -21,141 +22,141 @@ fe25519_reduceTo256Bits_asm: push {r4,r5,r6,r7,r14} ldr r2,[r1,#60] - lsr r3,r2,#16 + lsrs r3,r2,#16 uxth r2,r2 - mov r7,#38 - mul r2,r7 - mul r3,r7 + movs r7,#38 + muls r2,r7 + muls r3,r7 ldr r4,[r1,#28] - lsr r5,r3,#16 - lsl r3,r3,#16 - mov r6,#0 - add r4,r2 - adc r5,r6 - add r4,r3 - adc r5,r6 - lsl r2,r4,#1 - lsr r2,r2,#1 + lsrs r5,r3,#16 + lsls r3,r3,#16 + movs r6,#0 + adds r4,r4,r2 + adcs r5,r6 + adds r4,r4,r3 + adcs r5,r6 + lsls r2,r4,#1 + lsrs r2,r2,#1 str r2,[r0,#28] - lsr r4,r4,#31 - lsl r5,r5,#1 - orr r4,r5 - mov r2,#19 - mul r2,r4 + lsrs r4,r4,#31 + lsls r5,r5,#1 + orrs r4,r5 + movs r2,#19 + muls r2,r4 ldr r4,[r1,#0] - add r2,r4 - mov r3,#0 - adc r3,r6 + adds r2,r2,r4 + movs r3,#0 + adcs r3,r6 ldr r4,[r1,#32] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 + muls r5,r7 + muls r4,r7 + adds r2,r2,r4 + adcs r3,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2,r2,r4 + adcs r3,r5 str r2,[r0,#0] ldr r4,[r1,#4] - add r3,r4 - mov r2,#0 - adc r2,r6 + adds r3,r3,r4 + movs r2,#0 + adcs r2,r6 ldr r4,[r1,#36] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r3,r4 - adc r2,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r3,r4 - adc 
r2,r5 + muls r5,r7 + muls r4,r7 + adds r3,r3,r4 + adcs r2,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r3,r3,r4 + adcs r2,r5 str r3,[r0,#4] ldr r4,[r1,#8] - add r2,r4 - mov r3,#0 - adc r3,r6 + adds r2,r2,r4 + movs r3,#0 + adcs r3,r6 ldr r4,[r1,#40] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 + muls r5,r7 + muls r4,r7 + adds r2,r2,r4 + adcs r3,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2,r2,r4 + adcs r3,r5 str r2,[r0,#8] ldr r4,[r1,#12] - add r3,r4 - mov r2,#0 - adc r2,r6 + adds r3,r3,r4 + movs r2,#0 + adcs r2,r6 ldr r4,[r1,#44] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r3,r4 - adc r2,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r3,r4 - adc r2,r5 + muls r5,r7 + muls r4,r7 + adds r3,r3,r4 + adcs r2,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r3,r3,r4 + adcs r2,r5 str r3,[r0,#12] ldr r4,[r1,#16] - add r2,r4 - mov r3,#0 - adc r3,r6 + adds r2,r2,r4 + movs r3,#0 + adcs r3,r6 ldr r4,[r1,#48] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 + muls r5,r7 + muls r4,r7 + adds r2,r2,r4 + adcs r3,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2,r2,r4 + adcs r3,r5 str r2,[r0,#16] ldr r4,[r1,#20] - add r3,r4 - mov r2,#0 - adc r2,r6 + adds r3,r3,r4 + movs r2,#0 + adcs r2,r6 ldr r4,[r1,#52] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r3,r4 - adc r2,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r3,r4 - adc r2,r5 + muls r5,r7 + muls r4,r7 + adds r3,r3,r4 + adcs r2,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r3,r3,r4 + adcs r2,r5 str r3,[r0,#20] ldr r4,[r1,#24] - add r2,r4 - mov r3,#0 - adc r3,r6 + adds r2,r2,r4 + movs r3,#0 + adcs r3,r6 ldr r4,[r1,#56] - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - mul r5,r7 - mul r4,r7 - add r2,r4 - adc r3,r6 - lsl r4,r5,#16 - lsr r5,r5,#16 - add r2,r4 - adc r3,r5 + muls r5,r7 + muls r4,r7 + adds 
r2,r2,r4 + adcs r3,r6 + lsls r4,r5,#16 + lsrs r5,r5,#16 + adds r2,r2,r4 + adcs r3,r5 str r2,[r0,#24] ldr r4,[r0,#28] - add r4,r3 + adds r4,r4,r3 str r4,[r0,#28] pop {r4,r5,r6,r7,r15} diff --git a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S index b62121adb7..340564e87b 100644 --- a/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S +++ b/third_party/unacl-curve25519/core/cortex-m0/curve25519/sqr.S @@ -3,6 +3,7 @@ // public domain // + .syntax unified .align 2 .global square256_asm .type square256_asm, %function @@ -15,38 +16,30 @@ square256_asm: // pResult in r0 // adheres to arm eabi calling convention. push {r1,r4,r5,r6,r7,r14} - .syntax unified mov r3,r8 mov r4,r9 mov r5,r10 mov r6,r11 mov r7,r12 - .syntax divided push {r3,r4,r5,r6,r7} - .syntax unified mov r14,r0 - .syntax divided - ldm r1!,{r4,r5,r6,r7} + ldmia r1!,{r4,r5,r6,r7} // sqr 128 Refined Karatsuba // Input in r4 ... r7 // Result in r0 ... 
r7 // clobbers all registers except for r14 - .syntax unified mov r0,r4 mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified + subs r0,r6 + sbcs r1,r7 + sbcs r2,r2 + eors r0,r2 + eors r1,r2 + subs r0,r2 + sbcs r1,r2 mov r8,r0 mov r9,r1 mov r10,r6 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r4,r5 // Result in r0,r1,r2,r3 @@ -56,85 +49,73 @@ square256_asm: // Result in r0 ,r1 // Clobbers: r2, r3 uxth r0,r4 - lsr r1,r4,#16 - .syntax unified + lsrs r1,r4,#16 mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 + muls r2,r1 + muls r0,r0 + muls r1,r1 + lsrs r3,r2,#15 + lsls r2,r2,#17 + adds r0,r0,r2 + adcs r1,r3 // End: sqr 32 // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 + subs r4,r5 + sbcs r6,r6 + eors r4,r6 + subs r4,r6 // START: sqr 32 // Input operand in r5 // Result in r2 ,r3 // Clobbers: r5, r6 uxth r2,r5 - lsr r3,r5,#16 - .syntax unified + lsrs r3,r5,#16 mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 + muls r5,r3 + muls r2,r2 + muls r3,r3 + lsrs r6,r5,#15 + lsls r5,r5,#17 + adds r2,r2,r5 + adcs r3,r6 // End: sqr 32 // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 + movs r6,#0 + adds r2,r2,r1 + adcs r3,r6 // START: sqr 32 // Input operand in r4 // Result in r4 ,r5 // Clobbers: r1, r6 - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - .syntax unified mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 + muls r1,r5 + muls r4,r4 + muls r5,r5 + lsrs r6,r1,#15 + lsls r1,r1,#17 + adds r4,r4,r1 + adcs r5,r6 // End: sqr 32 // Result in r4 ,r5 - .syntax unified mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified + subs r1,r4 + sbcs r2,r5 mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc 
r3,r6 + movs r6,#0 + sbcs r3,r6 + adds r1,r1,r0 + adcs r2,r5 + adcs r3,r6 // END: sqr 64 Refined Karatsuba // Result in r0,r1,r2,r3 // Leaves r6 zero. - .syntax unified mov r6,r10 mov r10,r0 mov r11,r1 mov r12,r2 mov r1,r3 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r6,r7 // Result in r2,r3,r4,r5 @@ -144,92 +125,78 @@ square256_asm: // Result in r2 ,r3 // Clobbers: r4, r5 uxth r2,r6 - lsr r3,r6,#16 - .syntax unified + lsrs r3,r6,#16 mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 + muls r4,r3 + muls r2,r2 + muls r3,r3 + lsrs r5,r4,#15 + lsls r4,r4,#17 + adds r2,r2,r4 + adcs r3,r5 // End: sqr 32 // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 + subs r6,r7 + sbcs r4,r4 + eors r6,r4 + subs r6,r4 // START: sqr 32 // Input operand in r7 // Result in r4 ,r5 // Clobbers: r0, r7 uxth r4,r7 - lsr r5,r7,#16 - .syntax unified + lsrs r5,r7,#16 mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 + muls r0,r5 + muls r4,r4 + muls r5,r5 + lsrs r7,r0,#15 + lsls r0,r0,#17 + adds r4,r4,r0 + adcs r5,r7 // End: sqr 32 // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 + movs r7,#0 + adds r4,r4,r3 + adcs r5,r7 // START: sqr 32 // Input operand in r6 // Result in r7 ,r0 // Clobbers: r6, r3 uxth r7,r6 - lsr r0,r6,#16 - .syntax unified + lsrs r0,r6,#16 mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 + muls r6,r0 + muls r7,r7 + muls r0,r0 + lsrs r3,r6,#15 + lsls r6,r6,#17 + adds r7,r7,r6 + adcs r0,r3 // End: sqr 32 // Result in r7 ,r0 - .syntax unified mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified + subs r3,r7 + sbcs r4,r0 mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 + movs r6,#0 + sbcs r5,r6 + adds r3,r3,r2 + adcs r4,r0 + adcs r5,r6 // END: sqr 64 Refined 
Karatsuba // Result in r2,r3,r4,r5 // Leaves r6 zero. - .syntax unified mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified + adds r2,r2,r0 + adcs r3,r1 + adcs r4,r6 + adcs r5,r6 mov r12,r2 mov r2,r8 mov r8,r3 mov r3,r9 mov r9,r4 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r2,r3 // Result in r6,r7,r0,r1 @@ -239,146 +206,116 @@ square256_asm: // Result in r6 ,r7 // Clobbers: r0, r1 uxth r6,r2 - lsr r7,r2,#16 - .syntax unified + lsrs r7,r2,#16 mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 + muls r0,r7 + muls r6,r6 + muls r7,r7 + lsrs r1,r0,#15 + lsls r0,r0,#17 + adds r6,r6,r0 + adcs r7,r1 // End: sqr 32 // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 + subs r2,r3 + sbcs r4,r4 + eors r2,r4 + subs r2,r4 // START: sqr 32 // Input operand in r3 // Result in r0 ,r1 // Clobbers: r3, r4 uxth r0,r3 - lsr r1,r3,#16 - .syntax unified + lsrs r1,r3,#16 mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 + muls r3,r1 + muls r0,r0 + muls r1,r1 + lsrs r4,r3,#15 + lsls r3,r3,#17 + adds r0,r0,r3 + adcs r1,r4 // End: sqr 32 // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 + movs r4,#0 + adds r0,r0,r7 + adcs r1,r4 // START: sqr 32 // Input operand in r2 // Result in r3 ,r4 // Clobbers: r2, r7 uxth r3,r2 - lsr r4,r2,#16 - .syntax unified + lsrs r4,r2,#16 mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 + muls r2,r4 + muls r3,r3 + muls r4,r4 + lsrs r7,r2,#15 + lsls r2,r2,#17 + adds r3,r3,r2 + adcs r4,r7 // End: sqr 32 // Result in r3 ,r4 - .syntax unified mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified + subs r7,r3 + sbcs r0,r4 mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 + movs r4,#0 + sbcs r1,r4 + adds r7,r7,r6 + 
adcs r0,r2 + adcs r1,r4 // END: sqr 64 Refined Karatsuba // Result in r6,r7,r0,r1 // Returns r4 as zero. - .syntax unified mov r2,r12 mov r3,r8 mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified + subs r2,r6 + sbcs r3,r7 mov r6,r4 mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified + sbcs r4,r0 + sbcs r5,r1 + movs r0,#0 + sbcs r6,r0 + sbcs r7,r0 mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified + adds r2,r2,r0 mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified + adcs r3,r1 mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified + adcs r4,r0 mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified + adcs r5,r0 + movs r0,#0 + adcs r6,r0 + adcs r7,r0 mov r0,r10 - .syntax divided // END: sqr 128 Refined Karatsuba // Result in r0 ... r7 push {r4,r5,r6,r7} - .syntax unified mov r4,r14 - .syntax divided stm r4!,{r0,r1,r2,r3} ldr r4,[SP,#36] - add r4,#16 + adds r4,#16 ldm r4,{r4,r5,r6,r7} // sqr 128 Refined Karatsuba // Input in r4 ... r7 // Result in r0 ... 
r7 // clobbers all registers except for r14 - .syntax unified mov r0,r4 mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified + subs r0,r6 + sbcs r1,r7 + sbcs r2,r2 + eors r0,r2 + eors r1,r2 + subs r0,r2 + sbcs r1,r2 mov r8,r0 mov r9,r1 mov r10,r6 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r4,r5 // Result in r0,r1,r2,r3 @@ -388,85 +325,73 @@ square256_asm: // Result in r0 ,r1 // Clobbers: r2, r3 uxth r0,r4 - lsr r1,r4,#16 - .syntax unified + lsrs r1,r4,#16 mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 + muls r2,r1 + muls r0,r0 + muls r1,r1 + lsrs r3,r2,#15 + lsls r2,r2,#17 + adds r0,r0,r2 + adcs r1,r3 // End: sqr 32 // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 + subs r4,r5 + sbcs r6,r6 + eors r4,r6 + subs r4,r6 // START: sqr 32 // Input operand in r5 // Result in r2 ,r3 // Clobbers: r5, r6 uxth r2,r5 - lsr r3,r5,#16 - .syntax unified + lsrs r3,r5,#16 mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 + muls r5,r3 + muls r2,r2 + muls r3,r3 + lsrs r6,r5,#15 + lsls r5,r5,#17 + adds r2,r2,r5 + adcs r3,r6 // End: sqr 32 // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 + movs r6,#0 + adds r2,r2,r1 + adcs r3,r6 // START: sqr 32 // Input operand in r4 // Result in r4 ,r5 // Clobbers: r1, r6 - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - .syntax unified mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 + muls r1,r5 + muls r4,r4 + muls r5,r5 + lsrs r6,r1,#15 + lsls r1,r1,#17 + adds r4,r4,r1 + adcs r5,r6 // End: sqr 32 // Result in r4 ,r5 - .syntax unified mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified + subs r1,r4 + sbcs r2,r5 mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc 
r3,r6 + movs r6,#0 + sbcs r3,r6 + adds r1,r1,r0 + adcs r2,r5 + adcs r3,r6 // END: sqr 64 Refined Karatsuba // Result in r0,r1,r2,r3 // Leaves r6 zero. - .syntax unified mov r6,r10 mov r10,r0 mov r11,r1 mov r12,r2 mov r1,r3 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r6,r7 // Result in r2,r3,r4,r5 @@ -476,92 +401,78 @@ square256_asm: // Result in r2 ,r3 // Clobbers: r4, r5 uxth r2,r6 - lsr r3,r6,#16 - .syntax unified + lsrs r3,r6,#16 mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 + muls r4,r3 + muls r2,r2 + muls r3,r3 + lsrs r5,r4,#15 + lsls r4,r4,#17 + adds r2,r2,r4 + adcs r3,r5 // End: sqr 32 // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 + subs r6,r7 + sbcs r4,r4 + eors r6,r4 + subs r6,r4 // START: sqr 32 // Input operand in r7 // Result in r4 ,r5 // Clobbers: r0, r7 uxth r4,r7 - lsr r5,r7,#16 - .syntax unified + lsrs r5,r7,#16 mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 + muls r0,r5 + muls r4,r4 + muls r5,r5 + lsrs r7,r0,#15 + lsls r0,r0,#17 + adds r4,r4,r0 + adcs r5,r7 // End: sqr 32 // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 + movs r7,#0 + adds r4,r4,r3 + adcs r5,r7 // START: sqr 32 // Input operand in r6 // Result in r7 ,r0 // Clobbers: r6, r3 uxth r7,r6 - lsr r0,r6,#16 - .syntax unified + lsrs r0,r6,#16 mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 + muls r6,r0 + muls r7,r7 + muls r0,r0 + lsrs r3,r6,#15 + lsls r6,r6,#17 + adds r7,r7,r6 + adcs r0,r3 // End: sqr 32 // Result in r7 ,r0 - .syntax unified mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified + subs r3,r7 + sbcs r4,r0 mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 + movs r6,#0 + sbcs r5,r6 + adds r3,r3,r2 + adcs r4,r0 + adcs r5,r6 // END: sqr 64 Refined 
Karatsuba // Result in r2,r3,r4,r5 // Leaves r6 zero. - .syntax unified mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified + adds r2,r2,r0 + adcs r3,r1 + adcs r4,r6 + adcs r5,r6 mov r12,r2 mov r2,r8 mov r8,r3 mov r3,r9 mov r9,r4 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r2,r3 // Result in r6,r7,r0,r1 @@ -571,180 +482,146 @@ square256_asm: // Result in r6 ,r7 // Clobbers: r0, r1 uxth r6,r2 - lsr r7,r2,#16 - .syntax unified + lsrs r7,r2,#16 mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 + muls r0,r7 + muls r6,r6 + muls r7,r7 + lsrs r1,r0,#15 + lsls r0,r0,#17 + adds r6,r6,r0 + adcs r7,r1 // End: sqr 32 // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 + subs r2,r3 + sbcs r4,r4 + eors r2,r4 + subs r2,r4 // START: sqr 32 // Input operand in r3 // Result in r0 ,r1 // Clobbers: r3, r4 uxth r0,r3 - lsr r1,r3,#16 - .syntax unified + lsrs r1,r3,#16 mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 + muls r3,r1 + muls r0,r0 + muls r1,r1 + lsrs r4,r3,#15 + lsls r3,r3,#17 + adds r0,r0,r3 + adcs r1,r4 // End: sqr 32 // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 + movs r4,#0 + adds r0,r0,r7 + adcs r1,r4 // START: sqr 32 // Input operand in r2 // Result in r3 ,r4 // Clobbers: r2, r7 uxth r3,r2 - lsr r4,r2,#16 - .syntax unified + lsrs r4,r2,#16 mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 + muls r2,r4 + muls r3,r3 + muls r4,r4 + lsrs r7,r2,#15 + lsls r2,r2,#17 + adds r3,r3,r2 + adcs r4,r7 // End: sqr 32 // Result in r3 ,r4 - .syntax unified mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified + subs r7,r3 + sbcs r0,r4 mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 + movs r4,#0 + sbcs r1,r4 + adds r7,r7,r6 + 
adcs r0,r2 + adcs r1,r4 // END: sqr 64 Refined Karatsuba // Result in r6,r7,r0,r1 // Returns r4 as zero. - .syntax unified mov r2,r12 mov r3,r8 mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified + subs r2,r6 + sbcs r3,r7 mov r6,r4 mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified + sbcs r4,r0 + sbcs r5,r1 + movs r0,#0 + sbcs r6,r0 + sbcs r7,r0 mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified + adds r2,r2,r0 mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified + adcs r3,r1 mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified + adcs r4,r0 mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified + adcs r5,r0 + movs r0,#0 + adcs r6,r0 + adcs r7,r0 mov r0,r10 - .syntax divided // END: sqr 128 Refined Karatsuba // Result in r0 ... r7 - .syntax unified mov r8,r4 mov r9,r5 mov r10,r6 mov r11,r7 - .syntax divided pop {r4,r5,r6,r7} - add r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - .syntax unified + adds r0,r0,r4 + adcs r1,r5 + adcs r2,r6 + adcs r3,r7 mov r4,r8 mov r5,r9 mov r6,r10 mov r7,r11 mov r8,r0 - .syntax divided - mov r0,#0 - adc r4,r0 - adc r5,r0 - adc r6,r0 - adc r7,r0 - .syntax unified + movs r0,#0 + adcs r4,r0 + adcs r5,r0 + adcs r6,r0 + adcs r7,r0 mov r0,r8 - .syntax divided push {r0,r1,r2,r3,r4,r5,r6,r7} ldr r4,[SP,#52] ldm r4,{r0,r1,r2,r3,r4,r5,r6,r7} - sub r4,r0 - sbc r5,r1 - sbc r6,r2 - sbc r7,r3 - sbc r0,r0 - eor r4,r0 - eor r5,r0 - eor r6,r0 - eor r7,r0 - sub r4,r0 - sbc r5,r0 - sbc r6,r0 - sbc r7,r0 + subs r4,r0 + sbcs r5,r1 + sbcs r6,r2 + sbcs r7,r3 + sbcs r0,r0 + eors r4,r0 + eors r5,r0 + eors r6,r0 + eors r7,r0 + subs r4,r0 + sbcs r5,r0 + sbcs r6,r0 + sbcs r7,r0 // sqr 128 Refined Karatsuba // Input in r4 ... r7 // Result in r0 ... 
r7 // clobbers all registers except for r14 - .syntax unified mov r0,r4 mov r1,r5 - .syntax divided - sub r0,r6 - sbc r1,r7 - sbc r2,r2 - eor r0,r2 - eor r1,r2 - sub r0,r2 - sbc r1,r2 - .syntax unified + subs r0,r6 + sbcs r1,r7 + sbcs r2,r2 + eors r0,r2 + eors r1,r2 + subs r0,r2 + sbcs r1,r2 mov r8,r0 mov r9,r1 mov r10,r6 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r4,r5 // Result in r0,r1,r2,r3 @@ -754,85 +631,73 @@ square256_asm: // Result in r0 ,r1 // Clobbers: r2, r3 uxth r0,r4 - lsr r1,r4,#16 - .syntax unified + lsrs r1,r4,#16 mov r2,r0 - .syntax divided - mul r2,r1 - mul r0,r0 - mul r1,r1 - lsr r3,r2,#15 - lsl r2,r2,#17 - add r0,r2 - adc r1,r3 + muls r2,r1 + muls r0,r0 + muls r1,r1 + lsrs r3,r2,#15 + lsls r2,r2,#17 + adds r0,r0,r2 + adcs r1,r3 // End: sqr 32 // Result in r0 ,r1 - sub r4,r5 - sbc r6,r6 - eor r4,r6 - sub r4,r6 + subs r4,r5 + sbcs r6,r6 + eors r4,r6 + subs r4,r6 // START: sqr 32 // Input operand in r5 // Result in r2 ,r3 // Clobbers: r5, r6 uxth r2,r5 - lsr r3,r5,#16 - .syntax unified + lsrs r3,r5,#16 mov r5,r2 - .syntax divided - mul r5,r3 - mul r2,r2 - mul r3,r3 - lsr r6,r5,#15 - lsl r5,r5,#17 - add r2,r5 - adc r3,r6 + muls r5,r3 + muls r2,r2 + muls r3,r3 + lsrs r6,r5,#15 + lsls r5,r5,#17 + adds r2,r2,r5 + adcs r3,r6 // End: sqr 32 // Result in r2 ,r3 - mov r6,#0 - add r2,r1 - adc r3,r6 + movs r6,#0 + adds r2,r2,r1 + adcs r3,r6 // START: sqr 32 // Input operand in r4 // Result in r4 ,r5 // Clobbers: r1, r6 - lsr r5,r4,#16 + lsrs r5,r4,#16 uxth r4,r4 - .syntax unified mov r1,r4 - .syntax divided - mul r1,r5 - mul r4,r4 - mul r5,r5 - lsr r6,r1,#15 - lsl r1,r1,#17 - add r4,r1 - adc r5,r6 + muls r1,r5 + muls r4,r4 + muls r5,r5 + lsrs r6,r1,#15 + lsls r1,r1,#17 + adds r4,r4,r1 + adcs r5,r6 // End: sqr 32 // Result in r4 ,r5 - .syntax unified mov r1,r2 - .syntax divided - sub r1,r4 - sbc r2,r5 - .syntax unified + subs r1,r4 + sbcs r2,r5 mov r5,r3 - .syntax divided - mov r6,#0 - sbc r3,r6 - add r1,r0 - adc r2,r5 - adc 
r3,r6 + movs r6,#0 + sbcs r3,r6 + adds r1,r1,r0 + adcs r2,r5 + adcs r3,r6 // END: sqr 64 Refined Karatsuba // Result in r0,r1,r2,r3 // Leaves r6 zero. - .syntax unified mov r6,r10 mov r10,r0 mov r11,r1 mov r12,r2 mov r1,r3 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r6,r7 // Result in r2,r3,r4,r5 @@ -842,92 +707,78 @@ square256_asm: // Result in r2 ,r3 // Clobbers: r4, r5 uxth r2,r6 - lsr r3,r6,#16 - .syntax unified + lsrs r3,r6,#16 mov r4,r2 - .syntax divided - mul r4,r3 - mul r2,r2 - mul r3,r3 - lsr r5,r4,#15 - lsl r4,r4,#17 - add r2,r4 - adc r3,r5 + muls r4,r3 + muls r2,r2 + muls r3,r3 + lsrs r5,r4,#15 + lsls r4,r4,#17 + adds r2,r2,r4 + adcs r3,r5 // End: sqr 32 // Result in r2 ,r3 - sub r6,r7 - sbc r4,r4 - eor r6,r4 - sub r6,r4 + subs r6,r7 + sbcs r4,r4 + eors r6,r4 + subs r6,r4 // START: sqr 32 // Input operand in r7 // Result in r4 ,r5 // Clobbers: r0, r7 uxth r4,r7 - lsr r5,r7,#16 - .syntax unified + lsrs r5,r7,#16 mov r0,r4 - .syntax divided - mul r0,r5 - mul r4,r4 - mul r5,r5 - lsr r7,r0,#15 - lsl r0,r0,#17 - add r4,r0 - adc r5,r7 + muls r0,r5 + muls r4,r4 + muls r5,r5 + lsrs r7,r0,#15 + lsls r0,r0,#17 + adds r4,r4,r0 + adcs r5,r7 // End: sqr 32 // Result in r4 ,r5 - mov r7,#0 - add r4,r3 - adc r5,r7 + movs r7,#0 + adds r4,r4,r3 + adcs r5,r7 // START: sqr 32 // Input operand in r6 // Result in r7 ,r0 // Clobbers: r6, r3 uxth r7,r6 - lsr r0,r6,#16 - .syntax unified + lsrs r0,r6,#16 mov r6,r7 - .syntax divided - mul r6,r0 - mul r7,r7 - mul r0,r0 - lsr r3,r6,#15 - lsl r6,r6,#17 - add r7,r6 - adc r0,r3 + muls r6,r0 + muls r7,r7 + muls r0,r0 + lsrs r3,r6,#15 + lsls r6,r6,#17 + adds r7,r7,r6 + adcs r0,r3 // End: sqr 32 // Result in r7 ,r0 - .syntax unified mov r3,r4 - .syntax divided - sub r3,r7 - sbc r4,r0 - .syntax unified + subs r3,r7 + sbcs r4,r0 mov r0,r5 - .syntax divided - mov r6,#0 - sbc r5,r6 - add r3,r2 - adc r4,r0 - adc r5,r6 + movs r6,#0 + sbcs r5,r6 + adds r3,r3,r2 + adcs r4,r0 + adcs r5,r6 // END: sqr 64 Refined 
Karatsuba // Result in r2,r3,r4,r5 // Leaves r6 zero. - .syntax unified mov r0,r12 - .syntax divided - add r2,r0 - adc r3,r1 - adc r4,r6 - adc r5,r6 - .syntax unified + adds r2,r2,r0 + adcs r3,r1 + adcs r4,r6 + adcs r5,r6 mov r12,r2 mov r2,r8 mov r8,r3 mov r3,r9 mov r9,r4 - .syntax divided // START: sqr 64 Refined Karatsuba // Input operands in r2,r3 // Result in r6,r7,r0,r1 @@ -937,228 +788,172 @@ square256_asm: // Result in r6 ,r7 // Clobbers: r0, r1 uxth r6,r2 - lsr r7,r2,#16 - .syntax unified + lsrs r7,r2,#16 mov r0,r6 - .syntax divided - mul r0,r7 - mul r6,r6 - mul r7,r7 - lsr r1,r0,#15 - lsl r0,r0,#17 - add r6,r0 - adc r7,r1 + muls r0,r7 + muls r6,r6 + muls r7,r7 + lsrs r1,r0,#15 + lsls r0,r0,#17 + adds r6,r6,r0 + adcs r7,r1 // End: sqr 32 // Result in r6 ,r7 - sub r2,r3 - sbc r4,r4 - eor r2,r4 - sub r2,r4 + subs r2,r3 + sbcs r4,r4 + eors r2,r4 + subs r2,r4 // START: sqr 32 // Input operand in r3 // Result in r0 ,r1 // Clobbers: r3, r4 uxth r0,r3 - lsr r1,r3,#16 - .syntax unified + lsrs r1,r3,#16 mov r3,r0 - .syntax divided - mul r3,r1 - mul r0,r0 - mul r1,r1 - lsr r4,r3,#15 - lsl r3,r3,#17 - add r0,r3 - adc r1,r4 + muls r3,r1 + muls r0,r0 + muls r1,r1 + lsrs r4,r3,#15 + lsls r3,r3,#17 + adds r0,r0,r3 + adcs r1,r4 // End: sqr 32 // Result in r0 ,r1 - mov r4,#0 - add r0,r7 - adc r1,r4 + movs r4,#0 + adds r0,r0,r7 + adcs r1,r4 // START: sqr 32 // Input operand in r2 // Result in r3 ,r4 // Clobbers: r2, r7 uxth r3,r2 - lsr r4,r2,#16 - .syntax unified + lsrs r4,r2,#16 mov r2,r3 - .syntax divided - mul r2,r4 - mul r3,r3 - mul r4,r4 - lsr r7,r2,#15 - lsl r2,r2,#17 - add r3,r2 - adc r4,r7 + muls r2,r4 + muls r3,r3 + muls r4,r4 + lsrs r7,r2,#15 + lsls r2,r2,#17 + adds r3,r3,r2 + adcs r4,r7 // End: sqr 32 // Result in r3 ,r4 - .syntax unified mov r7,r0 - .syntax divided - sub r7,r3 - sbc r0,r4 - .syntax unified + subs r7,r3 + sbcs r0,r4 mov r2,r1 - .syntax divided - mov r4,#0 - sbc r1,r4 - add r7,r6 - adc r0,r2 - adc r1,r4 + movs r4,#0 + sbcs r1,r4 + adds r7,r7,r6 + 
adcs r0,r2 + adcs r1,r4 // END: sqr 64 Refined Karatsuba // Result in r6,r7,r0,r1 // Returns r4 as zero. - .syntax unified mov r2,r12 mov r3,r8 mov r4,r9 - .syntax divided - sub r2,r6 - sbc r3,r7 - .syntax unified + subs r2,r6 + sbcs r3,r7 mov r6,r4 mov r7,r5 - .syntax divided - sbc r4,r0 - sbc r5,r1 - mov r0,#0 - sbc r6,r0 - sbc r7,r0 - .syntax unified + sbcs r4,r0 + sbcs r5,r1 + movs r0,#0 + sbcs r6,r0 + sbcs r7,r0 mov r0,r10 - .syntax divided - add r2,r0 - .syntax unified + adds r2,r2,r0 mov r1,r11 - .syntax divided - adc r3,r1 - .syntax unified + adcs r3,r1 mov r0,r12 - .syntax divided - adc r4,r0 - .syntax unified + adcs r4,r0 mov r0,r8 - .syntax divided - adc r5,r0 - mov r0,#0 - adc r6,r0 - adc r7,r0 - .syntax unified + adcs r5,r0 + movs r0,#0 + adcs r6,r0 + adcs r7,r0 mov r0,r10 - .syntax divided // END: sqr 128 Refined Karatsuba // Result in r0 ... r7 - mvn r0,r0 - mvn r1,r1 - mvn r2,r2 - mvn r3,r3 - mvn r4,r4 - mvn r5,r5 - mvn r6,r6 - mvn r7,r7 - .syntax unified + mvns r0,r0 + mvns r1,r1 + mvns r2,r2 + mvns r3,r3 + mvns r4,r4 + mvns r5,r5 + mvns r6,r6 + mvns r7,r7 mov r8,r4 mov r9,r5 mov r10,r6 mov r11,r7 - .syntax divided - mov r4,#143 - asr r4,r4,#1 + movs r4,#143 + asrs r4,r4,#1 pop {r4,r5,r6,r7} - adc r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 - .syntax unified + adcs r0,r4 + adcs r1,r5 + adcs r2,r6 + adcs r3,r7 mov r12,r4 - .syntax divided - mov r4,#16 + movs r4,#16 add r4,r14 stm r4!,{r0,r1,r2,r3} - .syntax unified mov r4,r12 mov r0,r8 - .syntax divided - adc r0,r4 - .syntax unified + adcs r0,r4 mov r8,r0 mov r1,r9 - .syntax divided - adc r1,r5 - .syntax unified + adcs r1,r5 mov r9,r1 mov r2,r10 - .syntax divided - adc r2,r6 - .syntax unified + adcs r2,r6 mov r10,r2 mov r3,r11 - .syntax divided - adc r3,r7 - .syntax unified + adcs r3,r7 mov r11,r3 - .syntax divided - mov r0,#0 - adc r0,r0 - .syntax unified + movs r0,#0 + adcs r0,r0 mov r12,r0 mov r0,r14 - .syntax divided ldm r0,{r0,r1,r2,r3,r4,r5,r6,r7} - add r0,r4 - adc r1,r5 - adc r2,r6 - adc r3,r7 
- mov r4,#16 + adds r0,r0,r4 + adcs r1,r5 + adcs r2,r6 + adcs r3,r7 + movs r4,#16 add r4,r14 stm r4!,{r0,r1,r2,r3} - .syntax unified mov r14,r4 mov r0,r13 - .syntax divided ldm r0!,{r4,r5,r6,r7} - .syntax unified mov r1,r8 - .syntax divided - adc r4,r1 - .syntax unified + adcs r4,r1 mov r1,r9 - .syntax divided - adc r5,r1 - .syntax unified + adcs r5,r1 mov r1,r10 - .syntax divided - adc r6,r1 - .syntax unified + adcs r6,r1 mov r1,r11 - .syntax divided - adc r7,r1 - .syntax unified + adcs r7,r1 mov r0,r14 - .syntax divided stm r0!,{r4,r5,r6,r7} pop {r4,r5,r6,r7} - .syntax unified mov r1,r12 - .syntax divided - mov r2,#0 - mvn r2,r2 - adc r1,r2 - asr r2,r1,#4 - add r4,r1 - adc r5,r2 - adc r6,r2 - adc r7,r2 + movs r2,#0 + mvns r2,r2 + adcs r1,r2 + asrs r2,r1,#4 + adds r4,r4,r1 + adcs r5,r2 + adcs r6,r2 + adcs r7,r2 stm r0!,{r4,r5,r6,r7} pop {r3,r4,r5,r6,r7} - .syntax unified mov r8,r3 mov r9,r4 mov r10,r5 mov r11,r6 mov r12,r7 - .syntax divided pop {r0,r4,r5,r6,r7,r15} //Cycle Count ASM-Version of 256 sqr (Refined Karatsuba) (Cortex M0): 793 (697 instructions). .size square256_asm, .-square256_asm |