summary | refs | log | tree | commit | diff
path: root/arm
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2020-07-13 22:49:32 +0200
committer Niels Möller <nisse@lysator.liu.se> 2020-07-13 22:49:32 +0200
commit c23a5f17ab3ec653267cd9131949ee7b5b5c5c27 (patch)
tree c3066aa8575c5aa2242c4beb6e8b0e3c6c121a3d /arm
parent 4f2ee038f161f26452e001bbf48f8e7d103d8874 (diff)
download nettle-c23a5f17ab3ec653267cd9131949ee7b5b5c5c27.tar.gz
Keep counter words in registers
Diffstat (limited to 'arm')
-rw-r--r-- arm/neon/chacha-3core.asm 30
1 file changed, 17 insertions, 13 deletions
diff --git a/arm/neon/chacha-3core.asm b/arm/neon/chacha-3core.asm
index f1fa5cb8..9d06d5b1 100644
--- a/arm/neon/chacha-3core.asm
+++ b/arm/neon/chacha-3core.asm
@@ -79,8 +79,10 @@ PROLOGUE(_nettle_chacha_3core)
vmov Z1, X1
vmov Y2, X2
vmov Z2, X2
- vpush {Z3}
- vpush {Y3}
+
+ C Save initial values for the words including the counters.
+ vmov T2, Y3
+ vmov T3, Z3
.Loop:
C Interleave three blocks. Note that with this scheduling,
@@ -207,26 +209,28 @@ PROLOGUE(_nettle_chacha_3core)
bhi .Loop
+ C Add updated counters
+ vadd.i32 Y3, Y3, T2
+ vadd.i32 Z3, Z3, T3
+
vldm SRC, {T0,T1,T2,T3}
vadd.i32 X0, X0, T0
- vadd.i32 Y0, Y0, T0
- vadd.i32 Z0, Z0, T0
vadd.i32 X1, X1, T1
- vadd.i32 Y1, Y1, T1
- vadd.i32 Z1, Z1, T1
vadd.i32 X2, X2, T2
+ vadd.i32 X3, X3, T3
+ vstmia DST!, {X0,X1,X2,X3}
+
+ vadd.i32 Y0, Y0, T0
+ vadd.i32 Y1, Y1, T1
vadd.i32 Y2, Y2, T2
- vadd.i32 Z2, Z2, T2
+ vstmia DST!, {Y0,Y1,Y2,Y3}
- vpop {T0, T1} C updated counters
- vadd.i32 X3, X3, T3
- vadd.i32 Y3, Y3, T0
- vadd.i32 Z3, Z3, T1
+ vadd.i32 Z0, Z0, T0
+ vadd.i32 Z1, Z1, T1
+ vadd.i32 Z2, Z2, T2
vpop {q4,q5,q6,q7}
- vstmia DST!, {X0,X1,X2,X3}
- vstmia DST!, {Y0,Y1,Y2,Y3}
vstm DST, {Z0,Z1,Z2,Z3}
bx lr
EPILOGUE(_nettle_chacha_3core)