diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-07-13 22:49:32 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-07-13 22:49:32 +0200 |
commit | c23a5f17ab3ec653267cd9131949ee7b5b5c5c27 (patch) | |
tree | c3066aa8575c5aa2242c4beb6e8b0e3c6c121a3d /arm | |
parent | 4f2ee038f161f26452e001bbf48f8e7d103d8874 (diff) | |
download | nettle-c23a5f17ab3ec653267cd9131949ee7b5b5c5c27.tar.gz |
Keep counter words in registers
Diffstat (limited to 'arm')
-rw-r--r-- | arm/neon/chacha-3core.asm | 30 |
1 files changed, 17 insertions, 13 deletions
```diff
diff --git a/arm/neon/chacha-3core.asm b/arm/neon/chacha-3core.asm
index f1fa5cb8..9d06d5b1 100644
--- a/arm/neon/chacha-3core.asm
+++ b/arm/neon/chacha-3core.asm
@@ -79,8 +79,10 @@ PROLOGUE(_nettle_chacha_3core)
 	vmov Z1, X1
 	vmov Y2, X2
 	vmov Z2, X2
-	vpush {Z3}
-	vpush {Y3}
+
+	C Save initial values for the words including the counters.
+	vmov T2, Y3
+	vmov T3, Z3
 
 .Loop:
 	C Interleave three blocks. Note that with this scheduling,
@@ -207,26 +209,28 @@ PROLOGUE(_nettle_chacha_3core)
 
 	bhi .Loop
 
+	C Add updated counters
+	vadd.i32 Y3, Y3, T2
+	vadd.i32 Z3, Z3, T3
+
 	vldm SRC, {T0,T1,T2,T3}
 	vadd.i32 X0, X0, T0
-	vadd.i32 Y0, Y0, T0
-	vadd.i32 Z0, Z0, T0
 	vadd.i32 X1, X1, T1
-	vadd.i32 Y1, Y1, T1
-	vadd.i32 Z1, Z1, T1
 	vadd.i32 X2, X2, T2
+	vadd.i32 X3, X3, T3
+	vstmia DST!, {X0,X1,X2,X3}
+
+	vadd.i32 Y0, Y0, T0
+	vadd.i32 Y1, Y1, T1
 	vadd.i32 Y2, Y2, T2
-	vadd.i32 Z2, Z2, T2
+	vstmia DST!, {Y0,Y1,Y2,Y3}
 
-	vpop {T0, T1} C updated counters
-	vadd.i32 X3, X3, T3
-	vadd.i32 Y3, Y3, T0
-	vadd.i32 Z3, Z3, T1
+	vadd.i32 Z0, Z0, T0
+	vadd.i32 Z1, Z1, T1
+	vadd.i32 Z2, Z2, T2
 
 	vpop {q4,q5,q6,q7}
 
-	vstmia DST!, {X0,X1,X2,X3}
-	vstmia DST!, {Y0,Y1,Y2,Y3}
 	vstm DST, {Z0,Z1,Z2,Z3}
 	bx lr
 EPILOGUE(_nettle_chacha_3core)
```