diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-09-25 21:05:48 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-09-25 21:05:48 +0200 |
commit | 5c19a1cd72315b2c8a2a08754b1b4950ca118943 (patch) | |
tree | ab8d6df1734c8cbc758732022e0a27cd604c0034 /powerpc64 | |
parent | 04117efd723675a4302d75c8bc1eb8cdad48c63b (diff) | |
download | nettle-5c19a1cd72315b2c8a2a08754b1b4950ca118943.tar.gz |
ppc: Assembly implementation of _chacha_core.
Diffstat (limited to 'powerpc64')
-rw-r--r-- | powerpc64/p7/chacha-core-internal.asm | 140 |
1 file changed, 140 insertions, 0 deletions
diff --git a/powerpc64/p7/chacha-core-internal.asm b/powerpc64/p7/chacha-core-internal.asm new file mode 100644 index 00000000..33c721c1 --- /dev/null +++ b/powerpc64/p7/chacha-core-internal.asm @@ -0,0 +1,140 @@ +C powerpc64/p7/chacha-core-internal.asm + +ifelse(` + Copyright (C) 2020 Niels Möller and Torbjörn Granlund + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
')

C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
C
C ChaCha block function: loads the 4x4 matrix of 32-bit words from SRC,
C applies ROUNDS/2 double-rounds (a column round followed by a diagonal
C round), adds back the original input state, and stores the 64-byte
C result to DST.  One 16-byte state row is kept per vector register, so
C a quarter-round runs on all four columns in parallel with element-wise
C vector operations.
C NOTE(review): lxvw4x/stxvw4x are used for the 16-byte loads/stores;
C presumably word-element order is what the word-wise state layout
C needs on the targeted endianness -- confirm against the other
C powerpc64 nettle assembly.

C Register usage:

C Arguments
define(`DST', `r3')
define(`SRC', `r4')
define(`ROUNDS', `r5')

C Working state: one state row (four 32-bit words) per vector register
define(`X0', `v0')
define(`X1', `v1')
define(`X2', `v2')
define(`X3', `v3')

C Rotate-count vectors for vrlw; every 32-bit element holds the count
define(`ROT16', `v4')
define(`ROT12', `v5')
define(`ROT8', `v6')
define(`ROT7', `v7')

C Original input state, saved for the final feed-forward addition
define(`S0', `v8')
define(`S1', `v9')
define(`S2', `v10')
define(`S3', `v11')

C QROUND(X0, X1, X2, X3)
C One ChaCha quarter-round applied to all four columns at once:
C vadduwm / vxor / vrlw each act element-wise on the four 32-bit lanes.
define(`QROUND', `
	C x0 += x1, x3 ^= x0, x3 lrot 16
	C x2 += x3, x1 ^= x2, x1 lrot 12
	C x0 += x1, x3 ^= x0, x3 lrot 8
	C x2 += x3, x1 ^= x2, x1 lrot 7

	vadduwm	$1, $1, $2
	vxor	$4, $4, $1
	vrlw	$4, $4, ROT16

	vadduwm	$3, $3, $4
	vxor	$2, $2, $3
	vrlw	$2, $2, ROT12

	vadduwm	$1, $1, $2
	vxor	$4, $4, $1
	vrlw	$4, $4, ROT8

	vadduwm	$3, $3, $4
	vxor	$2, $2, $3
	vrlw	$2, $2, ROT7
')

	.text
	.align 4
	C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds)

PROLOGUE(_nettle_chacha_core)

	C Byte offsets of state rows 1-3 (row 0 is at offset 0)
	li	r6, 0x10	C set up some...
	li	r7, 0x20	C ...useful...
	li	r8, 0x30	C ...offsets

	C vspltisw sign-extends a 5-bit immediate, so +16 is not
	C representable; -16 works because vrlw uses only the low five
	C bits of each element's count, and -16 = 16 (mod 32).
	vspltisw ROT16, -16	C -16 instead of 16 actually works!
	vspltisw ROT12, 12
	vspltisw ROT8, 8
	vspltisw ROT7, 7

	C Load the four 16-byte state rows
	lxvw4x	VSR(X0), 0, SRC
	lxvw4x	VSR(X1), r6, SRC
	lxvw4x	VSR(X2), r7, SRC
	lxvw4x	VSR(X3), r8, SRC

	C Save the input state (vor d,s,s is a vector register move)
	vor	S0, X0, X0
	vor	S1, X1, X1
	vor	S2, X2, X2
	vor	S3, X3, X3

	C Each loop iteration does two rounds, so count ROUNDS/2
	srdi	ROUNDS, ROUNDS, 1
	mtctr	ROUNDS

.Loop:
	C Column round: quarter-rounds down the four columns
	QROUND(X0, X1, X2, X3)
	C Rotate rows, to get
	C  0  1  2  3
	C  5  6  7  4  <<< 1
	C 10 11  8  9  <<< 2
	C 15 12 13 14  <<< 3
	C so the state diagonals line up as vector columns.

	vsldoi	X1, X1, X1, 4
	vsldoi	X2, X2, X2, 8
	vsldoi	X3, X3, X3, 12

	C Diagonal round (operates on the now-columnized diagonals)
	QROUND(X0, X1, X2, X3)

	C Inverse rotation, back to row order
	vsldoi	X1, X1, X1, 12
	vsldoi	X2, X2, X2, 8
	vsldoi	X3, X3, X3, 4

	bdnz	.Loop

	C Feed-forward: add the original input state to the output
	vadduwm	X0, X0, S0
	vadduwm	X1, X1, S1
	vadduwm	X2, X2, S2
	vadduwm	X3, X3, S3

	C Store the 64-byte result
	stxvw4x	VSR(X0), 0, DST
	stxvw4x	VSR(X1), r6, DST
	stxvw4x	VSR(X2), r7, DST
	stxvw4x	VSR(X3), r8, DST

	blr
EPILOGUE(_nettle_chacha_core)