summaryrefslogtreecommitdiff
path: root/powerpc64
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2020-09-25 21:05:48 +0200
committerNiels Möller <nisse@lysator.liu.se>2020-09-25 21:05:48 +0200
commit5c19a1cd72315b2c8a2a08754b1b4950ca118943 (patch)
treeab8d6df1734c8cbc758732022e0a27cd604c0034 /powerpc64
parent04117efd723675a4302d75c8bc1eb8cdad48c63b (diff)
downloadnettle-5c19a1cd72315b2c8a2a08754b1b4950ca118943.tar.gz
ppc: Assembly implementation of _chacha_core.
Diffstat (limited to 'powerpc64')
-rw-r--r--powerpc64/p7/chacha-core-internal.asm140
1 files changed, 140 insertions, 0 deletions
diff --git a/powerpc64/p7/chacha-core-internal.asm b/powerpc64/p7/chacha-core-internal.asm
new file mode 100644
index 00000000..33c721c1
--- /dev/null
+++ b/powerpc64/p7/chacha-core-internal.asm
@@ -0,0 +1,140 @@
+C powerpc64/p7/chacha-core-internal.asm
+
+ifelse(`
+ Copyright (C) 2020 Niels Möller and Torbjörn Granlund
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Register usage:
+
+C Arguments (standard ELFv2 integer argument registers)
+define(`DST', `r3')	C uint32_t *dst, 64-byte output
+define(`SRC', `r4')	C const uint32_t *src, 64-byte input state
+define(`ROUNDS', `r5')	C unsigned rounds (halved below; loop does 2 rounds/iteration)
+
+C Working state: one 4 x 32-bit row of the ChaCha matrix per vector register
+define(`X0', `v0')
+define(`X1', `v1')
+define(`X2', `v2')
+define(`X3', `v3')
+
+C Splatted rotate counts for the four quarter-round rotations
+define(`ROT16', `v4')
+define(`ROT12', `v5')
+define(`ROT8', `v6')
+define(`ROT7', `v7')
+
+C Original input state, kept for the final feed-forward addition
+define(`S0', `v8')
+define(`S1', `v9')
+define(`S2', `v10')
+define(`S3', `v11')
+C QROUND(X0, X1, X2, X3)
+C ChaCha quarter-round, vectorized: each argument register holds one
+C row of four 32-bit words, so a single expansion performs the four
+C column (or, after row rotation, diagonal) quarter-rounds in
+C parallel. Clobbers all four argument registers; reads the
+C ROT16/ROT12/ROT8/ROT7 rotate-count registers set up in the caller.
+define(`QROUND', `
+	C x0 += x1, x3 ^= x0, x3 lrot 16
+	C x2 += x3, x1 ^= x2, x1 lrot 12
+	C x0 += x1, x3 ^= x0, x3 lrot 8
+	C x2 += x3, x1 ^= x2, x1 lrot 7
+
+	vadduwm $1, $1, $2
+	vxor $4, $4, $1
+	vrlw $4, $4, ROT16
+
+	vadduwm $3, $3, $4
+	vxor $2, $2, $3
+	vrlw $2, $2, ROT12
+
+	vadduwm $1, $1, $2
+	vxor $4, $4, $1
+	vrlw $4, $4, ROT8
+
+	vadduwm $3, $3, $4
+	vxor $2, $2, $3
+	vrlw $2, $2, ROT7
+')
+
+	.text
+	.align 4
+	C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
+	C
+	C Runs ROUNDS rounds of ChaCha on the 4x4 matrix of 32-bit words
+	C at SRC, adds the original input back in (the feed-forward), and
+	C stores the 64-byte result at DST. ROUNDS is assumed even, since
+	C the loop below processes a column round + diagonal round pair
+	C per iteration.
+
+PROLOGUE(_nettle_chacha_core)
+
+	C Byte offsets of state rows 1..3 for the indexed vector
+	C load/store instructions below.
+	li r6, 0x10 C set up some...
+	li r7, 0x20 C ...useful...
+	li r8, 0x30 C ...offsets
+
+	C Splat rotate counts. vspltisw takes a 5-bit signed immediate,
+	C so 16 is out of range; -16 has the same low 5 bits, and vrlw
+	C only uses the low 5 bits of each count element.
+	vspltisw ROT16, -16	C -16 instead of 16 actually works!
+	vspltisw ROT12, 12
+	vspltisw ROT8, 8
+	vspltisw ROT7, 7
+
+	C Load the four 16-byte rows of the input state.
+	lxvw4x VSR(X0), 0, SRC
+	lxvw4x VSR(X1), r6, SRC
+	lxvw4x VSR(X2), r7, SRC
+	lxvw4x VSR(X3), r8, SRC
+
+	C Save a copy of the input for the final feed-forward addition.
+	vor S0, X0, X0
+	vor S1, X1, X1
+	vor S2, X2, X2
+	vor S3, X3, X3
+
+	C Loop body does two rounds, so iterate ROUNDS/2 times.
+	srdi ROUNDS, ROUNDS, 1
+	mtctr ROUNDS
+
+.Loop:
+	C Column round on the matrix as loaded.
+	QROUND(X0, X1, X2, X3)
+	C Rotate rows, to get
+	C 0  1  2  3
+	C 5  6  7  4  <<< 1
+	C 10 11 8  9  <<< 2
+	C 15 12 13 14 <<< 3
+	C so that the next QROUND works on the diagonals.
+	vsldoi X1, X1, X1, 4
+	vsldoi X2, X2, X2, 8
+	vsldoi X3, X3, X3, 12
+
+	C Diagonal round.
+	QROUND(X0, X1, X2, X3)
+
+	C Inverse rotation, restoring row order for the next iteration.
+	vsldoi X1, X1, X1, 12
+	vsldoi X2, X2, X2, 8
+	vsldoi X3, X3, X3, 4
+
+	bdnz .Loop
+
+	C Feed-forward: add the saved input state to the output.
+	vadduwm X0, X0, S0
+	vadduwm X1, X1, S1
+	vadduwm X2, X2, S2
+	vadduwm X3, X3, S3
+
+	C Store the 64-byte result.
+	stxvw4x VSR(X0), 0, DST
+	stxvw4x VSR(X1), r6, DST
+	stxvw4x VSR(X2), r7, DST
+	stxvw4x VSR(X3), r8, DST
+
+	blr
+EPILOGUE(_nettle_chacha_core)