summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2020-11-25 15:38:47 +0100
committerNiels Möller <nisse@lysator.liu.se>2020-11-25 15:38:47 +0100
commit3e2e2b15ba43d6c2cb7e7a7b1db663be727b1071 (patch)
tree3159cfb11bc24ffab9eb2079bd0a417ad206092f
parent87ecdf43c145b76583a3201ccfa1cfa9b93d68d4 (diff)
downloadnettle-ppc-chacha-2core.tar.gz
ppc: Support big-endian for _chacha_2core.ppc-chacha-2core
-rw-r--r--ChangeLog5
-rw-r--r--powerpc64/p7/chacha-2core.asm17
2 files changed, 22 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 4c91ccbc..2bff6ccc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2020-11-25 Niels Möller <nisse@lysator.liu.se>
+
+ * powerpc64/p7/chacha-2core.asm: Add byte swapping of output, for
+ big-endian builds.
+
2020-11-24 Niels Möller <nisse@lysator.liu.se>
Enable ppc chacha_2core in fat builds.
diff --git a/powerpc64/p7/chacha-2core.asm b/powerpc64/p7/chacha-2core.asm
index 725d72af..265918b6 100644
--- a/powerpc64/p7/chacha-2core.asm
+++ b/powerpc64/p7/chacha-2core.asm
@@ -212,6 +212,23 @@ C Y3 A15 B15 A13 B13 X3 A12 B12 A14 B14 (Y3 swapped)
vadduwm X2, X2, S3
vadduwm Y3, Y3, S3p1
+IF_BE(`
+ C Output always stored in little-endian byte order.
+ C Can reuse S0 and S1 to construct the permutation mask.
+ li r9, 0
+ lvsl S0, r9, r9 C 00 01 02 03 ... 0c 0d 0e 0f
+ vspltisb S1, 0x03 C 03 03 03 03 ... 03 03 03 03
+ vxor S1, S1, S0 C 03 02 01 00 ... 0f 0e 0d 0c
+
+ vperm T0, T0, T0, S1
+ vperm X0, X0, X0, S1
+ vperm X1, X1, X1, S1
+ vperm X2, X2, X2, S1
+ vperm Y0, Y0, Y0, S1
+ vperm Y1, Y1, Y1, S1
+ vperm Y2, Y2, Y2, S1
+ vperm Y3, Y3, Y3, S1
+')
stxvw4x VSR(T0), 0, DST
stxvw4x VSR(X0), r6, DST
stxvw4x VSR(X1), r7, DST