summaryrefslogtreecommitdiff
path: root/powerpc64/p7/chacha-2core.asm
diff options
context:
space:
mode:
Diffstat (limited to 'powerpc64/p7/chacha-2core.asm')
-rw-r--r--powerpc64/p7/chacha-2core.asm17
1 files changed, 17 insertions, 0 deletions
diff --git a/powerpc64/p7/chacha-2core.asm b/powerpc64/p7/chacha-2core.asm
index 725d72af..265918b6 100644
--- a/powerpc64/p7/chacha-2core.asm
+++ b/powerpc64/p7/chacha-2core.asm
@@ -212,6 +212,23 @@ C Y3 A15 B15 A13 B13 X3 A12 B12 A14 B14 (Y3 swapped)
vadduwm X2, X2, S3
vadduwm Y3, Y3, S3p1
+IF_BE(`
+ C Output always stored in little-endian byte order.
+ C Can reuse S0 and S1 to construct permutation mask.
+ li r9, 0
+ lvsl S0, r9, r9 C 00 01 02 03 ... 0c 0d 0e 0f
+ vspltisb S1, 0x03 C 03 03 03 03 ... 03 03 03 03
+ vxor S1, S1, S0 C 03 02 01 00 ... 0f 0e 0d 0c
+
+ vperm T0, T0, T0, S1
+ vperm X0, X0, X0, S1
+ vperm X1, X1, X1, S1
+ vperm X2, X2, X2, S1
+ vperm Y0, Y0, Y0, S1
+ vperm Y1, Y1, Y1, S1
+ vperm Y2, Y2, Y2, S1
+ vperm Y3, Y3, Y3, S1
+')
stxvw4x VSR(T0), 0, DST
stxvw4x VSR(X0), r6, DST
stxvw4x VSR(X1), r7, DST