summaryrefslogtreecommitdiff
path: root/cipher/poly1305-s390x.S
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2020-12-30 17:46:07 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2020-12-30 17:46:07 +0200
commit: 1f75681cbba895ea2f7ea0637900721f4522e729 (patch)
tree: 19eb7a48b5513f9f5811b1e515a3d4c8e637641c /cipher/poly1305-s390x.S
parent: 6a0bb9ab7f886087d7edb0725c90485086a1c0b4 (diff)
download: libgcrypt-1f75681cbba895ea2f7ea0637900721f4522e729.tar.gz
Add s390x/zSeries implementation of Poly1305 [cipher-s390x-optimizations]
* cipher/Makefile.am: Add 'poly1305-s390x.S' and 'asm-poly1305-s390x.h'.
* cipher/asm-poly1305-s390x.h: New.
* cipher/chacha20-s390x.S (_gcry_chacha20_poly1305_s390x_vx_blocks8)
(_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1): New, stitched
chacha20-poly1305 implementation.
* cipher/chacha20.c (USE_S390X_VX_POLY1305): New.
(_gcry_chacha20_poly1305_s390x_vx_blocks8)
(_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1): New prototypes.
(_gcry_chacha20_poly1305_encrypt, _gcry_chacha20_poly1305_decrypt): Add
s390x/VX stitched chacha20-poly1305 code-path.
* cipher/poly1305-s390x.S: New.
* cipher/poly1305.c (USE_S390X_ASM, HAVE_ASM_POLY1305_BLOCKS): New.
[USE_S390X_ASM] (_gcry_poly1305_s390x_blocks1, poly1305_blocks): New.
* configure.ac (gcry_cv_gcc_inline_asm_s390x): Check for 'risbgn' and
'algrk' instructions.
* tests/basic.c (_check_poly1305_cipher): Add large chacha20-poly1305
test vector.
--

Patch adds Poly1305 and stitched ChaCha20-Poly1305 implementation for
zSeries. Stitched implementation interleaves ChaCha20 and Poly1305
processing for higher instruction level parallelism and better
utilization of execution units.

Benchmark on z15 (4504 MHz):

Before:
 CHACHA20       |  nanosecs/byte   mebibytes/sec   cycles/byte
   POLY1305 enc |      1.16 ns/B     823.2 MiB/s      5.22 c/B
   POLY1305 dec |      1.16 ns/B     823.2 MiB/s      5.22 c/B
  POLY1305 auth |     0.736 ns/B      1295 MiB/s      3.32 c/B

After (chacha20-poly1305 ~71% faster, poly1305 ~29% faster):
 CHACHA20       |  nanosecs/byte   mebibytes/sec   cycles/byte
   POLY1305 enc |     0.677 ns/B      1409 MiB/s      3.05 c/B
   POLY1305 dec |     0.655 ns/B      1456 MiB/s      2.95 c/B
  POLY1305 auth |     0.569 ns/B      1675 MiB/s      2.56 c/B

GnuPG-bug-id: 5202
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/poly1305-s390x.S')
-rw-r--r-- cipher/poly1305-s390x.S | 87
1 files changed, 87 insertions, 0 deletions
diff --git a/cipher/poly1305-s390x.S b/cipher/poly1305-s390x.S
new file mode 100644
index 00000000..844245f6
--- /dev/null
+++ b/cipher/poly1305-s390x.S
@@ -0,0 +1,87 @@
+/* poly1305-s390x.S - zSeries implementation of Poly1305
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+
+#include "asm-poly1305-s390x.h"
+
+.text
+
+.balign 8
+.globl _gcry_poly1305_s390x_blocks1
+ELF(.type _gcry_poly1305_s390x_blocks1,@function;)
+
+_gcry_poly1305_s390x_blocks1:
+ /* Run the POLY1305_* block macros over 16-byte blocks of src; input:
+ * %r2: poly1305-state (copied to POLY_RSTATE)
+ * %r3: src (copied to POLY_RSRC)
+ * %r4: len in bytes; the block count is len >> 4
+ * %r5: high_pad; 0 selects the single-block .Lpoly_high0 path
+ */
+ CFI_STARTPROC();
+
+ stmg %r6, %r14, 6 * 8(%r15); /* save %r6-%r14 at 48(%r15); reloaded by the lmg before return */
+
+ lgr POLY_RSTATE, %r2;
+ lgr POLY_RSRC, %r3;
+ srlg %r0, %r4, 4; /* %r0 = len / 16, the loop counter consumed by brctg below */
+
+ cgije %r5, 0, .Lpoly_high0; /* high_pad == 0: take the single-block path */
+
+ POLY1305_LOAD_STATE();
+
+.balign 4
+.Lpoly_loop_high1:
+ POLY1305_BLOCK_PART1(0 * 16); /* NOTE(review): presumably reads the block at POLY_RSRC + 0 with the high pad bit set - confirm in asm-poly1305-s390x.h */
+ INC_POLY1305_SRC(1 * 16); /* step the source by one 16-byte block (presumably POLY_RSRC += 16) */
+.Lpoly_block_part2: /* also entered from .Lpoly_high0, which bypasses PART1 and the source increment above */
+ POLY1305_BLOCK_PART2();
+ POLY1305_BLOCK_PART3();
+ POLY1305_BLOCK_PART4();
+ POLY1305_BLOCK_PART5();
+ POLY1305_BLOCK_PART6();
+ POLY1305_BLOCK_PART7();
+ POLY1305_BLOCK_PART8();
+
+ brctg %r0, .Lpoly_loop_high1; /* decrement %r0, loop while non-zero. NOTE(review): %r0 == 0 here (len < 16 with high_pad != 0) would wrap and keep looping - callers presumably pass len >= 16; confirm */
+
+.balign 4
+.Lpoly_done:
+ POLY1305_STORE_STATE();
+
+ lmg %r6, %r14, 6 * 8(%r15); /* reload %r6-%r14, including %r14 used by the br below */
+ xgr %r2, %r2; /* return value is always 0 */
+ br %r14;
+
+.balign 4
+.Lpoly_high0:
+ lghi %r0, 1; /* count of 1: brctg falls through after this one block, whatever len is */
+ POLY1305_LOAD_STATE();
+ POLY1305_BLOCK_PART1_HB(0 * 16, 0); /* PART1 variant with an explicit second argument, here 0 (presumably the high pad bit - confirm) */
+ j .Lpoly_block_part2;
+
+ CFI_ENDPROC();
+ELF(.size _gcry_poly1305_s390x_blocks1,
+ .-_gcry_poly1305_s390x_blocks1;)
+
+#endif /*HAVE_GCC_INLINE_ASM_S390X*/
+#endif /*__s390x__*/