summaryrefslogtreecommitdiff
path: root/cipher/poly1305.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2020-12-30 17:46:07 +0200
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2020-12-30 17:46:07 +0200
commit1f75681cbba895ea2f7ea0637900721f4522e729 (patch)
tree19eb7a48b5513f9f5811b1e515a3d4c8e637641c /cipher/poly1305.c
parent6a0bb9ab7f886087d7edb0725c90485086a1c0b4 (diff)
downloadlibgcrypt-1f75681cbba895ea2f7ea0637900721f4522e729.tar.gz
Add s390x/zSeries implementation of Poly1305 [cipher-s390x-optimizations]
* cipher/Makefile.am: Add 'poly1305-s390x.S' and 'asm-poly1305-s390x.h'.
* cipher/asm-poly1305-s390x.h: New
* cipher/chacha20-s390x.S (_gcry_chacha20_poly1305_s390x_vx_blocks8)
(_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1): New, stitched
chacha20-poly1305 implementation.
* cipher/chacha20.c (USE_S390X_VX_POLY1305): New.
(_gcry_chacha20_poly1305_s390x_vx_blocks8)
(_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1): New prototypes.
(_gcry_chacha20_poly1305_encrypt, _gcry_chacha20_poly1305_decrypt): Add
s390x/VX stitched chacha20-poly1305 code-path.
* cipher/poly1305-s390x.S: New.
* cipher/poly1305.c (USE_S390X_ASM, HAVE_ASM_POLY1305_BLOCKS): New.
[USE_S390X_ASM] (_gcry_poly1305_s390x_blocks1, poly1305_blocks): New.
* configure.ac (gcry_cv_gcc_inline_asm_s390x): Check for 'risbgn' and
'algrk' instructions.
* tests/basic.c (_check_poly1305_cipher): Add large chacha20-poly1305
test vector.
--

Patch adds Poly1305 and stitched ChaCha20-Poly1305 implementation for
zSeries. Stitched implementation interleaves ChaCha20 and Poly1305
processing for higher instruction level parallelism and better
utilization of execution units.

Benchmark on z15 (4504 Mhz):

Before:
 CHACHA20       |  nanosecs/byte   mebibytes/sec   cycles/byte
   POLY1305 enc |      1.16 ns/B     823.2 MiB/s      5.22 c/B
   POLY1305 dec |      1.16 ns/B     823.2 MiB/s      5.22 c/B
  POLY1305 auth |     0.736 ns/B      1295 MiB/s      3.32 c/B

After (chacha20-poly1305 ~71% faster, poly1305 ~29% faster):
 CHACHA20       |  nanosecs/byte   mebibytes/sec   cycles/byte
   POLY1305 enc |     0.677 ns/B      1409 MiB/s      3.05 c/B
   POLY1305 dec |     0.655 ns/B      1456 MiB/s      2.95 c/B
  POLY1305 auth |     0.569 ns/B      1675 MiB/s      2.56 c/B

GnuPG-bug-id: 5202
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/poly1305.c')
-rw-r--r--cipher/poly1305.c40
1 files changed, 40 insertions, 0 deletions
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index adcb6792..6cb4d2b7 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -35,6 +35,9 @@
static const char *selftest (void);
+#undef HAVE_ASM_POLY1305_BLOCKS
+
+
#undef USE_MPI_64BIT
#undef USE_MPI_32BIT
#if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64)
@@ -46,6 +49,35 @@ static const char *selftest (void);
#endif
+/* USE_S390X_ASM indicates whether to enable zSeries code. */
+#undef USE_S390X_ASM
+#if BYTES_PER_MPI_LIMB == 8
+# if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+# if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_ASM 1
+# endif /* USE_S390X_ASM */
+# endif
+#endif
+
+
+#ifdef USE_S390X_ASM
+
+#define HAVE_ASM_POLY1305_BLOCKS 1
+
+extern unsigned int _gcry_poly1305_s390x_blocks1(void *state,
+ const byte *buf, size_t len,
+ byte high_pad);
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+ byte high_pad)
+{
+ return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad);
+}
+
+#endif /* USE_S390X_ASM */
+
+
static void poly1305_init (poly1305_context_t *ctx,
const byte key[POLY1305_KEYLEN])
{
@@ -146,6 +178,8 @@ static void poly1305_init (poly1305_context_t *ctx,
ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \
} while (0)
+#ifndef HAVE_ASM_POLY1305_BLOCKS
+
static unsigned int
poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
byte high_pad)
@@ -201,6 +235,8 @@ poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
return 6 * sizeof (void *) + 18 * sizeof (u64);
}
+#endif /* !HAVE_ASM_POLY1305_BLOCKS */
+
static unsigned int poly1305_final (poly1305_context_t *ctx,
byte mac[POLY1305_TAGLEN])
{
@@ -354,6 +390,8 @@ static unsigned int poly1305_final (poly1305_context_t *ctx,
ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \
} while (0)
+#ifndef HAVE_ASM_POLY1305_BLOCKS
+
static unsigned int
poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
byte high_pad)
@@ -403,6 +441,8 @@ poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
return 6 * sizeof (void *) + 28 * sizeof (u32);
}
+#endif /* !HAVE_ASM_POLY1305_BLOCKS */
+
static unsigned int poly1305_final (poly1305_context_t *ctx,
byte mac[POLY1305_TAGLEN])
{