summary refs log tree commit diff
path: root/cipher/chacha20.c
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2018-01-06 18:58:04 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2018-01-09 18:41:03 +0200
commit b3ec0f752c925cde36f560f0f9309ab6450bbfd9 (patch)
tree 5953c3702dc33499b21f21b6f3687d2fff6aecf8 /cipher/chacha20.c
parent 172ad09cbedc893f147180875335f4c525393c0b (diff)
download libgcrypt-b3ec0f752c925cde36f560f0f9309ab6450bbfd9.tar.gz
Add ARMv8/AArch64 implementation of chacha20
* cipher/Makefile.am: Add 'chacha20-aarch64.S'.
* cipher/chacha20-aarch64.S: New.
* cipher/chacha20.c (USE_AARCH64_SIMD): New.
(_gcry_chacha20_aarch64_blocks4): New.
(chacha20_do_setkey): Add HWF selection for Aarch64 implementation.
* configure.ac: Add 'chacha20-aarch64.lo'.
--
Benchmark on Cortex-A53 (1152 MHz):
Before:
CHACHA20 | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 7.91 ns/B 120.6 MiB/s 9.11 c/B
STREAM dec | 7.91 ns/B 120.6 MiB/s 9.11 c/B
After (1.66x faster):
CHACHA20 | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 4.74 ns/B 201.2 MiB/s 5.46 c/B
STREAM dec | 4.74 ns/B 201.3 MiB/s 5.46 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/chacha20.c')
-rw-r--r-- cipher/chacha20.c 36
1 files changed, 36 insertions, 0 deletions
diff --git a/cipher/chacha20.c b/cipher/chacha20.c
index ac6cc29e..e89ad2e4 100644
--- a/cipher/chacha20.c
+++ b/cipher/chacha20.c
@@ -73,6 +73,17 @@
# endif
#endif
+/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly
+ * code. */
+#undef USE_AARCH64_SIMD
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(__AARCH64EL__) \
+ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
+# define USE_AARCH64_SIMD 1
+# endif
+#endif
+
/* Assembly implementations use SystemV ABI, ABI conversion and additional
* stack to store XMM6-XMM15 needed on Win64. */
#undef ASM_FUNC_ABI
@@ -119,6 +130,13 @@ unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst,
#endif /* USE_ARMV7_NEON */
+#ifdef USE_AARCH64_SIMD
+
+unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst,
+ const byte *src, size_t nblks);
+
+#endif /* USE_AARCH64_SIMD */
+
static const char *selftest (void);
@@ -338,6 +356,10 @@ chacha20_do_setkey (CHACHA20_context_t *ctx,
#ifdef USE_ARMV7_NEON
ctx->use_neon = (features & HWF_ARM_NEON) != 0;
#endif
+#ifdef USE_AARCH64_SIMD
+ ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
+
(void)features;
chacha20_keysetup (ctx, key, keylen);
@@ -434,6 +456,20 @@ chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf,
}
#endif
+#ifdef USE_AARCH64_SIMD
+ if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+ nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf,
+ nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
if (length >= CHACHA20_BLOCK_SIZE)
{
size_t nblocks = length / CHACHA20_BLOCK_SIZE;