diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2018-01-06 18:58:04 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2018-01-09 18:41:03 +0200 |
commit | b3ec0f752c925cde36f560f0f9309ab6450bbfd9 (patch) | |
tree | 5953c3702dc33499b21f21b6f3687d2fff6aecf8 /cipher/chacha20.c | |
parent | 172ad09cbedc893f147180875335f4c525393c0b (diff) | |
download | libgcrypt-b3ec0f752c925cde36f560f0f9309ab6450bbfd9.tar.gz |
Add ARMv8/AArch64 implementation of chacha20
* cipher/Makefile.am: Add 'chacha20-aarch64.S'.
* cipher/chacha20-aarch64.S: New.
* cipher/chacha20.c (USE_AARCH64_SIMD): New.
(_gcry_chacha20_aarch64_blocks4): New.
(chacha20_do_setkey): Add HWF selection for AArch64 implementation.
* configure.ac: Add 'chacha20-aarch64.lo'.
--
Benchmark on Cortex-A53 (1152 MHz):
Before:
CHACHA20 | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 7.91 ns/B 120.6 MiB/s 9.11 c/B
STREAM dec | 7.91 ns/B 120.6 MiB/s 9.11 c/B
After (1.66x faster):
CHACHA20 | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 4.74 ns/B 201.2 MiB/s 5.46 c/B
STREAM dec | 4.74 ns/B 201.3 MiB/s 5.46 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/chacha20.c')
-rw-r--r-- | cipher/chacha20.c | 36 |
1 file changed, 36 insertions, 0 deletions
diff --git a/cipher/chacha20.c b/cipher/chacha20.c
index ac6cc29e..e89ad2e4 100644
--- a/cipher/chacha20.c
+++ b/cipher/chacha20.c
@@ -73,6 +73,17 @@
 # endif
 #endif
 
+/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly
+ * code. */
+#undef USE_AARCH64_SIMD
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(__AARCH64EL__) \
+       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+       && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
+#  define USE_AARCH64_SIMD 1
+# endif
+#endif
+
 /* Assembly implementations use SystemV ABI, ABI conversion and additional
  * stack to store XMM6-XMM15 needed on Win64. */
 #undef ASM_FUNC_ABI
@@ -119,6 +130,13 @@ unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst,
 
 #endif /* USE_ARMV7_NEON */
 
+#ifdef USE_AARCH64_SIMD
+
+unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst,
+                                            const byte *src, size_t nblks);
+
+#endif /* USE_AARCH64_SIMD */
+
 
 static const char *selftest (void);
 
@@ -338,6 +356,10 @@ chacha20_do_setkey (CHACHA20_context_t *ctx,
 #ifdef USE_ARMV7_NEON
   ctx->use_neon = (features & HWF_ARM_NEON) != 0;
 #endif
+#ifdef USE_AARCH64_SIMD
+  ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
+
   (void)features;
 
   chacha20_keysetup (ctx, key, keylen);
@@ -434,6 +456,20 @@ chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf,
     }
 #endif
 
+#ifdef USE_AARCH64_SIMD
+  if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+      nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf,
+                                             nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
   if (length >= CHACHA20_BLOCK_SIZE)
     {
       size_t nblocks = length / CHACHA20_BLOCK_SIZE;