From 0b520128551054d83fb0bb2db8873394f38de498 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna
Date: Sun, 2 Nov 2014 16:01:11 +0200
Subject: Add ARM/NEON implementation of Poly1305

* cipher/Makefile.am: Add 'poly1305-armv7-neon.S'.
* cipher/poly1305-armv7-neon.S: New.
* cipher/poly1305-internal.h (POLY1305_USE_NEON)
(POLY1305_NEON_BLOCKSIZE, POLY1305_NEON_STATESIZE)
(POLY1305_NEON_ALIGNMENT): New.
* cipher/poly1305.c [POLY1305_USE_NEON]
(_gcry_poly1305_armv7_neon_init_ext)
(_gcry_poly1305_armv7_neon_finish_ext)
(_gcry_poly1305_armv7_neon_blocks, poly1305_armv7_neon_ops): New.
(_gcry_poly1305_init) [POLY1305_USE_NEON]: Select NEON implementation
if HWF_ARM_NEON set.
* configure.ac [neonsupport=yes]: Add 'poly1305-armv7-neon.lo'.
--

Add Andrew Moon's public domain NEON implementation of Poly1305. Original
source is available at: https://github.com/floodyberry/poly1305-opt

Benchmark on Cortex-A8 (--cpu-mhz 1008):

 Old:
                 |  nanosecs/byte   mebibytes/sec   cycles/byte
  POLY1305       |     12.34 ns/B     77.27 MiB/s     12.44 c/B

 New:
                 |  nanosecs/byte   mebibytes/sec   cycles/byte
  POLY1305       |      2.12 ns/B     450.7 MiB/s      2.13 c/B

Signed-off-by: Jussi Kivilinna
---
 cipher/poly1305.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'cipher/poly1305.c')

diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index fe241c1f..28dbbf8f 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -76,6 +76,25 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = {
 
 #endif
 
+
+#ifdef POLY1305_USE_NEON
+
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
+                                                  size_t remaining,
+                                                  byte mac[16]);
+unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
+                                              size_t bytes);
+
+static const poly1305_ops_t poly1305_armv7_neon_ops = {
+  POLY1305_NEON_BLOCKSIZE,
+  _gcry_poly1305_armv7_neon_init_ext,
+  _gcry_poly1305_armv7_neon_blocks,
+  _gcry_poly1305_armv7_neon_finish_ext
+};
+
+#endif
+
 #ifdef HAVE_U64_TYPEDEF
 
 /* Reference unoptimized poly1305 implementation using 32 bit * 32 bit = 64 bit
@@ -660,6 +679,10 @@ _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key,
 #ifdef POLY1305_USE_AVX2
   if (features & HWF_INTEL_AVX2)
     ctx->ops = &poly1305_amd64_avx2_ops;
+#endif
+#ifdef POLY1305_USE_NEON
+  if (features & HWF_ARM_NEON)
+    ctx->ops = &poly1305_armv7_neon_ops;
 #endif
   (void)features;
 
--
cgit v1.2.1