summary | refs | log | tree | commit | diff
path: root/cipher/poly1305.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-11-02 16:01:11 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-11-02 16:26:53 +0200
commit: 0b520128551054d83fb0bb2db8873394f38de498 (patch)
tree: cba613c83ce9a044417a2084573211ee254654eb /cipher/poly1305.c
parent: c584f44543883346d5a565581ff99a0afce9c5e1 (diff)
download: libgcrypt-0b520128551054d83fb0bb2db8873394f38de498.tar.gz
Add ARM/NEON implementation of Poly1305

* cipher/Makefile.am: Add 'poly1305-armv7-neon.S'.
* cipher/poly1305-armv7-neon.S: New.
* cipher/poly1305-internal.h (POLY1305_USE_NEON)
(POLY1305_NEON_BLOCKSIZE, POLY1305_NEON_STATESIZE)
(POLY1305_NEON_ALIGNMENT): New.
* cipher/poly1305.c [POLY1305_USE_NEON]
(_gcry_poly1305_armv7_neon_init_ext)
(_gcry_poly1305_armv7_neon_finish_ext)
(_gcry_poly1305_armv7_neon_blocks, poly1305_armv7_neon_ops): New.
(_gcry_poly1305_init) [POLY1305_USE_NEON]: Select NEON implementation
if HWF_ARM_NEON set.
* configure.ac [neonsupport=yes]: Add 'poly1305-armv7-neon.lo'.
--

Add Andrew Moon's public domain NEON implementation of Poly1305. Original
source is available at: https://github.com/floodyberry/poly1305-opt

Benchmark on Cortex-A8 (--cpu-mhz 1008):

 Old:
            |  nanosecs/byte   mebibytes/sec   cycles/byte
  POLY1305  |     12.34 ns/B     77.27 MiB/s     12.44 c/B

 New:
            |  nanosecs/byte   mebibytes/sec   cycles/byte
  POLY1305  |      2.12 ns/B     450.7 MiB/s      2.13 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/poly1305.c')
-rw-r--r-- cipher/poly1305.c 23
1 files changed, 23 insertions, 0 deletions
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index fe241c1f..28dbbf8f 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -76,6 +76,25 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = {
#endif
+#ifdef POLY1305_USE_NEON
+
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
+ size_t remaining,
+ byte mac[16]);
+unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
+ size_t bytes);
+
+static const poly1305_ops_t poly1305_armv7_neon_ops = {
+ POLY1305_NEON_BLOCKSIZE,
+ _gcry_poly1305_armv7_neon_init_ext,
+ _gcry_poly1305_armv7_neon_blocks,
+ _gcry_poly1305_armv7_neon_finish_ext
+};
+
+#endif
+
+
#ifdef HAVE_U64_TYPEDEF
/* Reference unoptimized poly1305 implementation using 32 bit * 32 bit = 64 bit
@@ -661,6 +680,10 @@ _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key,
if (features & HWF_INTEL_AVX2)
ctx->ops = &poly1305_amd64_avx2_ops;
#endif
+#ifdef POLY1305_USE_NEON
+ if (features & HWF_ARM_NEON)
+ ctx->ops = &poly1305_armv7_neon_ops;
+#endif
(void)features;
buf_cpy (keytmp.b, key, POLY1305_KEYLEN);