summaryrefslogtreecommitdiff
path: root/cipher/blake2.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2018-02-08 19:45:10 +0200
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2018-02-16 19:28:04 +0200
commitda58a62ac1b7a8d97b0895dcb41d15af531e45e5 (patch)
tree73d80d737507cf4fe7f88a93d1c227ff3387af18 /cipher/blake2.c
parentaf7fc732f9a7af7a70276f1e8364d2132db314f1 (diff)
downloadlibgcrypt-da58a62ac1b7a8d97b0895dcb41d15af531e45e5.tar.gz
AVX implementation of BLAKE2s
* cipher/Makefile.am: Add 'blake2s-amd64-avx.S'. * cipher/blake2.c (USE_AVX, _gry_blake2s_transform_amd64_avx): New. (BLAKE2S_CONTEXT) [USE_AVX]: Add 'use_avx'. (blake2s_transform): Rename to ... (blake2s_transform_generic): ... this. (blake2s_transform): New. (blake2s_final): Pass 'ctx' pointer to transform function instead of 'S'. (blake2s_init_ctx): Check HW features and enable AVX implementation if supported. * cipher/blake2s-amd64-avx.S: New. * configure.ac: Add 'blake2s-amd64-avx.lo'. -- Benchmark on Intel Core i7-4790K (4.0 Ghz, no turbo): Before: | nanosecs/byte mebibytes/sec cycles/byte BLAKE2S_256 | 1.77 ns/B 538.2 MiB/s 7.09 c/B After (~1.3x faster): | nanosecs/byte mebibytes/sec cycles/byte BLAKE2S_256 | 1.34 ns/B 711.4 MiB/s 5.36 c/B Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/blake2.c')
-rw-r--r--cipher/blake2.c52
1 files changed, 48 insertions, 4 deletions
diff --git a/cipher/blake2.c b/cipher/blake2.c
index f830c799..0f7494f2 100644
--- a/cipher/blake2.c
+++ b/cipher/blake2.c
@@ -30,6 +30,14 @@
#include "cipher.h"
#include "hash-common.h"
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
@@ -121,6 +129,9 @@ typedef struct BLAKE2S_CONTEXT_S
byte buf[BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
+#ifdef USE_AVX
+ unsigned int use_avx:1;
+#endif
} BLAKE2S_CONTEXT;
typedef unsigned int (*blake2_transform_t)(void *S, const void *inblk,
@@ -479,8 +490,9 @@ static inline void blake2s_increment_counter(BLAKE2S_STATE *S, const int inc)
S->t[1] += (S->t[0] < (u32)inc) - (inc < 0);
}
-static unsigned int blake2s_transform(void *vS, const void *inblks,
- size_t nblks)
+static unsigned int blake2s_transform_generic(BLAKE2S_STATE *S,
+ const void *inblks,
+ size_t nblks)
{
static const byte blake2s_sigma[10][16] =
{
@@ -495,7 +507,6 @@ static unsigned int blake2s_transform(void *vS, const void *inblks,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 },
};
- BLAKE2S_STATE *S = vS;
unsigned int burn = 0;
const byte* in = inblks;
u32 m[16];
@@ -594,6 +605,33 @@ static unsigned int blake2s_transform(void *vS, const void *inblks,
return burn;
}
+#ifdef USE_AVX
+unsigned int _gcry_blake2s_transform_amd64_avx(BLAKE2S_STATE *S,
+ const void *inblks,
+ size_t nblks) ASM_FUNC_ABI;
+#endif
+
+static unsigned int blake2s_transform(void *ctx, const void *inblks,
+ size_t nblks)
+{
+ BLAKE2S_CONTEXT *c = ctx;
+ unsigned int nburn;
+
+ if (0)
+ {}
+#ifdef USE_AVX
+ if (c->use_avx)
+ nburn = _gcry_blake2s_transform_amd64_avx(&c->state, inblks, nblks);
+#endif
+ else
+ nburn = blake2s_transform_generic(&c->state, inblks, nblks);
+
+ if (nburn)
+ nburn += ASM_EXTRA_STACK;
+
+ return nburn;
+}
+
static void blake2s_final(void *ctx)
{
BLAKE2S_CONTEXT *c = ctx;
@@ -609,7 +647,7 @@ static void blake2s_final(void *ctx)
memset (c->buf + c->buflen, 0, BLAKE2S_BLOCKBYTES - c->buflen); /* Padding */
blake2s_set_lastblock (S);
blake2s_increment_counter (S, (int)c->buflen - BLAKE2S_BLOCKBYTES);
- burn = blake2s_transform (S, c->buf, 1);
+ burn = blake2s_transform (ctx, c->buf, 1);
/* Output full hash to buffer */
for (i = 0; i < 8; ++i)
@@ -685,11 +723,17 @@ static gcry_err_code_t blake2s_init_ctx(void *ctx, unsigned int flags,
unsigned int dbits)
{
BLAKE2S_CONTEXT *c = ctx;
+ unsigned int features = _gcry_get_hw_features ();
+ (void)features;
(void)flags;
memset (c, 0, sizeof (*c));
+#ifdef USE_AVX
+ c->use_avx = !!(features & HWF_INTEL_AVX);
+#endif
+
c->outlen = dbits / 8;
c->buflen = 0;
return blake2s_init(c, key, keylen);