summaryrefslogtreecommitdiff
path: root/cipher/sha512.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2019-11-05 22:06:39 +0200
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2019-11-05 22:06:39 +0200
commitb52dde860963c794b12d14b0a9c5848bca7ba51e (patch)
tree3d6a5c5de2318fa347f61b0990eb900d43af4233 /cipher/sha512.c
parentfbb26c3ac514189f50a31f72dde2a02ef7b41f9f (diff)
downloadlibgcrypt-b52dde860963c794b12d14b0a9c5848bca7ba51e.tar.gz
Add i386/SSSE3 implementation of SHA512
* LICENSES: Add 'sha512-ssse3-i386.c'. * configure.ac: Add 'sha512-ssse3-i386.lo'. * cipher/Makefile.am: Add 'sha512-ssse3-i386.c'. * cipher/sha512-ssse3-i386.c: New. * cipher/sha512.c (USE_SSSE3_I386, _gcry_sha512_transform_i386_ssse3) (do_sha512_transform_i386_ssse3): New. (sha512_init_common) [USE_SSSE3_I386]: Use i386/SSSE3 transform function if supported by CPU. -- Benchmark on AMD Ryzen 7 3700X: Before: | nanosecs/byte mebibytes/sec cycles/byte auto Mhz SHA512 | 12.58 ns/B 75.79 MiB/s 55.06 c/B 4375 After (~4.5x faster): | nanosecs/byte mebibytes/sec cycles/byte auto Mhz SHA512 | 2.78 ns/B 343.3 MiB/s 12.09 c/B 4351 Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512.c')
-rw-r--r--cipher/sha512.c26
1 files changed, 26 insertions, 0 deletions
diff --git a/cipher/sha512.c b/cipher/sha512.c
index df9a449c..b603af8d 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -104,6 +104,14 @@
#endif
+/* USE_SSSE3_I386 indicates whether to compile with Intel SSSE3/i386 code.
+ * It is defined (to 1) only when all of the following hold: the target
+ * defines __i386__, SIZEOF_UNSIGNED_LONG is 4, __GNUC__ is at least 4,
+ * and HAVE_GCC_INLINE_ASM_SSSE3 is defined.  The #undef below clears any
+ * earlier definition so the macro's state depends solely on this test. */
+#undef USE_SSSE3_I386
+#if defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3)
+# define USE_SSSE3_I386 1
+#endif
+
+
/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
* accelerated code. */
#undef USE_PPC_CRYPTO
@@ -248,6 +256,20 @@ do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
}
#endif
+#ifdef USE_SSSE3_I386
+unsigned int _gcry_sha512_transform_i386_ssse3(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks); /* not defined in this file; presumably in sha512-ssse3-i386.c -- confirm */
+/* Adapter giving the i386/SSSE3 transform the (ctx, data, nblks)
+ * signature that is assigned to ctx->bctx.bwrite in this file.
+ * CTX is used as a SHA512_CONTEXT; the address of its first state word
+ * (state.h0) is passed as the u64 state[8] argument, and DATA and NBLKS
+ * are forwarded unchanged.  The value returned by the SSSE3 routine is
+ * returned to the caller as is.
+ * NOTE(review): assumes the state words after h0 are laid out
+ * contiguously as eight u64 values -- confirm against SHA512_STATE. */
+static unsigned int
+do_sha512_transform_i386_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_i386_ssse3 (&hd->state.h0, data, nblks);
+}
+#endif /* USE_SSSE3_I386 */
+
#ifdef USE_ARM_ASM
unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
@@ -330,6 +352,10 @@ sha512_init_common (SHA512_CONTEXT *ctx, unsigned int flags)
if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
ctx->bctx.bwrite = do_sha512_transform_ppc9;
#endif
+#ifdef USE_SSSE3_I386
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_i386_ssse3;
+#endif
(void)features;
}