diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2019-11-05 22:06:39 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2019-11-05 22:06:39 +0200 |
commit | b52dde860963c794b12d14b0a9c5848bca7ba51e (patch) | |
tree | 3d6a5c5de2318fa347f61b0990eb900d43af4233 /cipher/sha512.c | |
parent | fbb26c3ac514189f50a31f72dde2a02ef7b41f9f (diff) | |
download | libgcrypt-b52dde860963c794b12d14b0a9c5848bca7ba51e.tar.gz |
Add i386/SSSE3 implementation of SHA512
* LICENSES: Add 'sha512-ssse3-i386.c'.
* configure.ac: Add 'sha512-ssse3-i386.lo'.
* cipher/Makefile.am: Add 'sha512-ssse3-i386.c'.
* cipher/sha512-ssse3-i386.c: New.
* cipher/sha512.c (USE_SSSE3_I386, _gcry_sha512_transform_i386_ssse3)
(do_sha512_transform_i386_ssse3): New.
(sha512_init_common) [USE_SSSE3_I386]: Use i386/SSSE3 transform
function if supported by CPU.
--
Benchmark on AMD Ryzen 7 3700X:
Before:
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
SHA512 | 12.58 ns/B 75.79 MiB/s 55.06 c/B 4375
After (~4.5x faster):
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
SHA512 | 2.78 ns/B 343.3 MiB/s 12.09 c/B 4351
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512.c')
-rw-r--r-- | cipher/sha512.c | 26 |
1 files changed, 26 insertions, 0 deletions
```diff
diff --git a/cipher/sha512.c b/cipher/sha512.c
index df9a449c..b603af8d 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -104,6 +104,14 @@
 #endif
 
 
+/* USE_SSSE3_I386 indicates whether to compile with Intel SSSE3/i386 code. */
+#undef USE_SSSE3_I386
+#if defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+# define USE_SSSE3_I386 1
+#endif
+
+
 /* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
  * accelerated code. */
 #undef USE_PPC_CRYPTO
@@ -248,6 +256,20 @@ do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
 }
 #endif
 
+#ifdef USE_SSSE3_I386
+unsigned int _gcry_sha512_transform_i386_ssse3(u64 state[8],
+                                               const unsigned char *input_data,
+                                               size_t num_blks);
+
+static unsigned int
+do_sha512_transform_i386_ssse3(void *ctx, const unsigned char *data,
+                               size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_i386_ssse3 (&hd->state.h0, data, nblks);
+}
+#endif
+
 
 #ifdef USE_ARM_ASM
 unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
@@ -330,6 +352,10 @@ sha512_init_common (SHA512_CONTEXT *ctx, unsigned int flags)
   if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
     ctx->bctx.bwrite = do_sha512_transform_ppc9;
 #endif
+#ifdef USE_SSSE3_I386
+  if ((features & HWF_INTEL_SSSE3) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_i386_ssse3;
+#endif
 
   (void)features;
 }
```