diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2019-09-15 22:48:38 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2019-09-15 22:52:07 +0300 |
commit | 0486b85bd1fb65013e77f858cae9ea4530f868df (patch) | |
tree | 18ba46bd149f34f815f58c104951611cfc4f24d1 /cipher/crc.c | |
parent | 557702f0d53a7ad1cf2ce0333c9df799a8abad59 (diff) | |
download | libgcrypt-0486b85bd1fb65013e77f858cae9ea4530f868df.tar.gz |
Add PowerPC vpmsum implementation of CRC
* cipher/Makefile.am: Add 'crc-ppc.c'.
* cipher/crc-armv8-ce.c: Remove 'USE_INTEL_PCLMUL' comment.
* cipher/crc-ppc.c: New.
* cipher/crc.c (USE_PPC_VPMSUM): New.
(CRC_CONTEXT): Add 'use_vpmsum'.
(_gcry_crc32_ppc8_vpmsum, _gcry_crc24rfc2440_ppc8_vpmsum): New.
(crc32_init, crc32rfc1510_init, crc24rfc2440_init): Add HWF check for 'use_vpmsum'.
(crc32_write, crc24rfc2440_write): Add 'use_vpmsum' code-path.
* configure.ac: Add 'vpmsumd' instruction to PowerPC VSX inline
assembly check; Add 'crc-ppc.lo'.
--
Benchmark on POWER8 (ppc64le, ~3.8GHz):
Before:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.978 ns/B 975.0 MiB/s 3.72 c/B
CRC24RFC2440 | 0.974 ns/B 978.8 MiB/s 3.70 c/B
After (~22x faster):
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.044 ns/B 21878 MiB/s 0.166 c/B
CRC24RFC2440 | 0.043 ns/B 22077 MiB/s 0.164 c/B
Benchmark on POWER9 (ppc64le, ~3.8GHz):
Before:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 1.01 ns/B 943.7 MiB/s 3.84 c/B
CRC24RFC2440 | 0.993 ns/B 960.6 MiB/s 3.77 c/B
After (~20x faster):
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.046 ns/B 20675 MiB/s 0.175 c/B
CRC24RFC2440 | 0.048 ns/B 19691 MiB/s 0.184 c/B
GnuPG-bug-id: 4460
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/crc.c')
-rw-r--r-- | cipher/crc.c | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/cipher/crc.c b/cipher/crc.c index 2abbab28..6d70f644 100644 --- a/cipher/crc.c +++ b/cipher/crc.c @@ -52,6 +52,19 @@ # endif #endif /* USE_ARM_PMULL */ +/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector + * accelerated code. */ +#undef USE_PPC_VPMSUM +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ + defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) +# if __GNUC__ >= 4 +# define USE_PPC_VPMSUM 1 +# endif +# endif +#endif /* USE_PPC_VPMSUM */ + + typedef struct { u32 CRC; @@ -61,6 +74,9 @@ typedef struct #ifdef USE_ARM_PMULL unsigned int use_pmull:1; /* ARMv8 PMULL shall be used. */ #endif +#ifdef USE_PPC_VPMSUM + unsigned int use_vpmsum:1; /* POWER vpmsum shall be used. */ +#endif byte buf[4]; } CRC_CONTEXT; @@ -80,6 +96,13 @@ void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen); #endif +#ifdef USE_PPC_VPMSUM +/*-- crc-ppc.c --*/ +void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen); +void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, + size_t inlen); +#endif + /* * Code generated by universal_crc by Danjel McGougan @@ -388,6 +411,9 @@ crc32_init (void *context, unsigned int flags) #ifdef USE_ARM_PMULL ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL); #endif +#ifdef USE_PPC_VPMSUM + ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07); +#endif (void)flags; (void)hwf; @@ -416,6 +442,13 @@ crc32_write (void *context, const void *inbuf_arg, size_t inlen) return; } #endif +#ifdef USE_PPC_VPMSUM + if (ctx->use_vpmsum) + { + _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen); + return; + } +#endif if (!inbuf || !inlen) return; @@ -477,6 +510,9 @@ crc32rfc1510_init (void *context, unsigned int flags) #ifdef USE_ARM_PMULL ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL); #endif +#ifdef USE_PPC_VPMSUM + ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07); +#endif (void)flags; (void)hwf; @@ -811,6 +847,9 @@ crc24rfc2440_init (void *context, 
unsigned int flags) #ifdef USE_ARM_PMULL ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL); #endif +#ifdef USE_PPC_VPMSUM + ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07); +#endif (void)hwf; (void)flags; @@ -839,6 +878,13 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen) return; } #endif +#ifdef USE_PPC_VPMSUM + if (ctx->use_vpmsum) + { + _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen); + return; + } +#endif if (!inbuf || !inlen) return; |