summaryrefslogtreecommitdiff
path: root/cipher/crc.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2019-09-15 22:48:38 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2019-09-15 22:52:07 +0300
commit0486b85bd1fb65013e77f858cae9ea4530f868df (patch)
tree18ba46bd149f34f815f58c104951611cfc4f24d1 /cipher/crc.c
parent557702f0d53a7ad1cf2ce0333c9df799a8abad59 (diff)
downloadlibgcrypt-0486b85bd1fb65013e77f858cae9ea4530f868df.tar.gz
Add PowerPC vpmsum implementation of CRC
* cipher/Makefile.am: Add 'crc-ppc.c'.
* cipher/crc-armv8-ce.c: Remove 'USE_INTEL_PCLMUL' comment.
* cipher/crc-ppc.c: New.
* cipher/crc.c (USE_PPC_VPMSUM): New.
(CRC_CONTEXT): Add 'use_vpmsum'.
(_gcry_crc32_ppc8_vpmsum, _gcry_crc24rfc2440_ppc8_vpmsum): New.
(crc32_init, crc24rfc2440_init): Add HWF check for 'use_vpmsum'.
(crc32_write, crc24rfc2440_write): Add 'use_vpmsum' code-path.
* configure.ac: Add 'vpmsumd' instruction to PowerPC VSX inline
assembly check; Add 'crc-ppc.lo'.
--

Benchmark on POWER8 (ppc64le, ~3.8GHz):

Before:
               |  nanosecs/byte   mebibytes/sec   cycles/byte
 CRC32         |     0.978 ns/B     975.0 MiB/s      3.72 c/B
 CRC24RFC2440  |     0.974 ns/B     978.8 MiB/s      3.70 c/B

After (~22x faster):
               |  nanosecs/byte   mebibytes/sec   cycles/byte
 CRC32         |     0.044 ns/B     21878 MiB/s     0.166 c/B
 CRC24RFC2440  |     0.043 ns/B     22077 MiB/s     0.164 c/B

Benchmark on POWER9 (ppc64le, ~3.8GHz):

Before:
               |  nanosecs/byte   mebibytes/sec   cycles/byte
 CRC32         |      1.01 ns/B     943.7 MiB/s      3.84 c/B
 CRC24RFC2440  |     0.993 ns/B     960.6 MiB/s      3.77 c/B

After (~20x faster):
               |  nanosecs/byte   mebibytes/sec   cycles/byte
 CRC32         |     0.046 ns/B     20675 MiB/s     0.175 c/B
 CRC24RFC2440  |     0.048 ns/B     19691 MiB/s     0.184 c/B

GnuPG-bug-id: 4460
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/crc.c')
-rw-r--r--cipher/crc.c46
1 files changed, 46 insertions, 0 deletions
diff --git a/cipher/crc.c b/cipher/crc.c
index 2abbab28..6d70f644 100644
--- a/cipher/crc.c
+++ b/cipher/crc.c
@@ -52,6 +52,19 @@
# endif
#endif /* USE_ARM_PMULL */
+/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector
+ * accelerated code. */
+#undef USE_PPC_VPMSUM
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_VPMSUM 1
+# endif
+# endif
+#endif /* USE_PPC_VPMSUM */
+
+
typedef struct
{
u32 CRC;
@@ -61,6 +74,9 @@ typedef struct
#ifdef USE_ARM_PMULL
unsigned int use_pmull:1; /* ARMv8 PMULL shall be used. */
#endif
+#ifdef USE_PPC_VPMSUM
+ unsigned int use_vpmsum:1; /* POWER vpmsum shall be used. */
+#endif
byte buf[4];
}
CRC_CONTEXT;
@@ -80,6 +96,13 @@ void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf,
size_t inlen);
#endif
+#ifdef USE_PPC_VPMSUM
+/*-- crc-ppc.c --*/
+void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf,
+ size_t inlen);
+#endif
+
/*
* Code generated by universal_crc by Danjel McGougan
@@ -388,6 +411,9 @@ crc32_init (void *context, unsigned int flags)
#ifdef USE_ARM_PMULL
ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
#endif
+#ifdef USE_PPC_VPMSUM
+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
(void)flags;
(void)hwf;
@@ -416,6 +442,13 @@ crc32_write (void *context, const void *inbuf_arg, size_t inlen)
return;
}
#endif
+#ifdef USE_PPC_VPMSUM
+ if (ctx->use_vpmsum)
+ {
+ _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
if (!inbuf || !inlen)
return;
@@ -477,6 +510,9 @@ crc32rfc1510_init (void *context, unsigned int flags)
#ifdef USE_ARM_PMULL
ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
#endif
+#ifdef USE_PPC_VPMSUM
+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
(void)flags;
(void)hwf;
@@ -811,6 +847,9 @@ crc24rfc2440_init (void *context, unsigned int flags)
#ifdef USE_ARM_PMULL
ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
#endif
+#ifdef USE_PPC_VPMSUM
+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
(void)hwf;
(void)flags;
@@ -839,6 +878,13 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)
return;
}
#endif
+#ifdef USE_PPC_VPMSUM
+ if (ctx->use_vpmsum)
+ {
+ _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
if (!inbuf || !inlen)
return;