summary | refs | log | tree | commit | diff
path: root/src/g10lib.h
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2017-01-04 10:18:36 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2017-01-06 12:48:20 +0200
commit: c59a8ce51ceb9a80169c44ef86a67e95cf8528c3 (patch)
tree: 79900afec0b7eaeb7b47d0de95159f11648da4d3 /src/g10lib.h
parent: 232a129b1f915fc54881506e4b07c89cf84932e6 (diff)
download: libgcrypt-c59a8ce51ceb9a80169c44ef86a67e95cf8528c3.tar.gz
Add AVX2/vpgather bulk implementation of Twofish
* cipher/Makefile.am: Add 'twofish-avx2-amd64.S'.
* cipher/twofish-avx2-amd64.S: New.
* cipher/twofish.c (USE_AVX2): New.
(TWOFISH_context) [USE_AVX2]: Add 'use_avx2' member.
(ASM_FUNC_ABI): New.
(twofish_setkey): Add check for AVX2 and fast VPGATHER HW features.
(_gcry_twofish_avx2_ctr_enc, _gcry_twofish_avx2_cbc_dec)
(_gcry_twofish_avx2_cfb_dec, _gcry_twofish_avx2_ocb_enc)
(_gcry_twofish_avx2_ocb_dec, _gcry_twofish_avx2_ocb_auth): New.
(_gcry_twofish_ctr_enc, _gcry_twofish_cbc_dec, _gcry_twofish_cfb_dec)
(_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): Add AVX2 bulk handling.
(selftest_ctr, selftest_cbc, selftest_cfb): Increase nblocks from 3+X to 16+X.
* configure.ac: Add 'twofish-avx2-amd64.lo'.
* src/g10lib.h (HWF_INTEL_FAST_VPGATHER): New.
* src/hwf-x86.c (detect_x86_gnuc): Add detection for HWF_INTEL_FAST_VPGATHER.
* src/hwfeatures.c (HWF_INTEL_FAST_VPGATHER): Add "intel-fast-vpgather" for HWF_INTEL_FAST_VPGATHER.
--

Benchmark on Intel Core i3-6100 (3.7 Ghz):

Before:
 TWOFISH | nanosecs/byte mebibytes/sec cycles/byte
 ECB enc | 4.25 ns/B 224.5 MiB/s 15.71 c/B
 ECB dec | 4.16 ns/B 229.5 MiB/s 15.38 c/B
 CBC enc | 4.53 ns/B 210.4 MiB/s 16.77 c/B
 CBC dec | 2.71 ns/B 351.6 MiB/s 10.04 c/B
 CFB enc | 4.60 ns/B 207.3 MiB/s 17.02 c/B
 CFB dec | 2.70 ns/B 353.5 MiB/s 9.98 c/B
 OFB enc | 4.25 ns/B 224.2 MiB/s 15.74 c/B
 OFB dec | 4.24 ns/B 225.0 MiB/s 15.68 c/B
 CTR enc | 2.72 ns/B 350.6 MiB/s 10.06 c/B
 CTR dec | 2.72 ns/B 350.7 MiB/s 10.06 c/B
 CCM enc | 7.25 ns/B 131.5 MiB/s 26.83 c/B
 CCM dec | 7.25 ns/B 131.5 MiB/s 26.83 c/B
 CCM auth | 4.57 ns/B 208.9 MiB/s 16.89 c/B
 GCM enc | 3.02 ns/B 315.3 MiB/s 11.19 c/B
 GCM dec | 3.02 ns/B 315.6 MiB/s 11.18 c/B
 GCM auth | 0.297 ns/B 3208.4 MiB/s 1.10 c/B
 OCB enc | 2.73 ns/B 349.7 MiB/s 10.09 c/B
 OCB dec | 2.82 ns/B 338.3 MiB/s 10.43 c/B
 OCB auth | 2.77 ns/B 343.7 MiB/s 10.27 c/B

After (CBC-dec & CFB-dec & CTR & OCB, ~1.5x faster):
 TWOFISH | nanosecs/byte mebibytes/sec cycles/byte
 ECB enc | 4.25 ns/B 224.2 MiB/s 15.74 c/B
 ECB dec | 4.15 ns/B 229.5 MiB/s 15.37 c/B
 CBC enc | 4.61 ns/B 206.8 MiB/s 17.06 c/B
 CBC dec | 1.75 ns/B 544.0 MiB/s 6.49 c/B
 CFB enc | 4.52 ns/B 211.0 MiB/s 16.72 c/B
 CFB dec | 1.72 ns/B 554.1 MiB/s 6.37 c/B
 OFB enc | 4.27 ns/B 223.3 MiB/s 15.80 c/B
 OFB dec | 4.28 ns/B 222.7 MiB/s 15.84 c/B
 CTR enc | 1.73 ns/B 549.9 MiB/s 6.42 c/B
 CTR dec | 1.75 ns/B 545.1 MiB/s 6.47 c/B
 CCM enc | 6.31 ns/B 151.2 MiB/s 23.34 c/B
 CCM dec | 6.42 ns/B 148.5 MiB/s 23.76 c/B
 CCM auth | 4.56 ns/B 208.9 MiB/s 16.89 c/B
 GCM enc | 1.90 ns/B 502.8 MiB/s 7.02 c/B
 GCM dec | 2.00 ns/B 477.8 MiB/s 7.38 c/B
 GCM auth | 0.300 ns/B 3178.6 MiB/s 1.11 c/B
 OCB enc | 1.76 ns/B 542.2 MiB/s 6.51 c/B
 OCB dec | 1.76 ns/B 540.7 MiB/s 6.53 c/B
 OCB auth | 1.76 ns/B 542.8 MiB/s 6.50 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'src/g10lib.h')
-rw-r--r-- src/g10lib.h 43
1 files changed, 22 insertions, 21 deletions
diff --git a/src/g10lib.h b/src/g10lib.h
index f0a46289..1308cffb 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -196,27 +196,28 @@ char **_gcry_strtokenize (const char *string, const char *delim);
/*-- src/hwfeatures.c --*/
-#define HWF_PADLOCK_RNG (1 << 0)
-#define HWF_PADLOCK_AES (1 << 1)
-#define HWF_PADLOCK_SHA (1 << 2)
-#define HWF_PADLOCK_MMUL (1 << 3)
-
-#define HWF_INTEL_CPU (1 << 4)
-#define HWF_INTEL_FAST_SHLD (1 << 5)
-#define HWF_INTEL_BMI2 (1 << 6)
-#define HWF_INTEL_SSSE3 (1 << 7)
-#define HWF_INTEL_SSE4_1 (1 << 8)
-#define HWF_INTEL_PCLMUL (1 << 9)
-#define HWF_INTEL_AESNI (1 << 10)
-#define HWF_INTEL_RDRAND (1 << 11)
-#define HWF_INTEL_AVX (1 << 12)
-#define HWF_INTEL_AVX2 (1 << 13)
-
-#define HWF_ARM_NEON (1 << 14)
-#define HWF_ARM_AES (1 << 15)
-#define HWF_ARM_SHA1 (1 << 16)
-#define HWF_ARM_SHA2 (1 << 17)
-#define HWF_ARM_PMULL (1 << 18)
+#define HWF_PADLOCK_RNG (1 << 0)
+#define HWF_PADLOCK_AES (1 << 1)
+#define HWF_PADLOCK_SHA (1 << 2)
+#define HWF_PADLOCK_MMUL (1 << 3)
+
+#define HWF_INTEL_CPU (1 << 4)
+#define HWF_INTEL_FAST_SHLD (1 << 5)
+#define HWF_INTEL_BMI2 (1 << 6)
+#define HWF_INTEL_SSSE3 (1 << 7)
+#define HWF_INTEL_SSE4_1 (1 << 8)
+#define HWF_INTEL_PCLMUL (1 << 9)
+#define HWF_INTEL_AESNI (1 << 10)
+#define HWF_INTEL_RDRAND (1 << 11)
+#define HWF_INTEL_AVX (1 << 12)
+#define HWF_INTEL_AVX2 (1 << 13)
+#define HWF_INTEL_FAST_VPGATHER (1 << 14)
+
+#define HWF_ARM_NEON (1 << 15)
+#define HWF_ARM_AES (1 << 16)
+#define HWF_ARM_SHA1 (1 << 17)
+#define HWF_ARM_SHA2 (1 << 18)
+#define HWF_ARM_PMULL (1 << 19)
gpg_err_code_t _gcry_disable_hw_feature (const char *name);