summary | refs | log | tree | commit | diff
path: root/src/hwfeatures.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2017-01-04 10:18:36 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2017-01-06 12:48:20 +0200
commit: c59a8ce51ceb9a80169c44ef86a67e95cf8528c3 (patch)
tree: 79900afec0b7eaeb7b47d0de95159f11648da4d3 /src/hwfeatures.c
parent: 232a129b1f915fc54881506e4b07c89cf84932e6 (diff)
download: libgcrypt-c59a8ce51ceb9a80169c44ef86a67e95cf8528c3.tar.gz
Add AVX2/vpgather bulk implementation of Twofish
* cipher/Makefile.am: Add 'twofish-avx2-amd64.S'.
* cipher/twofish-avx2-amd64.S: New.
* cipher/twofish.c (USE_AVX2): New.
(TWOFISH_context) [USE_AVX2]: Add 'use_avx2' member.
(ASM_FUNC_ABI): New.
(twofish_setkey): Add check for AVX2 and fast VPGATHER HW features.
(_gcry_twofish_avx2_ctr_enc, _gcry_twofish_avx2_cbc_dec)
(_gcry_twofish_avx2_cfb_dec, _gcry_twofish_avx2_ocb_enc)
(_gcry_twofish_avx2_ocb_dec, _gcry_twofish_avx2_ocb_auth): New.
(_gcry_twofish_ctr_enc, _gcry_twofish_cbc_dec, _gcry_twofish_cfb_dec)
(_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): Add AVX2 bulk handling.
(selftest_ctr, selftest_cbc, selftest_cfb): Increase nblocks from 3+X to 16+X.
* configure.ac: Add 'twofish-avx2-amd64.lo'.
* src/g10lib.h (HWF_INTEL_FAST_VPGATHER): New.
* src/hwf-x86.c (detect_x86_gnuc): Add detection for HWF_INTEL_FAST_VPGATHER.
* src/hwfeatures.c (HWF_INTEL_FAST_VPGATHER): Add "intel-fast-vpgather" for HWF_INTEL_FAST_VPGATHER.
--

Benchmark on Intel Core i3-6100 (3.7 GHz):

Before:
 TWOFISH  |  nanosecs/byte   mebibytes/sec   cycles/byte
 ECB enc  |  4.25 ns/B   224.5 MiB/s  15.71 c/B
 ECB dec  |  4.16 ns/B   229.5 MiB/s  15.38 c/B
 CBC enc  |  4.53 ns/B   210.4 MiB/s  16.77 c/B
 CBC dec  |  2.71 ns/B   351.6 MiB/s  10.04 c/B
 CFB enc  |  4.60 ns/B   207.3 MiB/s  17.02 c/B
 CFB dec  |  2.70 ns/B   353.5 MiB/s   9.98 c/B
 OFB enc  |  4.25 ns/B   224.2 MiB/s  15.74 c/B
 OFB dec  |  4.24 ns/B   225.0 MiB/s  15.68 c/B
 CTR enc  |  2.72 ns/B   350.6 MiB/s  10.06 c/B
 CTR dec  |  2.72 ns/B   350.7 MiB/s  10.06 c/B
 CCM enc  |  7.25 ns/B   131.5 MiB/s  26.83 c/B
 CCM dec  |  7.25 ns/B   131.5 MiB/s  26.83 c/B
 CCM auth |  4.57 ns/B   208.9 MiB/s  16.89 c/B
 GCM enc  |  3.02 ns/B   315.3 MiB/s  11.19 c/B
 GCM dec  |  3.02 ns/B   315.6 MiB/s  11.18 c/B
 GCM auth | 0.297 ns/B  3208.4 MiB/s   1.10 c/B
 OCB enc  |  2.73 ns/B   349.7 MiB/s  10.09 c/B
 OCB dec  |  2.82 ns/B   338.3 MiB/s  10.43 c/B
 OCB auth |  2.77 ns/B   343.7 MiB/s  10.27 c/B

After (CBC-dec & CFB-dec & CTR & OCB, ~1.5x faster):
 TWOFISH  |  nanosecs/byte   mebibytes/sec   cycles/byte
 ECB enc  |  4.25 ns/B   224.2 MiB/s  15.74 c/B
 ECB dec  |  4.15 ns/B   229.5 MiB/s  15.37 c/B
 CBC enc  |  4.61 ns/B   206.8 MiB/s  17.06 c/B
 CBC dec  |  1.75 ns/B   544.0 MiB/s   6.49 c/B
 CFB enc  |  4.52 ns/B   211.0 MiB/s  16.72 c/B
 CFB dec  |  1.72 ns/B   554.1 MiB/s   6.37 c/B
 OFB enc  |  4.27 ns/B   223.3 MiB/s  15.80 c/B
 OFB dec  |  4.28 ns/B   222.7 MiB/s  15.84 c/B
 CTR enc  |  1.73 ns/B   549.9 MiB/s   6.42 c/B
 CTR dec  |  1.75 ns/B   545.1 MiB/s   6.47 c/B
 CCM enc  |  6.31 ns/B   151.2 MiB/s  23.34 c/B
 CCM dec  |  6.42 ns/B   148.5 MiB/s  23.76 c/B
 CCM auth |  4.56 ns/B   208.9 MiB/s  16.89 c/B
 GCM enc  |  1.90 ns/B   502.8 MiB/s   7.02 c/B
 GCM dec  |  2.00 ns/B   477.8 MiB/s   7.38 c/B
 GCM auth | 0.300 ns/B  3178.6 MiB/s   1.11 c/B
 OCB enc  |  1.76 ns/B   542.2 MiB/s   6.51 c/B
 OCB dec  |  1.76 ns/B   540.7 MiB/s   6.53 c/B
 OCB auth |  1.76 ns/B   542.8 MiB/s   6.50 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'src/hwfeatures.c')
-rw-r--r-- src/hwfeatures.c 39
1 files changed, 20 insertions, 19 deletions
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 82f8bf25..b2ae7c3b 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -42,25 +42,26 @@ static struct
const char *desc;
} hwflist[] =
{
- { HWF_PADLOCK_RNG, "padlock-rng" },
- { HWF_PADLOCK_AES, "padlock-aes" },
- { HWF_PADLOCK_SHA, "padlock-sha" },
- { HWF_PADLOCK_MMUL, "padlock-mmul"},
- { HWF_INTEL_CPU, "intel-cpu" },
- { HWF_INTEL_FAST_SHLD, "intel-fast-shld" },
- { HWF_INTEL_BMI2, "intel-bmi2" },
- { HWF_INTEL_SSSE3, "intel-ssse3" },
- { HWF_INTEL_SSE4_1, "intel-sse4.1" },
- { HWF_INTEL_PCLMUL, "intel-pclmul" },
- { HWF_INTEL_AESNI, "intel-aesni" },
- { HWF_INTEL_RDRAND, "intel-rdrand" },
- { HWF_INTEL_AVX, "intel-avx" },
- { HWF_INTEL_AVX2, "intel-avx2" },
- { HWF_ARM_NEON, "arm-neon" },
- { HWF_ARM_AES, "arm-aes" },
- { HWF_ARM_SHA1, "arm-sha1" },
- { HWF_ARM_SHA2, "arm-sha2" },
- { HWF_ARM_PMULL, "arm-pmull" }
+ { HWF_PADLOCK_RNG, "padlock-rng" },
+ { HWF_PADLOCK_AES, "padlock-aes" },
+ { HWF_PADLOCK_SHA, "padlock-sha" },
+ { HWF_PADLOCK_MMUL, "padlock-mmul"},
+ { HWF_INTEL_CPU, "intel-cpu" },
+ { HWF_INTEL_FAST_SHLD, "intel-fast-shld" },
+ { HWF_INTEL_BMI2, "intel-bmi2" },
+ { HWF_INTEL_SSSE3, "intel-ssse3" },
+ { HWF_INTEL_SSE4_1, "intel-sse4.1" },
+ { HWF_INTEL_PCLMUL, "intel-pclmul" },
+ { HWF_INTEL_AESNI, "intel-aesni" },
+ { HWF_INTEL_RDRAND, "intel-rdrand" },
+ { HWF_INTEL_AVX, "intel-avx" },
+ { HWF_INTEL_AVX2, "intel-avx2" },
+ { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
+ { HWF_ARM_NEON, "arm-neon" },
+ { HWF_ARM_AES, "arm-aes" },
+ { HWF_ARM_SHA1, "arm-sha1" },
+ { HWF_ARM_SHA2, "arm-sha2" },
+ { HWF_ARM_PMULL, "arm-pmull" }
};
/* A bit vector with the hardware features which shall not be used.