summary | refs | log | tree | commit | diff
path: root/cipher/rijndael.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi>, 2021-01-19 19:38:15 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi>, 2021-02-28 19:29:26 +0200
commit: e72498a54fdead503fb0c1c93e2b747cd91e7681 (patch)
tree: 2551204ad3b601e39da6bd2fe57991f4693b69cd /cipher/rijndael.c
parent: ffe1d53197031eeb65601c5aeafe55e78079a75e (diff)
download: libgcrypt-e72498a54fdead503fb0c1c93e2b747cd91e7681.tar.gz
rijndael: add x86_64 VAES/AVX2 accelerated implementation
* cipher/Makefile.am: Add 'rijndael-vaes.c' and
'rijndael-vaes-avx2-amd64.S'.
* cipher/rijndael-internal.h (USE_VAES): New.
* cipher/rijndael-vaes-avx2-amd64.S: New.
* cipher/rijndael-vaes.c: New.
* cipher/rijndael.c (_gcry_aes_vaes_cfb_dec, _gcry_aes_vaes_cbc_dec)
(_gcry_aes_vaes_ctr_enc, _gcry_aes_vaes_ocb_crypt)
(_gcry_aes_vaes_xts_crypt): New.
(do_setkey) [USE_VAES]: Add detection for VAES.
(selftest_ctr_128, selftest_cbc_128, selftest_cfb_128) [USE_VAES]: Increase
number of selftest blocks.
* configure.ac: Add 'rijndael-vaes.lo' and
'rijndael-vaes-avx2-amd64.lo'.
--

Patch adds VAES/AVX2 accelerated implementation for CBC-decryption,
CFB-decryption, CTR-encryption, OCB-en/decryption and XTS-en/decryption.

Benchmarks on AMD Ryzen 5800X:

Before:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte  auto Mhz
        CBC dec |     0.067 ns/B     14314 MiB/s     0.323 c/B      4850
        CFB dec |     0.067 ns/B     14322 MiB/s     0.323 c/B      4850
        CTR enc |     0.066 ns/B     14429 MiB/s     0.321 c/B      4850
        CTR dec |     0.066 ns/B     14433 MiB/s     0.320 c/B      4850
        XTS enc |     0.087 ns/B     10910 MiB/s     0.424 c/B      4850
        XTS dec |     0.088 ns/B     10856 MiB/s     0.426 c/B      4850
        OCB enc |     0.070 ns/B     13633 MiB/s     0.339 c/B      4850
        OCB dec |     0.069 ns/B     13911 MiB/s     0.332 c/B      4850

After (XTS ~1.7x faster, others ~1.9x faster):
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte  auto Mhz
        CBC dec |     0.034 ns/B     28159 MiB/s     0.164 c/B      4850
        CFB dec |     0.034 ns/B     27955 MiB/s     0.165 c/B      4850
        CTR enc |     0.034 ns/B     28214 MiB/s     0.164 c/B      4850
        CTR dec |     0.034 ns/B     28146 MiB/s     0.164 c/B      4850
        XTS enc |     0.051 ns/B     18539 MiB/s     0.249 c/B      4850
        XTS dec |     0.051 ns/B     18655 MiB/s     0.248 c/B      4850
       GCM auth |     0.088 ns/B     10817 MiB/s     0.428 c/B      4850
        OCB enc |     0.037 ns/B     25824 MiB/s     0.179 c/B      4850
        OCB dec |     0.038 ns/B     25359 MiB/s     0.182 c/B      4850

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/rijndael.c')
-rw-r--r--  cipher/rijndael.c | 45
1 file changed, 45 insertions, 0 deletions
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index fe137327..0b529030 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -102,6 +102,26 @@ extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak,
size_t nblocks, int encrypt);
#endif
+#ifdef USE_VAES
+/* VAES (AMD64) accelerated implementation of AES */
+
+extern void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+#endif
+
#ifdef USE_SSSE3
/* SSSE3 (AMD64) vector permutation implementation of AES */
extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key);
@@ -480,6 +500,19 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
bulk_ops->ocb_crypt = _gcry_aes_aesni_ocb_crypt;
bulk_ops->ocb_auth = _gcry_aes_aesni_ocb_auth;
bulk_ops->xts_crypt = _gcry_aes_aesni_xts_crypt;
+
+#ifdef USE_VAES
+ if ((hwfeatures & HWF_INTEL_VAES_VPCLMUL) &&
+ (hwfeatures & HWF_INTEL_AVX2))
+ {
+ /* Setup VAES bulk encryption routines. */
+ bulk_ops->cfb_dec = _gcry_aes_vaes_cfb_dec;
+ bulk_ops->cbc_dec = _gcry_aes_vaes_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_vaes_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_vaes_ocb_crypt;
+ bulk_ops->xts_crypt = _gcry_aes_vaes_xts_crypt;
+ }
+#endif
}
#endif
#ifdef USE_PADLOCK
@@ -1644,7 +1677,11 @@ selftest_basic_256 (void)
static const char*
selftest_ctr_128 (void)
{
+#ifdef USE_VAES
+ const int nblocks = 16+1;
+#else
const int nblocks = 8+1;
+#endif
const int blocksize = BLOCKSIZE;
const int context_size = sizeof(RIJNDAEL_context);
@@ -1658,7 +1695,11 @@ selftest_ctr_128 (void)
static const char*
selftest_cbc_128 (void)
{
+#ifdef USE_VAES
+ const int nblocks = 16+2;
+#else
const int nblocks = 8+2;
+#endif
const int blocksize = BLOCKSIZE;
const int context_size = sizeof(RIJNDAEL_context);
@@ -1672,7 +1713,11 @@ selftest_cbc_128 (void)
static const char*
selftest_cfb_128 (void)
{
+#ifdef USE_VAES
+ const int nblocks = 16+2;
+#else
const int nblocks = 8+2;
+#endif
const int blocksize = BLOCKSIZE;
const int context_size = sizeof(RIJNDAEL_context);