diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2021-01-19 19:38:15 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2021-02-28 19:29:26 +0200 |
commit | e72498a54fdead503fb0c1c93e2b747cd91e7681 (patch) | |
tree | 2551204ad3b601e39da6bd2fe57991f4693b69cd /cipher/rijndael.c | |
parent | ffe1d53197031eeb65601c5aeafe55e78079a75e (diff) | |
download | libgcrypt-e72498a54fdead503fb0c1c93e2b747cd91e7681.tar.gz |
rijndael: add x86_64 VAES/AVX2 accelerated implementation
* cipher/Makefile.am: Add 'rijndael-vaes.c' and
'rijndael-vaes-avx2-amd64.S'.
* cipher/rijndael-internal.h (USE_VAES): New.
* cipher/rijndael-vaes-avx2-amd64.S: New.
* cipher/rijndael-vaes.c: New.
* cipher/rijndael.c (_gcry_aes_vaes_cfb_dec, _gcry_aes_vaes_cbc_dec)
(_gcry_aes_vaes_ctr_enc, _gcry_aes_vaes_ocb_crypt)
(_gcry_aes_vaes_xts_crypt): New.
(do_setkey) [USE_VAES]: Add detection for VAES.
(selftest_ctr_128, selftest_cbc_128, selftest_cfb_128)
[USE_VAES]: Increase number of selftest blocks.
* configure.ac: Add 'rijndael-vaes.lo' and
'rijndael-vaes-avx2-amd64.lo'.
--
Patch adds VAES/AVX2 accelerated implementation for CBC-decryption,
CFB-decryption, CTR-encryption, OCB-en/decryption and XTS-en/decryption.
Benchmarks on AMD Ryzen 5800X:
Before:
AES | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
CBC dec | 0.067 ns/B 14314 MiB/s 0.323 c/B 4850
CFB dec | 0.067 ns/B 14322 MiB/s 0.323 c/B 4850
CTR enc | 0.066 ns/B 14429 MiB/s 0.321 c/B 4850
CTR dec | 0.066 ns/B 14433 MiB/s 0.320 c/B 4850
XTS enc | 0.087 ns/B 10910 MiB/s 0.424 c/B 4850
XTS dec | 0.088 ns/B 10856 MiB/s 0.426 c/B 4850
OCB enc | 0.070 ns/B 13633 MiB/s 0.339 c/B 4850
OCB dec | 0.069 ns/B 13911 MiB/s 0.332 c/B 4850
After (XTS ~1.7x faster, others ~1.9x faster):
AES | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
CBC dec | 0.034 ns/B 28159 MiB/s 0.164 c/B 4850
CFB dec | 0.034 ns/B 27955 MiB/s 0.165 c/B 4850
CTR enc | 0.034 ns/B 28214 MiB/s 0.164 c/B 4850
CTR dec | 0.034 ns/B 28146 MiB/s 0.164 c/B 4850
XTS enc | 0.051 ns/B 18539 MiB/s 0.249 c/B 4850
XTS dec | 0.051 ns/B 18655 MiB/s 0.248 c/B 4850
GCM auth | 0.088 ns/B 10817 MiB/s 0.428 c/B 4850
OCB enc | 0.037 ns/B 25824 MiB/s 0.179 c/B 4850
OCB dec | 0.038 ns/B 25359 MiB/s 0.182 c/B 4850
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/rijndael.c')
-rw-r--r-- | cipher/rijndael.c | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/cipher/rijndael.c b/cipher/rijndael.c index fe137327..0b529030 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -102,6 +102,26 @@ extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak, size_t nblocks, int encrypt); #endif +#ifdef USE_VAES +/* VAES (AMD64) accelerated implementation of AES */ + +extern void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); +#endif + #ifdef USE_SSSE3 /* SSSE3 (AMD64) vector permutation implementation of AES */ extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); @@ -480,6 +500,19 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, bulk_ops->ocb_crypt = _gcry_aes_aesni_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_aesni_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_aesni_xts_crypt; + +#ifdef USE_VAES + if ((hwfeatures & HWF_INTEL_VAES_VPCLMUL) && + (hwfeatures & HWF_INTEL_AVX2)) + { + /* Setup VAES bulk encryption routines. */ + bulk_ops->cfb_dec = _gcry_aes_vaes_cfb_dec; + bulk_ops->cbc_dec = _gcry_aes_vaes_cbc_dec; + bulk_ops->ctr_enc = _gcry_aes_vaes_ctr_enc; + bulk_ops->ocb_crypt = _gcry_aes_vaes_ocb_crypt; + bulk_ops->xts_crypt = _gcry_aes_vaes_xts_crypt; + } +#endif } #endif #ifdef USE_PADLOCK @@ -1644,7 +1677,11 @@ selftest_basic_256 (void) static const char* selftest_ctr_128 (void) { +#ifdef USE_VAES + const int nblocks = 16+1; +#else const int nblocks = 8+1; +#endif const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); @@ -1658,7 +1695,11 @@ selftest_ctr_128 (void) static const char* selftest_cbc_128 (void) { +#ifdef USE_VAES + const int nblocks = 16+2; +#else const int nblocks = 8+2; +#endif const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); @@ -1672,7 +1713,11 @@ selftest_cbc_128 (void) static const char* selftest_cfb_128 (void) { +#ifdef USE_VAES + const int nblocks = 16+2; +#else const int nblocks = 8+2; +#endif const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); |