diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-04-24 01:30:30 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-04-29 23:49:50 +0300 |
commit | 32b18cdb87b771f5c1ec87ef5e0f115f3f2d362f (patch) | |
tree | fc9497449d4872a9d9e287124a27eafae82c3e3b /cipher/bulkhelp.h | |
parent | bacdc1de3f4fe063054af4e36e7fdfa5b00ccb64 (diff) | |
download | libgcrypt-32b18cdb87b771f5c1ec87ef5e0f115f3f2d362f.tar.gz |
camellia-avx2: add bulk processing for XTS mode
* cipher/bulkhelp.h (bulk_xts_crypt_128): New.
* cipher/camellia-glue.c (_gcry_camellia_xts_crypt): New.
(camellia_set_key) [USE_AESNI_AVX2]: Set XTS bulk function if AVX2
implementation is available.
--
Benchmark on AMD Ryzen 5800X:
Before:
CAMELLIA128 | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
XTS enc | 3.79 ns/B 251.8 MiB/s 18.37 c/B 4850
XTS dec | 3.77 ns/B 253.2 MiB/s 18.27 c/B 4850
After (6.8x faster):
CAMELLIA128 | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
XTS enc | 0.554 ns/B 1720 MiB/s 2.69 c/B 4850
XTS dec | 0.541 ns/B 1762 MiB/s 2.63 c/B 4850
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/bulkhelp.h')
-rw-r--r-- | cipher/bulkhelp.h | 68 |
1 file changed, 68 insertions(+), 0 deletions(-)
diff --git a/cipher/bulkhelp.h b/cipher/bulkhelp.h index c9ecaba6..b1b4b2e1 100644 --- a/cipher/bulkhelp.h +++ b/cipher/bulkhelp.h @@ -325,4 +325,72 @@ bulk_ocb_auth_128 (gcry_cipher_hd_t c, void *priv, bulk_crypt_fn_t crypt_fn, } +static inline unsigned int +bulk_xts_crypt_128 (void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf, + const byte *inbuf, size_t nblocks, byte *tweak, + byte *tmpbuf, size_t tmpbuf_nblocks, + unsigned int *num_used_tmpblocks) +{ + u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; + unsigned int tmp_used = 16; + unsigned int burn_depth = 0; + unsigned int nburn; + + tweak_next_lo = buf_get_le64 (tweak + 0); + tweak_next_hi = buf_get_le64 (tweak + 8); + + while (nblocks >= 1) + { + size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; + size_t i; + + if (curr_blks * 16 > tmp_used) + tmp_used = curr_blks * 16; + + for (i = 0; i < curr_blks; i++) + { + tweak_lo = tweak_next_lo; + tweak_hi = tweak_next_hi; + + /* Generate next tweak. */ + carry = -(tweak_next_hi >> 63) & 0x87; + tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); + tweak_next_lo = (tweak_next_lo << 1) ^ carry; + + /* Xor-Encrypt/Decrypt-Xor block. */ + tmp_lo = buf_get_le64 (inbuf + i * 16 + 0) ^ tweak_lo; + tmp_hi = buf_get_le64 (inbuf + i * 16 + 8) ^ tweak_hi; + buf_put_he64 (&tmpbuf[i * 16 + 0], tweak_lo); + buf_put_he64 (&tmpbuf[i * 16 + 8], tweak_hi); + buf_put_le64 (outbuf + i * 16 + 0, tmp_lo); + buf_put_le64 (outbuf + i * 16 + 8, tmp_hi); + } + + nburn = crypt_fn (priv, outbuf, outbuf, curr_blks); + burn_depth = nburn > burn_depth ? nburn : burn_depth; + + for (i = 0; i < curr_blks; i++) + { + /* Xor-Encrypt/Decrypt-Xor block. 
*/ + tweak_lo = buf_get_he64 (&tmpbuf[i * 16 + 0]); + tweak_hi = buf_get_he64 (&tmpbuf[i * 16 + 8]); + tmp_lo = buf_get_le64 (outbuf + i * 16 + 0) ^ tweak_lo; + tmp_hi = buf_get_le64 (outbuf + i * 16 + 8) ^ tweak_hi; + buf_put_le64 (outbuf + i * 16 + 0, tmp_lo); + buf_put_le64 (outbuf + i * 16 + 8, tmp_hi); + } + + inbuf += curr_blks * 16; + outbuf += curr_blks * 16; + nblocks -= curr_blks; + } + + buf_put_le64 (tweak + 0, tweak_next_lo); + buf_put_le64 (tweak + 8, tweak_next_hi); + + *num_used_tmpblocks = tmp_used; + return burn_depth; +} + + #endif /*GCRYPT_BULKHELP_H*/ |