summaryrefslogtreecommitdiff
path: root/cipher/bulkhelp.h
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2022-04-24 01:30:30 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2022-04-29 23:49:50 +0300
commit32b18cdb87b771f5c1ec87ef5e0f115f3f2d362f (patch)
treefc9497449d4872a9d9e287124a27eafae82c3e3b /cipher/bulkhelp.h
parentbacdc1de3f4fe063054af4e36e7fdfa5b00ccb64 (diff)
downloadlibgcrypt-32b18cdb87b771f5c1ec87ef5e0f115f3f2d362f.tar.gz
camellia-avx2: add bulk processing for XTS mode
* cipher/bulkhelp.h (bulk_xts_crypt_128): New.
* cipher/camellia-glue.c (_gcry_camellia_xts_crypt): New.
(camellia_set_key) [USE_AESNI_AVX2]: Set XTS bulk function if AVX2
implementation is available.
--

Benchmark on AMD Ryzen 5800X:

Before:
 CAMELLIA128   |  nanosecs/byte   mebibytes/sec   cycles/byte  auto Mhz
       XTS enc |      3.79 ns/B     251.8 MiB/s     18.37 c/B      4850
       XTS dec |      3.77 ns/B     253.2 MiB/s     18.27 c/B      4850

After (6.8x faster):
 CAMELLIA128   |  nanosecs/byte   mebibytes/sec   cycles/byte  auto Mhz
       XTS enc |     0.554 ns/B      1720 MiB/s      2.69 c/B      4850
       XTS dec |     0.541 ns/B      1762 MiB/s      2.63 c/B      4850

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/bulkhelp.h')
-rw-r--r--cipher/bulkhelp.h68
1 files changed, 68 insertions, 0 deletions
diff --git a/cipher/bulkhelp.h b/cipher/bulkhelp.h
index c9ecaba6..b1b4b2e1 100644
--- a/cipher/bulkhelp.h
+++ b/cipher/bulkhelp.h
@@ -325,4 +325,72 @@ bulk_ocb_auth_128 (gcry_cipher_hd_t c, void *priv, bulk_crypt_fn_t crypt_fn,
}
+/* Bulk XTS en/decryption of 128-bit (16-byte) blocks.
+ *
+ * PRIV/CRYPT_FN: opaque cipher context and the bulk function that
+ * en/decrypts a batch of blocks in place and returns a stack burn depth.
+ * OUTBUF/INBUF: destination and source, NBLOCKS * 16 bytes each.
+ * TWEAK: 16-byte XTS tweak, read little-endian on entry and updated in
+ * place on exit with the tweak following the last processed block.
+ * TMPBUF: scratch area of TMPBUF_NBLOCKS * 16 bytes, used to stash the
+ * per-block tweaks across the CRYPT_FN call.
+ * NUM_USED_TMPBLOCKS: receives how much of TMPBUF was written.
+ *   NOTE(review): the value is a byte count (initialized to 16,
+ *   compared against curr_blks * 16), not a block count as the name
+ *   suggests -- presumably the caller wipes this many bytes; confirm
+ *   against call sites.
+ * Returns the maximum burn depth reported by CRYPT_FN.  */
+static inline unsigned int
+bulk_xts_crypt_128 (void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf,
+                    const byte *inbuf, size_t nblocks, byte *tweak,
+                    byte *tmpbuf, size_t tmpbuf_nblocks,
+                    unsigned int *num_used_tmpblocks)
+{
+  u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry;
+  unsigned int tmp_used = 16;
+  unsigned int burn_depth = 0;
+  unsigned int nburn;
+
+  /* Load the current tweak (XTS tweaks are little-endian).  */
+  tweak_next_lo = buf_get_le64 (tweak + 0);
+  tweak_next_hi = buf_get_le64 (tweak + 8);
+
+  /* Process input in batches of at most TMPBUF_NBLOCKS blocks.  */
+  while (nblocks >= 1)
+    {
+      size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks;
+      size_t i;
+
+      /* Track the high-water mark of scratch bytes used.  */
+      if (curr_blks * 16 > tmp_used)
+        tmp_used = curr_blks * 16;
+
+      /* First pass: xor plaintext with the tweak and stash each
+         block's tweak in TMPBUF for the second xor pass below.  */
+      for (i = 0; i < curr_blks; i++)
+        {
+          tweak_lo = tweak_next_lo;
+          tweak_hi = tweak_next_hi;
+
+          /* Generate next tweak. */
+          /* Doubling in GF(2^128): branchless -- CARRY is 0x87 (the
+             XTS reduction polynomial) when the top bit is set, else 0.  */
+          carry = -(tweak_next_hi >> 63) & 0x87;
+          tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63);
+          tweak_next_lo = (tweak_next_lo << 1) ^ carry;
+
+          /* Xor-Encrypt/Decrypt-Xor block. */
+          tmp_lo = buf_get_le64 (inbuf + i * 16 + 0) ^ tweak_lo;
+          tmp_hi = buf_get_le64 (inbuf + i * 16 + 8) ^ tweak_hi;
+          /* Tweak is stored host-endian: it is only read back below,
+             so a byte-order round-trip is unnecessary.  */
+          buf_put_he64 (&tmpbuf[i * 16 + 0], tweak_lo);
+          buf_put_he64 (&tmpbuf[i * 16 + 8], tweak_hi);
+          buf_put_le64 (outbuf + i * 16 + 0, tmp_lo);
+          buf_put_le64 (outbuf + i * 16 + 8, tmp_hi);
+        }
+
+      /* En/decrypt the batch in place; keep the largest burn depth.  */
+      nburn = crypt_fn (priv, outbuf, outbuf, curr_blks);
+      burn_depth = nburn > burn_depth ? nburn : burn_depth;
+
+      /* Second pass: xor the cipher output with the saved tweaks.  */
+      for (i = 0; i < curr_blks; i++)
+        {
+          /* Xor-Encrypt/Decrypt-Xor block. */
+          tweak_lo = buf_get_he64 (&tmpbuf[i * 16 + 0]);
+          tweak_hi = buf_get_he64 (&tmpbuf[i * 16 + 8]);
+          tmp_lo = buf_get_le64 (outbuf + i * 16 + 0) ^ tweak_lo;
+          tmp_hi = buf_get_le64 (outbuf + i * 16 + 8) ^ tweak_hi;
+          buf_put_le64 (outbuf + i * 16 + 0, tmp_lo);
+          buf_put_le64 (outbuf + i * 16 + 8, tmp_hi);
+        }
+
+      inbuf += curr_blks * 16;
+      outbuf += curr_blks * 16;
+      nblocks -= curr_blks;
+    }
+
+  /* Write the next tweak back for a subsequent call.  */
+  buf_put_le64 (tweak + 0, tweak_next_lo);
+  buf_put_le64 (tweak + 8, tweak_next_hi);
+
+  *num_used_tmpblocks = tmp_used;
+  return burn_depth;
+}
+
+
+
+
#endif /*GCRYPT_BULKHELP_H*/