summaryrefslogtreecommitdiff
path: root/cipher/cipher-ocb.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2018-11-20 21:16:08 +0200
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2018-11-20 21:16:08 +0200
commitb42de67f34871a2520cfe370af513f2aab6e4f75 (patch)
treebaeb81d5ad5d1a9688f0e7d123f639de7558386b /cipher/cipher-ocb.c
parentaf0bbdb9019e0b4a72e87e8b1b4a55506d349834 (diff)
downloadlibgcrypt-b42de67f34871a2520cfe370af513f2aab6e4f75.tar.gz
Optimizations for AES-NI OCB
* cipher/cipher-internal.h (gcry_cipher_handle): New pre-computed OCB values L0L1 and L0L1L0; Swap dimensions for OCB L table. * cipher/cipher-ocb.c (_gcry_cipher_ocb_set_nonce): Setup L0L1 and L0L1L0 values. (ocb_crypt): Process input in 24KiB chunks for better cache locality for checksumming. * cipher/rijndael-aesni.c (ALWAYS_INLINE): New macro for always inlining functions, change all functions with 'inline' to use ALWAYS_INLINE. (NO_INLINE): New macro. (aesni_prepare_2_6_variable, aesni_prepare_7_15_variable): Rename to... (aesni_prepare_2_7_variable, aesni_prepare_8_15_variable): ...these and adjust accordingly (xmm7 moved from *_7_15 to *_2_7). (aesni_prepare_2_6, aesni_prepare_7_15): Rename to... (aesni_prepare_2_7, aesni_prepare_8_15): ...these and adjust accordingly. (aesni_cleanup_2_6, aesni_cleanup_7_15): Rename to... (aesni_cleanup_2_7, aesni_cleanup_8_15): ...these and adjust accordingly. (aesni_ocb_checksum): New. (aesni_ocb_enc, aesni_ocb_dec): Calculate OCB offsets in parallel with help of pre-computed offsets L0+L1 and L0+L1+L0; Do checksum calculation as separate pass instead of inline; Use NO_INLINE. (_gcry_aes_aesni_ocb_auth): Calculate OCB offsets in parallel with help of pre-computed offsets L0+L1 and L0+L1+L0. * cipher/rijndael-internal.h (RIJNDAEL_context_s) [USE_AESNI]: Add 'use_avx2' and 'use_avx'. * cipher/rijndael.c (do_setkey) [USE_AESNI]: Set 'use_avx2' if Intel AVX2 HW feature is available and 'use_avx' if Intel AVX HW feature is available. * tests/basic.c (do_check_ocb_cipher): New test vector; increase size of temporary buffers for new test vector. (check_ocb_cipher_largebuf_split): Make test plaintext non-uniform for better checksum testing. (check_ocb_cipher_checksum): New. (check_ocb_cipher_largebuf): Call check_ocb_cipher_checksum. (check_ocb_cipher): New expected tags for check_ocb_cipher_largebuf test runs.
--
Benchmark on Haswell i7-4970k @ 4.0GHz:

Before:
 AES      |  nanosecs/byte   mebibytes/sec   cycles/byte
 OCB enc  |     0.175 ns/B      5436 MiB/s     0.702 c/B
 OCB dec  |     0.184 ns/B      5184 MiB/s     0.736 c/B
 OCB auth |     0.156 ns/B      6097 MiB/s     0.626 c/B

After (enc +2% faster, dec +7% faster):
 OCB enc  |     0.172 ns/B      5547 MiB/s     0.688 c/B
 OCB dec  |     0.171 ns/B      5582 MiB/s     0.683 c/B
 OCB auth |     0.156 ns/B      6097 MiB/s     0.626 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/cipher-ocb.c')
-rw-r--r--cipher/cipher-ocb.c11
1 files changed, 11 insertions, 0 deletions
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index f71520ad..58f7be7e 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -170,6 +170,11 @@ _gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
double_block_cpy (c->u_mode.ocb.L[0], c->u_mode.ocb.L_dollar);
for (i = 1; i < OCB_L_TABLE_SIZE; i++)
double_block_cpy (c->u_mode.ocb.L[i], c->u_mode.ocb.L[i-1]);
+ /* Precalculated offsets L0+L1, L0+L1+L0 */
+ cipher_block_xor (c->u_mode.ocb.L0L1,
+ c->u_mode.ocb.L[0], c->u_mode.ocb.L[1], OCB_BLOCK_LEN);
+ cipher_block_xor (c->u_mode.ocb.L0L1L0,
+ c->u_mode.ocb.L[0], c->u_mode.ocb.L0L1, OCB_BLOCK_LEN);
/* Prepare the nonce. */
memset (ktop, 0, (OCB_BLOCK_LEN - noncelen));
@@ -519,6 +524,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
nblks = nblks < nmaxblks ? nblks : nmaxblks;
+ /* Since checksum xoring is done before/after encryption/decryption,
+ process input in 24KiB chunks to keep data loaded in L1 cache for
+ checksumming. */
+ if (nblks > 24 * 1024 / OCB_BLOCK_LEN)
+ nblks = 24 * 1024 / OCB_BLOCK_LEN;
+
/* Use a bulk method if available. */
if (nblks && c->bulk.ocb_crypt)
{