diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-10-23 18:14:40 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-10-26 21:43:04 +0300 |
commit | d078a928f5c6024fde89388582b83742d2b8638a (patch) | |
tree | 99c9ce4cd35febc6e88c8a0e99660056d6793541 /cipher/twofish.c | |
parent | 14f39993d632815db68a5dca90e021891c9547ab (diff) | |
download | libgcrypt-d078a928f5c6024fde89388582b83742d2b8638a.tar.gz |
twofish: accelerate XTS and ECB modes
* cipher/twofish-amd64.S (_gcry_twofish_amd64_blk3): New.
* cipher/twofish-avx2-amd64.S (_gcry_twofish_avx2_blk16): New.
* cipher/twofish.c (_gcry_twofish_xts_crypt, _gcry_twofish_ecb_crypt)
(_gcry_twofish_avx2_blk16, _gcry_twofish_amd64_blk3)
(twofish_crypt_blk1_16, twofish_encrypt_blk1_16)
(twofish_decrypt_blk1_16): New.
(twofish_setkey): Setup XTS and ECB bulk functions.
--
Benchmark on AMD Ryzen 9 7900X:
Before:
TWOFISH | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
ECB enc | 2.52 ns/B 378.2 MiB/s 14.18 c/B 5625
ECB dec | 2.51 ns/B 380.2 MiB/s 14.11 c/B 5625
XTS enc | 2.65 ns/B 359.9 MiB/s 14.91 c/B 5625
XTS dec | 2.63 ns/B 362.0 MiB/s 14.60 c/B 5541
After:
TWOFISH | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
ECB enc | 1.60 ns/B 594.8 MiB/s 9.02 c/B 5625
ECB dec | 1.60 ns/B 594.8 MiB/s 9.02 c/B 5625
XTS enc | 1.66 ns/B 573.9 MiB/s 9.35 c/B 5625
XTS dec | 1.67 ns/B 569.6 MiB/s 9.41 c/B 5619±2
GnuPG-bug-id: T6242
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/twofish.c')
-rw-r--r-- | cipher/twofish.c | 147 |
1 files changed, 144 insertions, 3 deletions
diff --git a/cipher/twofish.c b/cipher/twofish.c index b300715b..92c463fc 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -101,7 +101,12 @@ static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, int encrypt); static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); - +static void _gcry_twofish_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); +static void _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); /* Structure for an expanded Twofish key. s contains the key-dependent * S-boxes composed with the MDS matrix; w contains the eight "whitening" @@ -775,7 +780,9 @@ twofish_setkey (void *context, const byte *key, unsigned int keylen, bulk_ops->cfb_dec = _gcry_twofish_cfb_dec; bulk_ops->ctr_enc = _gcry_twofish_ctr_enc; bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt; - bulk_ops->ocb_auth = _gcry_twofish_ocb_auth; + bulk_ops->ocb_auth = _gcry_twofish_ocb_auth; + bulk_ops->xts_crypt = _gcry_twofish_xts_crypt; + bulk_ops->ecb_crypt = _gcry_twofish_ecb_crypt; (void)hwfeatures; @@ -788,6 +795,9 @@ twofish_setkey (void *context, const byte *key, unsigned int keylen, /* Assembler implementations of Twofish using AVX2. Process 16 block in parallel. */ +extern void _gcry_twofish_avx2_blk16 (const TWOFISH_context *c, byte *out, + const byte *in, int encrypt) ASM_FUNC_ABI; + extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, @@ -835,6 +845,9 @@ extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in); /* These assembly implementations process three blocks in parallel. 
*/ +extern void _gcry_twofish_amd64_blk3(const TWOFISH_context *c, byte *out, + const byte *in, int encrypt); + extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, byte *ctr); @@ -1501,7 +1514,7 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, blkn += 3; twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, - c->u_mode.ocb.aad_sum, Ls); + c->u_mode.ocb.aad_sum, Ls); nblocks -= 3; abuf += 3 * TWOFISH_BLOCKSIZE; @@ -1527,6 +1540,134 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, } +static unsigned int +twofish_crypt_blk1_16(const void *context, byte *out, const byte *in, + unsigned int num_blks, int encrypt) +{ + const TWOFISH_context *ctx = context; + unsigned int burn, burn_stack_depth = 0; + +#ifdef USE_AVX2 + if (num_blks == 16 && ctx->use_avx2) + { + _gcry_twofish_avx2_blk16 (ctx, out, in, encrypt); + return 0; + } +#endif + +#ifdef USE_AMD64_ASM + while (num_blks >= 3) + { + _gcry_twofish_amd64_blk3 (ctx, out, in, encrypt); + burn = 8 * sizeof(void *); + burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth; + out += 3 * TWOFISH_BLOCKSIZE; + in += 3 * TWOFISH_BLOCKSIZE; + num_blks -= 3; + } +#endif + + while (num_blks >= 1) + { + if (encrypt) + burn = twofish_encrypt((void *)ctx, out, in); + else + burn = twofish_decrypt((void *)ctx, out, in); + + burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth; + out += TWOFISH_BLOCKSIZE; + in += TWOFISH_BLOCKSIZE; + num_blks--; + } + + return burn_stack_depth; +} + +static unsigned int +twofish_encrypt_blk1_16(const void *ctx, byte *out, const byte *in, + unsigned int num_blks) +{ + return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 1); +} + +static unsigned int +twofish_decrypt_blk1_16(const void *ctx, byte *out, const byte *in, + unsigned int num_blks) +{ + return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 0); +} + + +/* Bulk encryption/decryption of complete blocks in XTS mode. 
*/ +static void +_gcry_twofish_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + TWOFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 0; + + /* Process remaining blocks. */ + if (nblocks) + { + unsigned char tmpbuf[16 * 16]; + unsigned int tmp_used = 16; + size_t tmpbufsize = 15 * 16; + size_t nburn; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + tmpbufsize = 16 * 16; +#endif + + nburn = bulk_xts_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16 + : twofish_decrypt_blk1_16, + outbuf, inbuf, nblocks, + tweak, tmpbuf, tmpbufsize / 16, + &tmp_used); + burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; + + wipememory(tmpbuf, tmp_used); + } + + if (burn_stack_depth) + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk encryption/decryption in ECB mode. */ +static void +_gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + TWOFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 0; + + /* Process remaining blocks. */ + if (nblocks) + { + size_t fn_maxblocks = 15; + size_t nburn; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + fn_maxblocks = 16; +#endif + + nburn = bulk_ecb_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16 + : twofish_decrypt_blk1_16, + outbuf, inbuf, nblocks, fn_maxblocks); + burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; + } + + if (burn_stack_depth) + _gcry_burn_stack(burn_stack_depth); +} + + /* Test a single encryption and decryption with each key size. */ |