summaryrefslogtreecommitdiff
path: root/cipher/twofish.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2022-10-23 18:14:40 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2022-10-26 21:43:04 +0300
commitd078a928f5c6024fde89388582b83742d2b8638a (patch)
tree99c9ce4cd35febc6e88c8a0e99660056d6793541 /cipher/twofish.c
parent14f39993d632815db68a5dca90e021891c9547ab (diff)
downloadlibgcrypt-d078a928f5c6024fde89388582b83742d2b8638a.tar.gz
twofish: accelerate XTS and ECB modes
* cipher/twofish-amd64.S (_gcry_twofish_amd64_blk3): New. * cipher/twofish-avx2-amd64.S (_gcry_twofish_avx2_blk16): New. (_gcry_twofish_xts_crypt, _gcry_twofish_ecb_crypt) (_gcry_twofish_avx2_blk16, _gcry_twofish_amd64_blk3) (twofish_crypt_blk1_16, twofish_encrypt_blk1_16) (twofish_decrypt_blk1_16): New. (twofish_setkey): Setup XTS and ECB bulk functions. -- Benchmark on AMD Ryzen 9 7900X: Before: TWOFISH | nanosecs/byte mebibytes/sec cycles/byte auto Mhz ECB enc | 2.52 ns/B 378.2 MiB/s 14.18 c/B 5625 ECB dec | 2.51 ns/B 380.2 MiB/s 14.11 c/B 5625 XTS enc | 2.65 ns/B 359.9 MiB/s 14.91 c/B 5625 XTS dec | 2.63 ns/B 362.0 MiB/s 14.60 c/B 5541 After: TWOFISH | nanosecs/byte mebibytes/sec cycles/byte auto Mhz ECB enc | 1.60 ns/B 594.8 MiB/s 9.02 c/B 5625 ECB dec | 1.60 ns/B 594.8 MiB/s 9.02 c/B 5625 XTS enc | 1.66 ns/B 573.9 MiB/s 9.35 c/B 5625 XTS dec | 1.67 ns/B 569.6 MiB/s 9.41 c/B 5619±2 GnuPG-bug-id: T6242 Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/twofish.c')
-rw-r--r--cipher/twofish.c147
1 files changed, 144 insertions, 3 deletions
diff --git a/cipher/twofish.c b/cipher/twofish.c
index b300715b..92c463fc 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -101,7 +101,12 @@ static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
int encrypt);
static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
size_t nblocks);
-
+static void _gcry_twofish_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+static void _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
/* Structure for an expanded Twofish key. s contains the key-dependent
* S-boxes composed with the MDS matrix; w contains the eight "whitening"
@@ -775,7 +780,9 @@ twofish_setkey (void *context, const byte *key, unsigned int keylen,
bulk_ops->cfb_dec = _gcry_twofish_cfb_dec;
bulk_ops->ctr_enc = _gcry_twofish_ctr_enc;
bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt;
- bulk_ops->ocb_auth = _gcry_twofish_ocb_auth;
+ bulk_ops->ocb_auth = _gcry_twofish_ocb_auth;
+ bulk_ops->xts_crypt = _gcry_twofish_xts_crypt;
+ bulk_ops->ecb_crypt = _gcry_twofish_ecb_crypt;
(void)hwfeatures;
@@ -788,6 +795,9 @@ twofish_setkey (void *context, const byte *key, unsigned int keylen,
/* Assembler implementations of Twofish using AVX2. Process 16 block in
parallel.
*/
+extern void _gcry_twofish_avx2_blk16 (const TWOFISH_context *c, byte *out,
+ const byte *in, int encrypt) ASM_FUNC_ABI;
+
extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
unsigned char *out,
const unsigned char *in,
@@ -835,6 +845,9 @@ extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
byte *out, const byte *in);
/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_twofish_amd64_blk3(const TWOFISH_context *c, byte *out,
+ const byte *in, int encrypt);
+
extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
const byte *in, byte *ctr);
@@ -1501,7 +1514,7 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
blkn += 3;
twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum, Ls);
+ c->u_mode.ocb.aad_sum, Ls);
nblocks -= 3;
abuf += 3 * TWOFISH_BLOCKSIZE;
@@ -1527,6 +1540,134 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
}
+static unsigned int
+twofish_crypt_blk1_16(const void *context, byte *out, const byte *in,
+ unsigned int num_blks, int encrypt)
+{
+ const TWOFISH_context *ctx = context;
+ unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+ if (num_blks == 16 && ctx->use_avx2)
+ {
+ _gcry_twofish_avx2_blk16 (ctx, out, in, encrypt);
+ return 0;
+ }
+#endif
+
+#ifdef USE_AMD64_ASM
+ while (num_blks >= 3)
+ {
+ _gcry_twofish_amd64_blk3 (ctx, out, in, encrypt);
+ burn = 8 * sizeof(void *);
+ burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
+ out += 3 * TWOFISH_BLOCKSIZE;
+ in += 3 * TWOFISH_BLOCKSIZE;
+ num_blks -= 3;
+ }
+#endif
+
+ while (num_blks >= 1)
+ {
+ if (encrypt)
+ burn = twofish_encrypt((void *)ctx, out, in);
+ else
+ burn = twofish_decrypt((void *)ctx, out, in);
+
+ burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
+ out += TWOFISH_BLOCKSIZE;
+ in += TWOFISH_BLOCKSIZE;
+ num_blks--;
+ }
+
+ return burn_stack_depth;
+}
+
+static unsigned int
+twofish_encrypt_blk1_16(const void *ctx, byte *out, const byte *in,
+ unsigned int num_blks)
+{
+ return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 1);
+}
+
+static unsigned int
+twofish_decrypt_blk1_16(const void *ctx, byte *out, const byte *in,
+ unsigned int num_blks)
+{
+ return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 0);
+}
+
+
+/* Bulk encryption/decryption of complete blocks in XTS mode. */
+static void
+_gcry_twofish_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+ TWOFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 0;
+
+ /* Process remaining blocks. */
+ if (nblocks)
+ {
+ unsigned char tmpbuf[16 * 16];
+ unsigned int tmp_used = 16;
+ size_t tmpbufsize = 15 * 16;
+ size_t nburn;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ tmpbufsize = 16 * 16;
+#endif
+
+ nburn = bulk_xts_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16
+ : twofish_decrypt_blk1_16,
+ outbuf, inbuf, nblocks,
+ tweak, tmpbuf, tmpbufsize / 16,
+ &tmp_used);
+ burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
+
+ wipememory(tmpbuf, tmp_used);
+ }
+
+ if (burn_stack_depth)
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk encryption/decryption in ECB mode. */
+static void
+_gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+ TWOFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 0;
+
+ /* Process remaining blocks. */
+ if (nblocks)
+ {
+ size_t fn_maxblocks = 15;
+ size_t nburn;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ fn_maxblocks = 16;
+#endif
+
+ nburn = bulk_ecb_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16
+ : twofish_decrypt_blk1_16,
+ outbuf, inbuf, nblocks, fn_maxblocks);
+ burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
+ }
+
+ if (burn_stack_depth)
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
/* Test a single encryption and decryption with each key size. */