diff options
-rw-r--r-- | cipher/twofish-amd64.S | 74 | ||||
-rw-r--r-- | cipher/twofish-avx2-amd64.S | 46 | ||||
-rw-r--r-- | cipher/twofish.c | 147 |
3 files changed, 264 insertions, 3 deletions
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S index a7a60553..8998d296 100644 --- a/cipher/twofish-amd64.S +++ b/cipher/twofish-amd64.S @@ -545,6 +545,80 @@ __twofish_dec_blk3: ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;) .align 8 +.globl _gcry_twofish_amd64_blk3 +ELF(.type _gcry_twofish_amd64_blk3,@function;) +_gcry_twofish_amd64_blk3: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %ecx: encrypt (0 or 1) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + subq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(8 * 8); + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); + + testl %ecx, %ecx; + movq %rdx, RX0; + movq %rsi, (6 * 8)(%rsp); + + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + + jz .Lblk1_3_dec; + call __twofish_enc_blk3; + jmp .Lblk1_3_end; + .Lblk1_3_dec: + call __twofish_dec_blk3; + +.Lblk1_3_end: + movq (6 * 8)(%rsp), RX0; + movq RCD0, (0 * 8)(RX0); + movq RAB0, (1 * 8)(RX0); + movq RCD1, (2 * 8)(RX0); + movq RAB1, (3 * 8)(RX0); + movq RCD2, (4 * 8)(RX0); + movq RAB2, (5 * 8)(RX0); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); + addq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-8 * 8); + + EXIT_SYSV_FUNC + ret_spec_stop; + CFI_ENDPROC(); +ELF(.size _gcry_twofish_amd64_blk3,.-_gcry_twofish_amd64_blk3;) + +.align 8 .globl _gcry_twofish_amd64_ctr_enc ELF(.type _gcry_twofish_amd64_ctr_enc,@function;) _gcry_twofish_amd64_ctr_enc: diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S index 930ac792..0cb9a64c 100644 --- a/cipher/twofish-avx2-amd64.S +++ b/cipher/twofish-avx2-amd64.S @@ -468,6 +468,52 @@ __twofish_dec_blk16: CFI_ENDPROC(); ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;) +.align 8 +.globl _gcry_twofish_avx2_blk16 +ELF(.type _gcry_twofish_avx2_blk16,@function;) +_gcry_twofish_avx2_blk16: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %ecx: encrypt + */ + CFI_STARTPROC(); + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RB0; + vmovdqu (2 * 32)(%rdx), RC0; + vmovdqu (3 * 32)(%rdx), RD0; + vmovdqu (4 * 32)(%rdx), RA1; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RC1; + vmovdqu (7 * 32)(%rdx), RD1; + + testl %ecx, %ecx; + jz .Lblk16_dec; + call __twofish_enc_blk16; + jmp .Lblk16_end; + .Lblk16_dec: + call __twofish_dec_blk16; + +.Lblk16_end: + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RB0, (1 * 32)(%rsi); + vmovdqu RC0, (2 * 32)(%rsi); + vmovdqu RD0, (3 * 32)(%rsi); + vmovdqu RA1, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RC1, (6 * 32)(%rsi); + vmovdqu RD1, (7 * 32)(%rsi); + + vzeroall; + + ret_spec_stop; + CFI_ENDPROC(); +ELF(.size _gcry_twofish_avx2_blk16,.-_gcry_twofish_avx2_blk16;) + #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ vpsubq minus_one, x, x; \ diff --git a/cipher/twofish.c b/cipher/twofish.c index b300715b..92c463fc 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -101,7 +101,12 @@ static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, int encrypt); static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); - +static void _gcry_twofish_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); +static void _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); /* Structure for an expanded Twofish key. s contains the key-dependent * S-boxes composed with the MDS matrix; w contains the eight "whitening" @@ -775,7 +780,9 @@ twofish_setkey (void *context, const byte *key, unsigned int keylen, bulk_ops->cfb_dec = _gcry_twofish_cfb_dec; bulk_ops->ctr_enc = _gcry_twofish_ctr_enc; bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt; - bulk_ops->ocb_auth = _gcry_twofish_ocb_auth; + bulk_ops->ocb_auth = _gcry_twofish_ocb_auth; + bulk_ops->xts_crypt = _gcry_twofish_xts_crypt; + bulk_ops->ecb_crypt = _gcry_twofish_ecb_crypt; (void)hwfeatures; @@ -788,6 +795,9 @@ twofish_setkey (void *context, const byte *key, unsigned int keylen, /* Assembler implementations of Twofish using AVX2. Process 16 block in parallel. */ +extern void _gcry_twofish_avx2_blk16 (const TWOFISH_context *c, byte *out, + const byte *in, int encrypt) ASM_FUNC_ABI; + extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, @@ -835,6 +845,9 @@ extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in); /* These assembly implementations process three blocks in parallel. */ +extern void _gcry_twofish_amd64_blk3(const TWOFISH_context *c, byte *out, + const byte *in, int encrypt); + extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, byte *ctr); @@ -1501,7 +1514,7 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, blkn += 3; twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, - c->u_mode.ocb.aad_sum, Ls); + c->u_mode.ocb.aad_sum, Ls); nblocks -= 3; abuf += 3 * TWOFISH_BLOCKSIZE; @@ -1527,6 +1540,134 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, } +static unsigned int +twofish_crypt_blk1_16(const void *context, byte *out, const byte *in, + unsigned int num_blks, int encrypt) +{ + const TWOFISH_context *ctx = context; + unsigned int burn, burn_stack_depth = 0; + +#ifdef USE_AVX2 + if (num_blks == 16 && ctx->use_avx2) + { + _gcry_twofish_avx2_blk16 (ctx, out, in, encrypt); + return 0; + } +#endif + +#ifdef USE_AMD64_ASM + while (num_blks >= 3) + { + _gcry_twofish_amd64_blk3 (ctx, out, in, encrypt); + burn = 8 * sizeof(void *); + burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth; + out += 3 * TWOFISH_BLOCKSIZE; + in += 3 * TWOFISH_BLOCKSIZE; + num_blks -= 3; + } +#endif + + while (num_blks >= 1) + { + if (encrypt) + burn = twofish_encrypt((void *)ctx, out, in); + else + burn = twofish_decrypt((void *)ctx, out, in); + + burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth; + out += TWOFISH_BLOCKSIZE; + in += TWOFISH_BLOCKSIZE; + num_blks--; + } + + return burn_stack_depth; +} + +static unsigned int +twofish_encrypt_blk1_16(const void *ctx, byte *out, const byte *in, + unsigned int num_blks) +{ + return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 1); +} + +static unsigned int +twofish_decrypt_blk1_16(const void *ctx, byte *out, const byte *in, + unsigned int num_blks) +{ + return twofish_crypt_blk1_16 (ctx, out, in, num_blks, 0); +} + + +/* Bulk encryption/decryption of complete blocks in XTS mode. */ +static void +_gcry_twofish_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + TWOFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 0; + + /* Process remaining blocks. */ + if (nblocks) + { + unsigned char tmpbuf[16 * 16]; + unsigned int tmp_used = 16; + size_t tmpbufsize = 15 * 16; + size_t nburn; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + tmpbufsize = 16 * 16; +#endif + + nburn = bulk_xts_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16 + : twofish_decrypt_blk1_16, + outbuf, inbuf, nblocks, + tweak, tmpbuf, tmpbufsize / 16, + &tmp_used); + burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; + + wipememory(tmpbuf, tmp_used); + } + + if (burn_stack_depth) + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk encryption/decryption in ECB mode. */ +static void +_gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + TWOFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 0; + + /* Process remaining blocks. */ + if (nblocks) + { + size_t fn_maxblocks = 15; + size_t nburn; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + fn_maxblocks = 16; +#endif + + nburn = bulk_ecb_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16 + : twofish_decrypt_blk1_16, + outbuf, inbuf, nblocks, fn_maxblocks); + burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; + } + + if (burn_stack_depth) + _gcry_burn_stack(burn_stack_depth); +} + + /* Test a single encryption and decryption with each key size. */ |