diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-02-22 20:19:07 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-02-22 21:10:30 +0200 |
commit | 926cc22058a39c7a931e14590eab6fd7a78ba455 (patch) | |
tree | 295bfd3d0e64a7f998facbde897eea854b9bbfd0 /cipher | |
parent | 978b02fca682c9ecb71e30cdeeb6922fc8331f6e (diff) | |
download | libgcrypt-926cc22058a39c7a931e14590eab6fd7a78ba455.tar.gz |
camellia-aesni-avx: add acceleration for ECB/XTS/CTR32LE modes
* cipher/camellia-aesni-avx-amd64.S (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
* cipher/camellia-glue.c (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
(camellia_setkey): Always enable XTS/ECB/CTR32LE bulk functions.
(camellia_encrypt_blk1_32, camellia_decrypt_blk1_32)
[USE_AESNI_AVX]: Add AESNI/AVX code-path.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- | cipher/camellia-aesni-avx-amd64.S | 92 |
-rw-r--r-- | cipher/camellia-glue.c | 59 |
2 files changed, 133 insertions, 18 deletions
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S index 1f241e03..93c96791 100644 --- a/cipher/camellia-aesni-avx-amd64.S +++ b/cipher/camellia-aesni-avx-amd64.S @@ -1030,6 +1030,98 @@ _gcry_camellia_aesni_avx_ctr_enc: ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;) .align 16 +.globl _gcry_camellia_aesni_avx_ecb_enc +ELF(.type _gcry_camellia_aesni_avx_ecb_enc,@function;) + +_gcry_camellia_aesni_avx_ecb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX)); + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + call __camellia_enc_blk16; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + leave; + CFI_LEAVE(); + ret_spec_stop; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_ecb_enc,.-_gcry_camellia_aesni_avx_ecb_enc;) + +.align 16 +.globl _gcry_camellia_aesni_avx_ecb_dec +ELF(.type _gcry_camellia_aesni_avx_ecb_dec,@function;) + +_gcry_camellia_aesni_avx_ecb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX, %r8, 8)); + + subq $(16 * 16), %rsp; 
+ andq $~31, %rsp; + movq %rsp, %rax; + + call __camellia_dec_blk16; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + leave; + CFI_LEAVE(); + ret_spec_stop; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_ecb_dec,.-_gcry_camellia_aesni_avx_ecb_dec;) + +.align 16 .globl _gcry_camellia_aesni_avx_cbc_dec ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;) diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 2e00f563..8b4b4b3c 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -172,15 +172,25 @@ extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx, - const unsigned char *abuf, - unsigned char *offset, - unsigned char *checksum, - const u64 Ls[16]) ASM_FUNC_ABI; + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx, const unsigned char *key, unsigned int keylen) ASM_FUNC_ABI; +extern void _gcry_camellia_aesni_avx_ecb_enc(const CAMELLIA_context *ctx, + unsigned char *out, + const unsigned char *in) + ASM_FUNC_ABI; + +extern void _gcry_camellia_aesni_avx_ecb_dec(const CAMELLIA_context *ctx, + unsigned char *out, + const unsigned char *in) + ASM_FUNC_ABI; + static const int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 + 2 * sizeof(void *) + ASM_EXTRA_STACK; @@ -473,18 +483,9 @@ camellia_setkey(void *c, const byte *key, unsigned keylen, bulk_ops->ctr_enc = _gcry_camellia_ctr_enc; bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt; bulk_ops->ocb_auth = _gcry_camellia_ocb_auth; -#ifdef USE_AESNI_AVX2 - if (ctx->use_aesni_avx2 || ctx->use_vaes_avx2 || ctx->use_gfni_avx2) - { - bulk_ops->xts_crypt = _gcry_camellia_xts_crypt; - bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt; - 
bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc; - } -#else - (void)_gcry_camellia_xts_crypt; - (void)_gcry_camellia_ecb_crypt; - (void)_gcry_camellia_ctr32le_enc; -#endif + bulk_ops->xts_crypt = _gcry_camellia_xts_crypt; + bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt; + bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc; if (0) { } @@ -651,10 +652,21 @@ camellia_encrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf, return avx2_burn_stack_depth; } #endif +#ifdef USE_AESNI_AVX + while (ctx->use_aesni_avx && num_blks >= 16) + { + _gcry_camellia_aesni_avx_ecb_enc (ctx, outbuf, inbuf); + stack_burn_size = avx_burn_stack_depth; + outbuf += CAMELLIA_BLOCK_SIZE * 16; + inbuf += CAMELLIA_BLOCK_SIZE * 16; + num_blks -= 16; + } +#endif while (num_blks) { - stack_burn_size = camellia_encrypt((void *)ctx, outbuf, inbuf); + unsigned int nburn = camellia_encrypt((void *)ctx, outbuf, inbuf); + stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size; outbuf += CAMELLIA_BLOCK_SIZE; inbuf += CAMELLIA_BLOCK_SIZE; num_blks--; @@ -731,10 +743,21 @@ camellia_decrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf, return avx2_burn_stack_depth; } #endif +#ifdef USE_AESNI_AVX + while (ctx->use_aesni_avx && num_blks >= 16) + { + _gcry_camellia_aesni_avx_ecb_dec (ctx, outbuf, inbuf); + stack_burn_size = avx_burn_stack_depth; + outbuf += CAMELLIA_BLOCK_SIZE * 16; + inbuf += CAMELLIA_BLOCK_SIZE * 16; + num_blks -= 16; + } +#endif while (num_blks) { - stack_burn_size = camellia_decrypt((void *)ctx, outbuf, inbuf); + unsigned int nburn = camellia_decrypt((void *)ctx, outbuf, inbuf); + stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size; outbuf += CAMELLIA_BLOCK_SIZE; inbuf += CAMELLIA_BLOCK_SIZE; num_blks--; |