summaryrefslogtreecommitdiff
path: root/cipher
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-02-22 20:19:07 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-02-22 21:10:30 +0200
commit 926cc22058a39c7a931e14590eab6fd7a78ba455 (patch)
tree 295bfd3d0e64a7f998facbde897eea854b9bbfd0 /cipher
parent 978b02fca682c9ecb71e30cdeeb6922fc8331f6e (diff)
download libgcrypt-926cc22058a39c7a931e14590eab6fd7a78ba455.tar.gz
camellia-aesni-avx: add acceleration for ECB/XTS/CTR32LE modes
* cipher/camellia-aesni-avx-amd64.S (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
* cipher/camellia-glue.c (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
(camellia_setkey): Always enable XTS/ECB/CTR32LE bulk functions.
(camellia_encrypt_blk1_32, camellia_decrypt_blk1_32) [USE_AESNI_AVX]: Add
AESNI/AVX code-path.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- cipher/camellia-aesni-avx-amd64.S 92
-rw-r--r-- cipher/camellia-glue.c 59
2 files changed, 133 insertions, 18 deletions
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 1f241e03..93c96791 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -1030,6 +1030,98 @@ _gcry_camellia_aesni_avx_ctr_enc:
ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
.align 16
+.globl _gcry_camellia_aesni_avx_ecb_enc
+ELF(.type _gcry_camellia_aesni_avx_ecb_enc,@function;)
+
+_gcry_camellia_aesni_avx_ecb_enc:
+ /* ECB mode: encrypt 16 blocks read from src and store them to dst; input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp; /* frame pointer lets 'leave' undo the stack realignment below */
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max: 24 if key_bitlength == 128, else 32; presumably read by __camellia_enc_blk16 */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX));
+
+ subq $(16 * 16), %rsp; /* reserve 16 * 16 = 256 bytes of stack */
+ andq $~31, %rsp; /* round %rsp down to a 32-byte boundary */
+ movq %rsp, %rax; /* %rax: reserved stack area; presumably scratch space for the callee */
+
+ call __camellia_enc_blk16;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall; /* zero all vector registers; presumably so no cipher state is left behind */
+
+ leave;
+ CFI_LEAVE();
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ecb_enc,.-_gcry_camellia_aesni_avx_ecb_enc;)
+
+.align 16
+.globl _gcry_camellia_aesni_avx_ecb_dec
+ELF(.type _gcry_camellia_aesni_avx_ecb_dec,@function;)
+
+_gcry_camellia_aesni_avx_ecb_dec:
+ /* ECB mode: decrypt 16 blocks read from src and store them to dst; input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp; /* frame pointer lets 'leave' undo the stack realignment below */
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max: 24 if key_bitlength == 128, else 32; used below as the key_table index (scaled by 8) */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX, %r8, 8));
+
+ subq $(16 * 16), %rsp; /* reserve 16 * 16 = 256 bytes of stack */
+ andq $~31, %rsp; /* round %rsp down to a 32-byte boundary */
+ movq %rsp, %rax; /* %rax: reserved stack area; presumably scratch space for the callee */
+
+ call __camellia_dec_blk16;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall; /* zero all vector registers; presumably so no cipher state is left behind */
+
+ leave;
+ CFI_LEAVE();
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ecb_dec,.-_gcry_camellia_aesni_avx_ecb_dec;)
+
+.align 16
.globl _gcry_camellia_aesni_avx_cbc_dec
ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;)
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 2e00f563..8b4b4b3c 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -172,15 +172,25 @@ extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
const u64 Ls[16]) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
- const unsigned char *abuf,
- unsigned char *offset,
- unsigned char *checksum,
- const u64 Ls[16]) ASM_FUNC_ABI;
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
const unsigned char *key,
unsigned int keylen) ASM_FUNC_ABI;
+extern void _gcry_camellia_aesni_avx_ecb_enc(const CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+ ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ecb_dec(const CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+ ASM_FUNC_ABI;
+
static const int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 +
2 * sizeof(void *) + ASM_EXTRA_STACK;
@@ -473,18 +483,9 @@ camellia_setkey(void *c, const byte *key, unsigned keylen,
bulk_ops->ctr_enc = _gcry_camellia_ctr_enc;
bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt;
bulk_ops->ocb_auth = _gcry_camellia_ocb_auth;
-#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2 || ctx->use_vaes_avx2 || ctx->use_gfni_avx2)
- {
- bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
- bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
- bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
- }
-#else
- (void)_gcry_camellia_xts_crypt;
- (void)_gcry_camellia_ecb_crypt;
- (void)_gcry_camellia_ctr32le_enc;
-#endif
+ bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
+ bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
+ bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
if (0)
{ }
@@ -651,10 +652,21 @@ camellia_encrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
return avx2_burn_stack_depth;
}
#endif
+#ifdef USE_AESNI_AVX
+ while (ctx->use_aesni_avx && num_blks >= 16)
+ {
+ _gcry_camellia_aesni_avx_ecb_enc (ctx, outbuf, inbuf);
+ stack_burn_size = avx_burn_stack_depth;
+ outbuf += CAMELLIA_BLOCK_SIZE * 16;
+ inbuf += CAMELLIA_BLOCK_SIZE * 16;
+ num_blks -= 16;
+ }
+#endif
while (num_blks)
{
- stack_burn_size = camellia_encrypt((void *)ctx, outbuf, inbuf);
+ unsigned int nburn = camellia_encrypt((void *)ctx, outbuf, inbuf);
+ stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
outbuf += CAMELLIA_BLOCK_SIZE;
inbuf += CAMELLIA_BLOCK_SIZE;
num_blks--;
@@ -731,10 +743,21 @@ camellia_decrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
return avx2_burn_stack_depth;
}
#endif
+#ifdef USE_AESNI_AVX
+ while (ctx->use_aesni_avx && num_blks >= 16)
+ {
+ _gcry_camellia_aesni_avx_ecb_dec (ctx, outbuf, inbuf);
+ stack_burn_size = avx_burn_stack_depth;
+ outbuf += CAMELLIA_BLOCK_SIZE * 16;
+ inbuf += CAMELLIA_BLOCK_SIZE * 16;
+ num_blks -= 16;
+ }
+#endif
while (num_blks)
{
- stack_burn_size = camellia_decrypt((void *)ctx, outbuf, inbuf);
+ unsigned int nburn = camellia_decrypt((void *)ctx, outbuf, inbuf);
+ stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
outbuf += CAMELLIA_BLOCK_SIZE;
inbuf += CAMELLIA_BLOCK_SIZE;
num_blks--;