diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-02-22 20:19:47 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-02-22 21:12:58 +0200 |
commit | 5f25ad09fdb5eb5f83f7cc4cefe79bbeab29fec8 (patch) | |
tree | 3d0083f117daab0ec61cde22d0c8036a66f982f6 /cipher | |
parent | 87ae2a660d59751ddd7da40da05cfaee73f35ea7 (diff) | |
download | libgcrypt-5f25ad09fdb5eb5f83f7cc4cefe79bbeab29fec8.tar.gz |
camellia-avx2: add fast path for full 32 block ECB input
* cipher/camellia-aesni-avx2-amd64.h (enc_blk1_32, dec_blk1_32): Add
fast path for 32 block input.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- | cipher/camellia-aesni-avx2-amd64.h | 41 |
1 file changed, 33 insertions, 8 deletions
diff --git a/cipher/camellia-aesni-avx2-amd64.h b/cipher/camellia-aesni-avx2-amd64.h
index 7d451c09..92f0ce5f 100644
--- a/cipher/camellia-aesni-avx2-amd64.h
+++ b/cipher/camellia-aesni-avx2-amd64.h
@@ -2127,12 +2127,9 @@ FUNC_NAME(enc_blk1_32):
 	cmpl $31, %ecx;
 	vpxor %xmm0, %xmm0, %xmm0;
-	ja 1f;
+	ja .Lenc_blk32;
 	jb 2f;
 	vmovdqu 15 * 32(%rdx), %xmm0;
-	jmp 2f;
-	1:
-	vmovdqu 15 * 32(%rdx), %ymm0;
 	2:
 	vmovdqu %ymm0, (%rax);
@@ -2195,13 +2192,29 @@ FUNC_NAME(enc_blk1_32):
 	STORE_OUTPUT(ymm9, 14);
 	STORE_OUTPUT(ymm8, 15);
+.align 8
 2:
+.Lenc_blk32_done:
 	vzeroall;
 	leave;
 	CFI_LEAVE();
 	ret_spec_stop;
 	CFI_ENDPROC();
+
+.align 8
+.Lenc_blk32:
+	inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+		     %ymm15, %rdx, (key_table)(CTX));
+
+	call FUNC_NAME(enc_blk32);
+
+	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+		     %ymm8, %rsi);
+	jmp .Lenc_blk32_done;
+	CFI_ENDPROC();
 ELF(.size FUNC_NAME(enc_blk1_32),.-FUNC_NAME(enc_blk1_32);)
 .align 16
@@ -2235,12 +2248,9 @@ FUNC_NAME(dec_blk1_32):
 	cmpl $31, %ecx;
 	vpxor %xmm0, %xmm0, %xmm0;
-	ja 1f;
+	ja .Ldec_blk32;
 	jb 2f;
 	vmovdqu 15 * 32(%rdx), %xmm0;
-	jmp 2f;
-	1:
-	vmovdqu 15 * 32(%rdx), %ymm0;
 	2:
 	vmovdqu %ymm0, (%rax);
@@ -2284,12 +2294,27 @@ FUNC_NAME(dec_blk1_32):
 	STORE_OUTPUT(ymm9, 14);
 	STORE_OUTPUT(ymm8, 15);
+.align 8
 2:
+.Ldec_blk32_done:
 	vzeroall;
 	leave;
 	CFI_LEAVE();
 	ret_spec_stop;
+
+.align 8
+.Ldec_blk32:
+	inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+		     %ymm15, %rdx, (key_table)(CTX, %r8, 8));
+
+	call FUNC_NAME(dec_blk32);
+
+	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+		     %ymm8, %rsi);
+	jmp .Ldec_blk32_done;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(dec_blk1_32),.-FUNC_NAME(dec_blk1_32);)