author    Jussi Kivilinna <jussi.kivilinna@iki.fi>    2023-02-19 18:39:36 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>    2023-02-22 20:27:56 +0200
commit    978b02fca682c9ecb71e30cdeeb6922fc8331f6e (patch)
tree      58e53a100647fe286ecb50994a66f156cffcece4 /cipher/sm4-aesni-avx-amd64.S
parent    8f7f5a9fc63968304bacedbc2f22b9f7188bbd53 (diff)
download  libgcrypt-978b02fca682c9ecb71e30cdeeb6922fc8331f6e.tar.gz
sm4: add CTR-mode byte addition for AVX/AVX2/AVX512 implementations
* cipher/sm4-aesni-avx-amd64.S
(_gcry_sm4_aesni_avx_ctr_enc): Add byte addition fast-path.
* cipher/sm4-aesni-avx2-amd64.S
(_gcry_sm4_aesni_avx2_ctr_enc): Likewise.
* cipher/sm4-gfni-avx2-amd64.S
(_gcry_sm4_gfni_avx2_ctr_enc): Likewise.
* cipher/sm4-gfni-avx512-amd64.S
(_gcry_sm4_gfni_avx512_ctr_enc)
(_gcry_sm4_gfni_avx512_ctr_enc_blk32): Likewise.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sm4-aesni-avx-amd64.S')
-rw-r--r--    cipher/sm4-aesni-avx-amd64.S    68
1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/cipher/sm4-aesni-avx-amd64.S b/cipher/sm4-aesni-avx-amd64.S
index c09b205d..ca9be44a 100644
--- a/cipher/sm4-aesni-avx-amd64.S
+++ b/cipher/sm4-aesni-avx-amd64.S
@@ -1,6 +1,6 @@
 /* sm4-avx-aesni-amd64.S - AES-NI/AVX implementation of SM4 cipher
  *
- * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2020,2023 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
@@ -150,6 +150,38 @@ _sm4_aesni_avx_consts:
 .Lbswap32_mask:
 	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
+/* CTR byte addition constants */
+.Lbige_addb_1:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+.Lbige_addb_2:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2
+.Lbige_addb_3:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3
+.Lbige_addb_4:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4
+.Lbige_addb_5:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5
+.Lbige_addb_6:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6
+.Lbige_addb_7:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
+.Lbige_addb_8:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
+.Lbige_addb_9:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9
+.Lbige_addb_10:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10
+.Lbige_addb_11:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11
+.Lbige_addb_12:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12
+.Lbige_addb_13:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13
+.Lbige_addb_14:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14
+.Lbige_addb_15:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15
+
 .align 4
 /* 4-bit mask */
 .L0f0f0f0f:
@@ -529,6 +561,9 @@ _gcry_sm4_aesni_avx_ctr_enc:
 	 */
 	CFI_STARTPROC();
 
+	cmpb $(0x100 - 8), 15(%rcx);
+	jbe .Lctr_byteadd;
+
 	/* load IV and byteswap */
 	vmovdqu (%rcx), RA0;
 
@@ -565,6 +600,8 @@ _gcry_sm4_aesni_avx_ctr_enc:
 	/* store new IV */
 	vmovdqu RTMP1, (%rcx);
 
+.align 8
+.Lload_ctr_done:
 	call __sm4_crypt_blk8;
 
 	vpxor (0 * 16)(%rdx), RA0, RA0;
@@ -588,6 +625,35 @@ _gcry_sm4_aesni_avx_ctr_enc:
 	vzeroall;
 	ret_spec_stop;
 
+.align 8
+.Lctr_byteadd_full_ctr_carry:
+	movq 8(%rcx), %r11;
+	movq (%rcx), %r10;
+	bswapq %r11;
+	bswapq %r10;
+	addq $8, %r11;
+	adcq $0, %r10;
+	bswapq %r11;
+	bswapq %r10;
+	movq %r11, 8(%rcx);
+	movq %r10, (%rcx);
+	jmp .Lctr_byteadd_xmm;
+.align 8
+.Lctr_byteadd:
+	vmovdqu (%rcx), RA0;
+	je .Lctr_byteadd_full_ctr_carry;
+	addb $8, 15(%rcx);
+.Lctr_byteadd_xmm:
+	vpaddb .Lbige_addb_1 rRIP, RA0, RA1;
+	vpaddb .Lbige_addb_2 rRIP, RA0, RA2;
+	vpaddb .Lbige_addb_3 rRIP, RA0, RA3;
+	vpaddb .Lbige_addb_4 rRIP, RA0, RB0;
+	vpaddb .Lbige_addb_5 rRIP, RA0, RB1;
+	vpaddb .Lbige_addb_6 rRIP, RA0, RB2;
+	vpaddb .Lbige_addb_7 rRIP, RA0, RB3;
+
+	jmp .Lload_ctr_done;
 	CFI_ENDPROC();
 	ELF(.size _gcry_sm4_aesni_avx_ctr_enc,.-_gcry_sm4_aesni_avx_ctr_enc;)
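
The fast path above works as follows: cmpb $(0x100 - 8), 15(%rcx) tests the last byte of the 16-byte big-endian counter. If it is at most 0xf8, the eight counter blocks for this batch differ from the current counter only in that final byte, so they can be produced with single byte-wise vpaddb additions against the .Lbige_addb_N constants, skipping the byteswap and 64-bit add-with-carry sequence entirely. The je into .Lctr_byteadd_full_ctr_carry (reusing the flags from that same cmpb) covers the boundary value 0xf8, where advancing the stored IV by 8 does carry into the higher bytes. The following is a minimal C sketch of the equivalent logic, not libgcrypt code: the helper names ctr_add_full and ctr_blocks_8 are hypothetical, and the slow path is simplified to a scalar loop.

	/* Minimal C sketch (not libgcrypt code) of the CTR byte-addition
	 * fast path from the diff above.  ctr_add_full and ctr_blocks_8
	 * are hypothetical helper names. */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Big-endian 128-bit increment by n with full carry propagation;
	 * scalar stand-in for the bswapq/addq/adcq sequence. */
	static void ctr_add_full(uint8_t ctr[16], unsigned int n)
	{
		unsigned int carry = n;
		for (int i = 15; i >= 0 && carry; i--) {
			carry += ctr[i];
			ctr[i] = (uint8_t)carry;
			carry >>= 8;
		}
	}

	/* Generate 8 counter blocks and advance the stored counter by 8. */
	static void ctr_blocks_8(uint8_t blocks[8][16], uint8_t ctr[16])
	{
		if (ctr[15] <= 0x100 - 8) {  /* cmpb $(0x100 - 8), 15(%rcx); jbe */
			for (unsigned int i = 0; i < 8; i++) {
				/* block 0 is the raw counter (vmovdqu); blocks
				 * 1..7 add i in byte 15 (vpaddb .Lbige_addb_i);
				 * ctr[15] + 7 <= 0xff, so no wrap is possible */
				memcpy(blocks[i], ctr, 16);
				blocks[i][15] = (uint8_t)(ctr[15] + i);
			}
			if (ctr[15] == 0x100 - 8)
				ctr_add_full(ctr, 8);  /* je .Lctr_byteadd_full_ctr_carry */
			else
				ctr[15] += 8;          /* addb $8, 15(%rcx) */
		} else {
			/* Slow path, simplified: per-block full-width increments
			 * (the assembly instead byteswaps once and uses 64-bit
			 * adds with carry). */
			for (unsigned int i = 0; i < 8; i++) {
				memcpy(blocks[i], ctr, 16);
				ctr_add_full(blocks[i], i);
			}
			ctr_add_full(ctr, 8);
		}
	}

	int main(void)
	{
		uint8_t ctr[16] = { [15] = 0xf8 };  /* the exact carry edge case */
		uint8_t blocks[8][16];
		ctr_blocks_8(blocks, ctr);
		/* blocks run ...f8 through ...ff; stored counter carried to ...01 00 */
		printf("last block ends %02x, ctr now %02x %02x\n",
		       blocks[7][15], ctr[14], ctr[15]);
		return 0;
	}

In the common case (last byte below 0xf8) the stored IV update is a single addb and all eight block counters come from cheap SIMD byte adds. Per the changelog above, the AVX2 and AVX512 variants apply the same idea to their larger block batches.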