summary | refs | log | tree | commit | diff
path: root/cipher/sha512-avx512-amd64.S
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-01-16 19:24:33 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-01-17 18:30:23 +0200
commit: 7de2fb66e065a97f121bd16ab37efba32983a6bd (patch)
tree: bc3bce8cdb71c4946e5b12326dd9a379c8863671 /cipher/sha512-avx512-amd64.S
parent: 5e1a04f77933a8295df69d818e9effc076dc68cd (diff)
download: libgcrypt-7de2fb66e065a97f121bd16ab37efba32983a6bd.tar.gz
avx512: tweak zmm16-zmm31 register clearing
* cipher/asm-common-amd64.h (spec_stop_avx512): Clear ymm16 before and after vpopcntb.
* cipher/camellia-gfni-avx512-amd64.S (clear_zmm16_zmm31): Clear YMM16-YMM31 registers instead of XMM16-XMM31.
* cipher/chacha20-amd64-avx512.S (clear_zmm16_zmm31): Likewise.
* cipher/keccak-amd64-avx512.S (clear_regs): Likewise.
(clear_avx512_4regs): Clear all 4 registers with XOR.
* cipher/cipher-gcm-intel-pclmul.c (_gcry_ghash_intel_pclmul)
(_gcry_polyval_intel_pclmul): Clear YMM16-YMM19 registers instead of ZMM16-ZMM19.
* cipher/poly1305-amd64-avx512.S (POLY1305_BLOCKS): Clear YMM16-YMM31 registers after vector processing instead of XMM16-XMM31.
* cipher/sha512-avx512-amd64.S (_gcry_sha512_transform_amd64_avx512): Likewise.
--
Clear zmm16-zmm31 registers with 256-bit XOR instead of 128-bit as this is better for AMD Zen4. Also clear xmm16 register after vpopcnt in avx512 spec-stop so we do not leave any zmm register state which might end up unnecessarily using CPU resources.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512-avx512-amd64.S')
-rw-r--r-- cipher/sha512-avx512-amd64.S | 14
1 files changed, 7 insertions, 7 deletions
diff --git a/cipher/sha512-avx512-amd64.S b/cipher/sha512-avx512-amd64.S
index 65475422..431fb3e9 100644
--- a/cipher/sha512-avx512-amd64.S
+++ b/cipher/sha512-avx512-amd64.S
@@ -384,13 +384,13 @@ _gcry_sha512_transform_amd64_avx512:
vmovdqa [rsp + frame_XFER + 1*32], ymm0 /* burn stack */
vmovdqa [rsp + frame_XFER + 2*32], ymm0 /* burn stack */
vmovdqa [rsp + frame_XFER + 3*32], ymm0 /* burn stack */
- clear_reg(xmm16);
- clear_reg(xmm17);
- clear_reg(xmm18);
- clear_reg(xmm19);
- clear_reg(xmm20);
- clear_reg(xmm21);
- clear_reg(xmm22);
+ clear_reg(ymm16);
+ clear_reg(ymm17);
+ clear_reg(ymm18);
+ clear_reg(ymm19);
+ clear_reg(ymm20);
+ clear_reg(ymm21);
+ clear_reg(ymm22);
/* Restore Stack Pointer */
mov rsp, RSP_SAVE