diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-07-21 11:24:13 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-07-25 16:11:09 +0300 |
commit | e51d3b8330a1d4b15e3484df90646e075c02f54b (patch) | |
tree | 6c5920be2a32528688f99e381661c16b24ee3d20 /configure.ac | |
parent | 909daa700e4b45d75469df298ee564b8fc2f4b72 (diff) | |
download | libgcrypt-e51d3b8330a1d4b15e3484df90646e075c02f54b.tar.gz |
sha512: add AArch64 crypto/SHA512 extension implementation
* cipher/Makefile.am: Add 'sha512-armv8-aarch64-ce.S'.
* cipher/sha512-armv8-aarch64-ce.S: New.
* cipher/sha512.c (ATTR_ALIGNED_64, USE_ARM64_SHA512): New.
(k): Make array aligned to 64 bytes.
[USE_ARM64_SHA512] (_gcry_sha512_transform_armv8_ce): New.
[USE_ARM64_SHA512] (do_sha512_transform_armv8_ce): New.
(sha512_init_common) [USE_ARM64_SHA512]: Use ARMv8-SHA512 accelerated
implementation if HW feature available.
* configure.ac: Add 'sha512-armv8-aarch64-ce.lo'.
(gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4)
(HAVE_GCC_INLINE_ASM_AARCH64_SHA3_SHA512_SM3_SM4): New.
--
Benchmark on AWS Graviton3:
Before:
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
SHA512 | 2.36 ns/B 404.2 MiB/s 6.13 c/B 2600
After (2.4x faster):
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
SHA512 | 0.977 ns/B 976.6 MiB/s 2.54 c/B 2600
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'configure.ac')
-rw-r--r-- | configure.ac | 54 |
1 files changed, 54 insertions, 0 deletions
```diff
diff --git a/configure.ac b/configure.ac
index 4921d73c..c8f24dcc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2055,6 +2055,56 @@ fi
 
 
 #
+# Check whether GCC inline assembler supports AArch64 SHA3/SHA512/SM3/SM4 instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 SHA3/SHA512/SM3/SM4 instructions],
+       [gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4],
+       [if test "$mpi_cpu_arch" != "aarch64" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".arch armv8.2-a+sha3+sm4\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+
+                /* Test for SHA512 instructions */
+                "sha512h q0, q0, v0.2d;\n\t"
+                "sha512h2 q0, q0, v0.2d;\n\t"
+                "sha512su0 v0.2d, v0.2d;\n\t"
+                "sha512su1 v0.2d, v0.2d, v31.2d;\n\t"
+
+                /* Test for SHA3 instructions */
+                "bcax v0.16b, v1.16b, v2.16b, v3.16b;\n\t"
+                "eor3 v0.16b, v1.16b, v2.16b, v3.16b;\n\t"
+                "rax1 v0.2d, v1.2d, v2.2d;\n\t"
+                "xar v0.2d, v1.2d, v2.2d, \#1;\n\t"
+
+                /* Test for SM3 instructions */
+                "sm3partw1 v0.4s, v1.4s, v2.4s;\n\t"
+                "sm3partw2 v0.4s, v1.4s, v2.4s;\n\t"
+                "sm3ss1 v0.4s, v1.4s, v2.4s, v3.4s;\n\t"
+                "sm3tt1a v0.4s, v1.4s, v2.s[0];\n\t"
+                "sm3tt1b v0.4s, v1.4s, v2.s[0];\n\t"
+                "sm3tt2a v0.4s, v1.4s, v2.s[0];\n\t"
+                "sm3tt2b v0.4s, v1.4s, v2.s[0];\n\t"
+
+                /* Test for SM4 instructions */
+                "sm4e v0.4s, v1.4s;\n\t"
+                "sm4ekey v0.4s, v1.4s, v2.4s;\n\t"
+            );
+            ]], [ testfn(); ])],
+          [gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_SHA3_SHA512_SM3_SM4,1,
+     [Defined if inline assembler supports AArch64 SHA3/SHA512/SM3/SM4 instructions])
+fi
+
+
+#
 # Check whether PowerPC AltiVec/VSX intrinsics
 #
 AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics],
@@ -3124,6 +3174,10 @@ if test "$found" = "1" ; then
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-arm.lo"
       ;;
+      aarch64-*-*)
+        # Build with the assembly implementation
+        GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv8-aarch64-ce.lo"
+      ;;
       powerpc64le-*-*)
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
```