summaryrefslogtreecommitdiff
path: root/cipher/camellia-vaes-avx2-amd64.S
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2021-01-11 00:56:47 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2021-02-28 12:34:06 +0200
commit: 0e7e60241a0d054eae7a98116636a831ec6ccc97 (patch)
tree: 07d16dec8501dc730efd4b816fe0ad7b29996d87 /cipher/camellia-vaes-avx2-amd64.S
parent: eb404d8904532f6dca82421c952be286a1f4e11c (diff)
download: libgcrypt-0e7e60241a0d054eae7a98116636a831ec6ccc97.tar.gz
camellia: add x86_64 VAES/AVX2 accelerated implementation
* cipher/Makefile.am: Add 'camellia-aesni-avx2-amd64.h' and
'camellia-vaes-avx2-amd64.S'.
* cipher/camellia-aesni-avx2-amd64.S: New, old content moved to...
* cipher/camellia-aesni-avx2-amd64.h: ...here.
(IF_AESNI, IF_VAES, FUNC_NAME): New.
* cipher/camellia-vaes-avx2-amd64.S: New.
* cipher/camellia-glue.c (USE_VAES_AVX2): New.
(CAMELLIA_context): New member 'use_vaes_avx2'.
(_gcry_camellia_vaes_avx2_ctr_enc, _gcry_camellia_vaes_avx2_cbc_dec)
(_gcry_camellia_vaes_avx2_cfb_dec, _gcry_camellia_vaes_avx2_ocb_enc)
(_gcry_camellia_vaes_avx2_ocb_dec)
(_gcry_camellia_vaes_avx2_ocb_auth): New.
(camellia_setkey): Check for HWF_INTEL_VAES.
(_gcry_camellia_ctr_enc, _gcry_camellia_cbc_dec)
(_gcry_camellia_cfb_dec, _gcry_camellia_ocb_crypt)
(_gcry_camellia_ocb_auth): Add USE_VAES_AVX2 code.
* configure.ac: Add 'camellia-vaes-avx2-amd64.lo'.
--

Camellia AES-NI/AVX2 implementation had to split 256-bit vector to 128-bit
parts for AES processing, but now we can use those 256-bit registers directly
with VAES.

Benchmarks on AMD Ryzen 5800X:

Before (AES-NI/AVX2):
 CAMELLIA128 | nanosecs/byte   mebibytes/sec   cycles/byte   auto Mhz
     CBC dec |    0.539 ns/B      1769 MiB/s      2.62 c/B       4852
     CFB dec |    0.528 ns/B      1806 MiB/s      2.56 c/B     4852±1
     CTR enc |    0.552 ns/B      1728 MiB/s      2.68 c/B       4850
     OCB enc |    0.550 ns/B      1734 MiB/s      2.65 c/B       4825
     OCB dec |    0.577 ns/B      1653 MiB/s      2.78 c/B       4825
    OCB auth |    0.546 ns/B      1747 MiB/s      2.63 c/B       4825

After (VAES/AVX2, CBC-dec ~13%, CFB-dec/CTR/OCB ~20% faster):
 CAMELLIA128 | nanosecs/byte   mebibytes/sec   cycles/byte   auto Mhz
     CBC dec |    0.477 ns/B      1999 MiB/s      2.31 c/B       4850
     CFB dec |    0.433 ns/B      2201 MiB/s      2.10 c/B       4850
     CTR enc |    0.438 ns/B      2176 MiB/s      2.13 c/B       4851
     OCB enc |    0.449 ns/B      2122 MiB/s      2.18 c/B       4850
     OCB dec |    0.468 ns/B      2038 MiB/s      2.27 c/B       4850
    OCB auth |    0.447 ns/B      2131 MiB/s      2.17 c/B       4850

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/camellia-vaes-avx2-amd64.S')
-rw-r--r-- cipher/camellia-vaes-avx2-amd64.S 35
1 files changed, 35 insertions, 0 deletions
diff --git a/cipher/camellia-vaes-avx2-amd64.S b/cipher/camellia-vaes-avx2-amd64.S
new file mode 100644
index 00000000..e6e0c78e
--- /dev/null
+++ b/cipher/camellia-vaes-avx2-amd64.S
@@ -0,0 +1,35 @@
+/* camellia-vaes-avx2-amd64.S - VAES/AVX2 implementation of Camellia cipher
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) && \
+ defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL)
+
+#define CAMELLIA_VAES_BUILD 1 /* NOTE(review): presumably selects the VAES code paths in the header included below - confirm there */
+#define FUNC_NAME(func) _gcry_camellia_vaes_avx2_ ## func /* token-pastes the _gcry_camellia_vaes_avx2_ prefix onto func */
+
+#include "camellia-aesni-avx2-amd64.h" /* nothing else is defined in this file; the header is expected to consume the two macros above */
+
+#endif /* VAES/AVX2 build requirements (all conditions of the #if above) */
+#endif /* __x86_64 */