diff options
-rw-r--r-- | Makefile.in | 2 | ||||
-rw-r--r-- | configure.ac | 12 | ||||
-rw-r--r-- | fat-s390x.c | 33 | ||||
-rw-r--r-- | fat-setup.h | 1 | ||||
-rw-r--r-- | memxor3.c | 10 | ||||
-rw-r--r-- | s390x/fat/memxor3-2.asm | 36 | ||||
-rw-r--r-- | s390x/vf/memxor3.asm | 84 |
7 files changed, 173 insertions, 5 deletions
diff --git a/Makefile.in b/Makefile.in index 7b94f40f..69cf4872 100644 --- a/Makefile.in +++ b/Makefile.in @@ -620,7 +620,7 @@ distdir: $(DISTFILES) arm arm/neon arm/v6 arm/fat \ arm64 arm64/crypto arm64/fat \ powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \ - s390x s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \ + s390x s390x/vf s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \ mkdir "$(distdir)/$$d" ; \ find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \ -exec cp '{}' "$(distdir)/$$d" ';' ; \ diff --git a/configure.ac b/configure.ac index 74cbc7ac..0d0960f3 100644 --- a/configure.ac +++ b/configure.ac @@ -101,6 +101,10 @@ AC_ARG_ENABLE(power-altivec, AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),, [enable_altivec=no]) +AC_ARG_ENABLE(s390x-vf, + AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),, + [enable_s390x_vf=no]) + AC_ARG_ENABLE(s390x-msa, AC_HELP_STRING([--enable-s390x-msa], [Enable message-security assist extensions on z/Architecture. (default=no)]),, [enable_s390x_msa=no]) @@ -533,8 +537,11 @@ if test "x$enable_assembler" = xyes ; then if test "x$enable_fat" = xyes ; then asm_path="s390x/fat $asm_path" OPT_NETTLE_SOURCES="fat-s390x.c $OPT_NETTLE_SOURCES" - FAT_TEST_LIST="none msa_x1 msa_x2 msa_x4" + FAT_TEST_LIST="none vf msa_x1 msa_x2 msa_x4" else + if test "$enable_s390x_vf" = yes ; then + asm_path="s390x/vf $asm_path" + fi if test "$enable_s390x_msa" = yes ; then asm_path="s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 $asm_path" fi @@ -567,7 +574,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \ # Assembler files which generate additional object files if they are used. 
asm_nettle_optional_list="gcm-hash.asm gcm-hash8.asm cpuid.asm cpu-facility.asm \ - aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm \ + aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm memxor3-2.asm \ aes128-set-encrypt-key-2.asm aes128-set-decrypt-key-2.asm \ aes128-encrypt-2.asm aes128-decrypt-2.asm \ aes192-set-encrypt-key-2.asm aes192-set-decrypt-key-2.asm \ @@ -680,6 +687,7 @@ AC_SUBST([ASM_PPC_WANT_R_REGISTERS]) AH_VERBATIM([HAVE_NATIVE], [/* Define to 1 each of the following for which a native (ie. CPU specific) implementation of the corresponding routine exists. */ +#undef HAVE_NATIVE_memxor3 #undef HAVE_NATIVE_aes_decrypt #undef HAVE_NATIVE_aes_encrypt #undef HAVE_NATIVE_aes128_decrypt diff --git a/fat-s390x.c b/fat-s390x.c index 927cf837..12918cf8 100644 --- a/fat-s390x.c +++ b/fat-s390x.c @@ -49,6 +49,7 @@ #include "nettle-types.h" +#include "memxor.h" #include "aes.h" #include "gcm.h" #include "gcm-internal.h" @@ -67,6 +68,7 @@ #endif /* Facility bits */ +#define FAC_VF 129 /* vector facility */ #define FAC_MSA 17 /* message-security assist */ #define FAC_MSA_X4 77 /* message-security-assist extension 4 */ @@ -78,6 +80,7 @@ struct s390x_features { + int have_vector_facility; int have_km_aes128; int have_km_aes192; int have_km_aes256; @@ -94,6 +97,7 @@ void _nettle_kimd_status(uint64_t *status); static void get_s390x_features (struct s390x_features *features) { + features->have_vector_facility = 0; features->have_km_aes128 = 0; features->have_km_aes192 = 0; features->have_km_aes256 = 0; @@ -106,7 +110,9 @@ get_s390x_features (struct s390x_features *features) const char *sep = strchr (s, ','); size_t length = sep ? 
(size_t) (sep - s) : strlen(s); - if (MATCH (s, length, "msa_x1", 6)) + if (MATCH (s, length, "vf", 2)) + features->have_vector_facility = 1; + else if (MATCH (s, length, "msa_x1", 6)) { features->have_km_aes128 = 1; } @@ -132,6 +138,9 @@ get_s390x_features (struct s390x_features *features) uint64_t facilities[FACILITY_DOUBLEWORDS_MAX] = {0}; _nettle_stfle(facilities, FACILITY_DOUBLEWORDS_MAX); + if (facilities[FACILITY_INDEX(FAC_VF)] & FACILITY_BIT(FAC_VF)) + features->have_vector_facility = 1; + if (facilities[FACILITY_INDEX(FAC_MSA)] & FACILITY_BIT(FAC_MSA)) { uint64_t query_status[2] = {0}; @@ -156,6 +165,11 @@ get_s390x_features (struct s390x_features *features) } } +/* MEMXOR3 */ +DECLARE_FAT_FUNC(nettle_memxor3, memxor3_func) +DECLARE_FAT_FUNC_VAR(memxor3, memxor3_func, c) +DECLARE_FAT_FUNC_VAR(memxor3, memxor3_func, s390x) + /* AES128 */ DECLARE_FAT_FUNC(nettle_aes128_set_encrypt_key, aes128_set_key_func) DECLARE_FAT_FUNC_VAR(aes128_set_encrypt_key, aes128_set_key_func, c) @@ -227,6 +241,18 @@ fat_init (void) get_s390x_features (&features); verbose = getenv (ENV_VERBOSE) != NULL; + /* MEMXOR3 */ + if (features.have_vector_facility) + { + if (verbose) + fprintf (stderr, "libnettle: enabling vectorized memxor3.\n"); + nettle_memxor3_vec = _nettle_memxor3_s390x; + } + else + { + nettle_memxor3_vec = _nettle_memxor3_c; + } + /* AES128 */ if (features.have_km_aes128) { @@ -302,6 +328,11 @@ fat_init (void) } } +/* MEMXOR3 */ +DEFINE_FAT_FUNC(nettle_memxor3, void *, + (void *dst_in, const void *a_in, const void *b_in, size_t n), + (dst_in, a_in, b_in, n)) + /* AES128 */ DEFINE_FAT_FUNC(nettle_aes128_set_encrypt_key, void, (struct aes128_ctx *ctx, const uint8_t *key), diff --git a/fat-setup.h b/fat-setup.h index f9337dbe..78a6e396 100644 --- a/fat-setup.h +++ b/fat-setup.h @@ -170,6 +170,7 @@ typedef void gcm_hash_func (const struct gcm_key *key, union nettle_block16 *x, size_t length, const uint8_t *data); typedef void *(memxor_func)(void *dst, const void *src, 
size_t n); +typedef void *(memxor3_func)(void *dst_in, const void *a_in, const void *b_in, size_t n); typedef void salsa20_core_func (uint32_t *dst, const uint32_t *src, unsigned rounds); typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds, @@ -45,6 +45,13 @@ #define WORD_T_THRESH 16 +/* For fat builds */ +#if HAVE_NATIVE_memxor3 +void * +_nettle_memxor3_c(void *dst_in, const void *a_in, const void *b_in, size_t n); +# define nettle_memxor3 _nettle_memxor3_c +#endif + /* XOR word-aligned areas. n is the number of words, not bytes. */ static void memxor3_common_alignment (word_t *dst, @@ -236,7 +243,8 @@ memxor3_different_alignment_all (word_t *dst, internally by cbc decrypt, and it is not advertised or documented to nettle users. */ void * -memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n) +nettle_memxor3(void *dst_in, const void *a_in, + const void *b_in, size_t n) { unsigned char *dst = dst_in; const unsigned char *a = a_in; diff --git a/s390x/fat/memxor3-2.asm b/s390x/fat/memxor3-2.asm new file mode 100644 index 00000000..c72e7ae7 --- /dev/null +++ b/s390x/fat/memxor3-2.asm @@ -0,0 +1,36 @@ +C s390x/fat/memxor3-2.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_memxor3) picked up by configure + +define(`fat_transform', `_$1_s390x') +include_src(`s390x/vf/memxor3.asm') diff --git a/s390x/vf/memxor3.asm b/s390x/vf/memxor3.asm new file mode 100644 index 00000000..46782005 --- /dev/null +++ b/s390x/vf/memxor3.asm @@ -0,0 +1,84 @@ +C s390x/vf/memxor3.asm + +ifelse(` + Copyright (C) 2020 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +.file "memxor3.asm" + +.text + +C void * memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n) + +PROLOGUE(nettle_memxor3) + agr %r2,%r5 + agr %r3,%r5 + agr %r4,%r5 +Lmod: + risbg %r1,%r5,60,191,0 + jz L1x + sgr %r3,%r1 + sgr %r4,%r1 + sgr %r2,%r1 + aghi %r1,-1 C highest index + vll %v24,%r1,0(%r3) + vll %v28,%r1,0(%r4) + vx %v24,%v24,%v28 + vstl %v24,%r1,0(%r2) +L1x: + risbg %r1,%r5,58,187,0 + jz L4x + srlg %r1,%r1,4 C 1-block loop count 'n / 16' +L1x_loop: + aghi %r3,-16 + aghi %r4,-16 + aghi %r2,-16 + vl %v24,0(%r3),0 + vl %v28,0(%r4),0 + vx %v24,%v24,%v28 + vst %v24,0(%r2),0 + brctg %r1,L1x_loop +L4x: + risbg %r1,%r5,0,185,0 + jz Ldone + srlg %r1,%r1,6 C 4-blocks loop count 'n / (16 * 4)' +L4x_loop: + aghi %r3,-64 + aghi %r4,-64 + aghi %r2,-64 + vlm %v24,%v27,0(%r3),0 + vlm %v28,%v31,0(%r4),0 + vx %v24,%v24,%v28 + vx %v25,%v25,%v29 + vx %v26,%v26,%v30 + vx %v27,%v27,%v31 + vstm %v24,%v27,0(%r2),0 + brctg %r1,L4x_loop +Ldone: + br RA +EPILOGUE(nettle_memxor3) |