summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.in2
-rw-r--r--configure.ac12
-rw-r--r--fat-s390x.c33
-rw-r--r--fat-setup.h1
-rw-r--r--memxor3.c10
-rw-r--r--s390x/fat/memxor3-2.asm36
-rw-r--r--s390x/vf/memxor3.asm84
7 files changed, 173 insertions, 5 deletions
diff --git a/Makefile.in b/Makefile.in
index 7b94f40f..69cf4872 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -620,7 +620,7 @@ distdir: $(DISTFILES)
arm arm/neon arm/v6 arm/fat \
arm64 arm64/crypto arm64/fat \
powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
- s390x s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
+ s390x s390x/vf s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
mkdir "$(distdir)/$$d" ; \
find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
-exec cp '{}' "$(distdir)/$$d" ';' ; \
diff --git a/configure.ac b/configure.ac
index 74cbc7ac..0d0960f3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -101,6 +101,10 @@ AC_ARG_ENABLE(power-altivec,
AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),,
[enable_altivec=no])
+AC_ARG_ENABLE(s390x-vf,
+ AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),,
+ [enable_s390x_vf=no])
+
AC_ARG_ENABLE(s390x-msa,
AC_HELP_STRING([--enable-s390x-msa], [Enable message-security assist extensions on z/Architecture. (default=no)]),,
[enable_s390x_msa=no])
@@ -533,8 +537,11 @@ if test "x$enable_assembler" = xyes ; then
if test "x$enable_fat" = xyes ; then
asm_path="s390x/fat $asm_path"
OPT_NETTLE_SOURCES="fat-s390x.c $OPT_NETTLE_SOURCES"
- FAT_TEST_LIST="none msa_x1 msa_x2 msa_x4"
+ FAT_TEST_LIST="none vf msa_x1 msa_x2 msa_x4"
else
+ if test "$enable_s390x_vf" = yes ; then
+ asm_path="s390x/vf $asm_path"
+ fi
if test "$enable_s390x_msa" = yes ; then
asm_path="s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 $asm_path"
fi
@@ -567,7 +574,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
# Assembler files which generate additional object files if they are used.
asm_nettle_optional_list="gcm-hash.asm gcm-hash8.asm cpuid.asm cpu-facility.asm \
- aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm \
+ aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm memxor3-2.asm \
aes128-set-encrypt-key-2.asm aes128-set-decrypt-key-2.asm \
aes128-encrypt-2.asm aes128-decrypt-2.asm \
aes192-set-encrypt-key-2.asm aes192-set-decrypt-key-2.asm \
@@ -680,6 +687,7 @@ AC_SUBST([ASM_PPC_WANT_R_REGISTERS])
AH_VERBATIM([HAVE_NATIVE],
[/* Define to 1 each of the following for which a native (ie. CPU specific)
implementation of the corresponding routine exists. */
+#undef HAVE_NATIVE_memxor3
#undef HAVE_NATIVE_aes_decrypt
#undef HAVE_NATIVE_aes_encrypt
#undef HAVE_NATIVE_aes128_decrypt
diff --git a/fat-s390x.c b/fat-s390x.c
index 927cf837..12918cf8 100644
--- a/fat-s390x.c
+++ b/fat-s390x.c
@@ -49,6 +49,7 @@
#include "nettle-types.h"
+#include "memxor.h"
#include "aes.h"
#include "gcm.h"
#include "gcm-internal.h"
@@ -67,6 +68,7 @@
#endif
/* Facility bits */
+#define FAC_VF 129 /* vector facility */
#define FAC_MSA 17 /* message-security assist */
#define FAC_MSA_X4 77 /* message-security-assist extension 4 */
@@ -78,6 +80,7 @@
struct s390x_features
{
+ int have_vector_facility;
int have_km_aes128;
int have_km_aes192;
int have_km_aes256;
@@ -94,6 +97,7 @@ void _nettle_kimd_status(uint64_t *status);
static void
get_s390x_features (struct s390x_features *features)
{
+ features->have_vector_facility = 0;
features->have_km_aes128 = 0;
features->have_km_aes192 = 0;
features->have_km_aes256 = 0;
@@ -106,7 +110,9 @@ get_s390x_features (struct s390x_features *features)
const char *sep = strchr (s, ',');
size_t length = sep ? (size_t) (sep - s) : strlen(s);
- if (MATCH (s, length, "msa_x1", 6))
+ if (MATCH (s, length, "vf", 2))
+ features->have_vector_facility = 1;
+ else if (MATCH (s, length, "msa_x1", 6))
{
features->have_km_aes128 = 1;
}
@@ -132,6 +138,9 @@ get_s390x_features (struct s390x_features *features)
uint64_t facilities[FACILITY_DOUBLEWORDS_MAX] = {0};
_nettle_stfle(facilities, FACILITY_DOUBLEWORDS_MAX);
+ if (facilities[FACILITY_INDEX(FAC_VF)] & FACILITY_BIT(FAC_VF))
+ features->have_vector_facility = 1;
+
if (facilities[FACILITY_INDEX(FAC_MSA)] & FACILITY_BIT(FAC_MSA))
{
uint64_t query_status[2] = {0};
@@ -156,6 +165,11 @@ get_s390x_features (struct s390x_features *features)
}
}
+/* MEMXOR3 */
+DECLARE_FAT_FUNC(nettle_memxor3, memxor3_func)
+DECLARE_FAT_FUNC_VAR(memxor3, memxor3_func, c)
+DECLARE_FAT_FUNC_VAR(memxor3, memxor3_func, s390x)
+
/* AES128 */
DECLARE_FAT_FUNC(nettle_aes128_set_encrypt_key, aes128_set_key_func)
DECLARE_FAT_FUNC_VAR(aes128_set_encrypt_key, aes128_set_key_func, c)
@@ -227,6 +241,18 @@ fat_init (void)
get_s390x_features (&features);
verbose = getenv (ENV_VERBOSE) != NULL;
+ /* MEMXOR3 */
+ if (features.have_vector_facility)
+ {
+ if (verbose)
+ fprintf (stderr, "libnettle: enabling vectorized memxor3.\n");
+ nettle_memxor3_vec = _nettle_memxor3_s390x;
+ }
+ else
+ {
+ nettle_memxor3_vec = _nettle_memxor3_c;
+ }
+
/* AES128 */
if (features.have_km_aes128)
{
@@ -302,6 +328,11 @@ fat_init (void)
}
}
+/* MEMXOR3 */
+DEFINE_FAT_FUNC(nettle_memxor3, void *,
+ (void *dst_in, const void *a_in, const void *b_in, size_t n),
+ (dst_in, a_in, b_in, n))
+
/* AES128 */
DEFINE_FAT_FUNC(nettle_aes128_set_encrypt_key, void,
(struct aes128_ctx *ctx, const uint8_t *key),
diff --git a/fat-setup.h b/fat-setup.h
index f9337dbe..78a6e396 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -170,6 +170,7 @@ typedef void gcm_hash_func (const struct gcm_key *key, union nettle_block16 *x,
size_t length, const uint8_t *data);
typedef void *(memxor_func)(void *dst, const void *src, size_t n);
+typedef void *(memxor3_func)(void *dst_in, const void *a_in, const void *b_in, size_t n);
typedef void salsa20_core_func (uint32_t *dst, const uint32_t *src, unsigned rounds);
typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds,
diff --git a/memxor3.c b/memxor3.c
index fe208bf1..c9ffa52b 100644
--- a/memxor3.c
+++ b/memxor3.c
@@ -45,6 +45,13 @@
#define WORD_T_THRESH 16
+/* For fat builds: with HAVE_NATIVE_memxor3 set, the C implementation below is compiled under the name _nettle_memxor3_c. */
+#if HAVE_NATIVE_memxor3
+void *
+_nettle_memxor3_c(void *dst_in, const void *a_in, const void *b_in, size_t n);
+# define nettle_memxor3 _nettle_memxor3_c
+#endif /* HAVE_NATIVE_memxor3 */
+
/* XOR word-aligned areas. n is the number of words, not bytes. */
static void
memxor3_common_alignment (word_t *dst,
@@ -236,7 +243,8 @@ memxor3_different_alignment_all (word_t *dst,
internally by cbc decrypt, and it is not advertised or documented
to nettle users. */
void *
-memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n)
+nettle_memxor3(void *dst_in, const void *a_in,
+ const void *b_in, size_t n)
{
unsigned char *dst = dst_in;
const unsigned char *a = a_in;
diff --git a/s390x/fat/memxor3-2.asm b/s390x/fat/memxor3-2.asm
new file mode 100644
index 00000000..c72e7ae7
--- /dev/null
+++ b/s390x/fat/memxor3-2.asm
@@ -0,0 +1,36 @@
+C s390x/fat/memxor3-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_memxor3) picked up by configure
+
+define(`fat_transform', `_$1_s390x')
+include_src(`s390x/vf/memxor3.asm')
diff --git a/s390x/vf/memxor3.asm b/s390x/vf/memxor3.asm
new file mode 100644
index 00000000..46782005
--- /dev/null
+++ b/s390x/vf/memxor3.asm
@@ -0,0 +1,84 @@
+C s390x/vf/memxor3.asm
+
+ifelse(`
+ Copyright (C) 2020 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "memxor3.asm"
+
+.text
+
+C void * memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n)
+
+PROLOGUE(nettle_memxor3)
+ agr %r2,%r5 C add r5 to all three pointers; the data is then processed from the end backward
+ agr %r3,%r5 C NOTE(review): presumably r2=dst, r3=a, r4=b, r5=n per the s390x calling convention - confirm
+ agr %r4,%r5
+Lmod: C step 1: the leftover n mod 16 bytes at the end of the buffers
+ risbg %r1,%r5,60,191,0 C r1 = bits 60-63 of r5 (n mod 16), other bits zeroed
+ jz L1x C no leftover bytes, go to the 16-byte blocks
+ sgr %r3,%r1 C step the pointers back over the leftover bytes
+ sgr %r4,%r1
+ sgr %r2,%r1
+ aghi %r1,-1 C highest index
+ vll %v24,%r1,0(%r3) C length-limited loads, r1 is the highest byte index
+ vll %v28,%r1,0(%r4)
+ vx %v24,%v24,%v28 C v24 = v24 xor v28
+ vstl %v24,%r1,0(%r2) C length-limited store of the result
+L1x: C step 2: single 16-byte blocks
+ risbg %r1,%r5,58,187,0 C r1 = bits 58-59 of r5 (n and 0x30), other bits zeroed
+ jz L4x C no single blocks, go to the 64-byte blocks
+ srlg %r1,%r1,4 C 1-block loop count '(n / 16) mod 4'
+L1x_loop:
+ aghi %r3,-16 C pre-decrement, then handle the 16 bytes at the new position
+ aghi %r4,-16
+ aghi %r2,-16
+ vl %v24,0(%r3),0
+ vl %v28,0(%r4),0
+ vx %v24,%v24,%v28
+ vst %v24,0(%r2),0
+ brctg %r1,L1x_loop C decrement r1 and loop while it is nonzero
+L4x: C step 3: groups of four 16-byte blocks
+ risbg %r1,%r5,0,185,0 C r1 = bits 0-57 of r5 (n with the low 6 bits cleared)
+ jz Ldone C nothing left to process
+ srlg %r1,%r1,6 C 4-blocks loop count 'n / (16 * 4)'
+L4x_loop:
+ aghi %r3,-64 C pre-decrement, then handle the 64 bytes at the new position
+ aghi %r4,-64
+ aghi %r2,-64
+ vlm %v24,%v27,0(%r3),0 C v24..v27 loaded from 0(r3)
+ vlm %v28,%v31,0(%r4),0 C v28..v31 loaded from 0(r4)
+ vx %v24,%v24,%v28
+ vx %v25,%v25,%v29
+ vx %v26,%v26,%v30
+ vx %v27,%v27,%v31
+ vstm %v24,%v27,0(%r2),0 C store v24..v27 to 0(r2)
+ brctg %r1,L4x_loop C decrement r1 and loop while it is nonzero
+Ldone: C the three steps subtract r5 in total, so r2 is back at its entry value
+ br RA
+EPILOGUE(nettle_memxor3)