summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLauri Kasanen <cand@gmx.com>2019-11-06 19:17:48 +1100
committerLauri Kasanen <cand@gmx.com>2019-11-06 19:17:48 +1100
commita261783b1cf578ed1eb499f6ab664848b5809cca (patch)
treedd2389c38e3c1b6ed08bba3edf94ae432026e406
parent6a6666f66dd44c6abf871f00844c760dabd04349 (diff)
downloadnss-hg-a261783b1cf578ed1eb499f6ab664848b5809cca.tar.gz
Bug 1566126 - freebl: POWER GHASH Vector Acceleration, r=mt
Implementation for POWER8 adapted from the ARM paper: https://conradoplg.cryptoland.net/files/2010/12/gcm14.pdf Benchmark of `bltest -E -m aes_gcm -i tests/aes_gcm/plaintext10 \ -v tests/aes_gcm/iv10 -k tests/aes_gcm/key10 -5 10` on POWER8 3.3GHz. NSS_DISABLE_HW_CRYPTO=1 mode in symmkey opreps cxreps context op time(sec) thrgput aes_gcm_e 309Mb 192 5M 0 0.000 10000.000 10.001 30Mb mode in symmkey opreps cxreps context op time(sec) thrgput aes_gcm_e 829Mb 192 14M 0 0.000 10000.000 10.001 82Mb Notable operf results, sw: samples % image name symbol name 226033 59.3991 libfreeblpriv3.so bmul 80606 21.1824 libfreeblpriv3.so rijndael_encryptBlock128 28851 7.5817 libfreeblpriv3.so gcm_HashMult_sftw hw: 213899 56.2037 libfreeblpriv3.so rijndael_encryptBlock128 45233 11.8853 libfreeblpriv3.so gcm_HashMult_hw So the ghash part is ~5.6x faster. Signed-off-by: Lauri Kasanen <cand@gmx.com>
-rw-r--r--lib/freebl/Makefile5
-rw-r--r--lib/freebl/altivec-types.h23
-rw-r--r--lib/freebl/blapii.h1
-rw-r--r--lib/freebl/blinit.c29
-rw-r--r--lib/freebl/freebl.gyp28
-rw-r--r--lib/freebl/gcm-ppc.c106
-rw-r--r--lib/freebl/gcm.c6
-rw-r--r--lib/freebl/gcm.h24
8 files changed, 220 insertions, 2 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 5943fb377..98a7c5d5c 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -263,6 +263,7 @@ ifeq ($(CPU_ARCH),arm)
MPI_SRCS += mpi_arm.c
endif
ifeq ($(CPU_ARCH),ppc)
+ EXTRA_SRCS += gcm-ppc.c
ifdef USE_64
DEFINES += -DNSS_NO_INIT_SUPPORT
endif # USE_64
@@ -785,3 +786,7 @@ ifeq ($(CPU_ARCH),aarch64)
$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
$(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
endif
+
+ifeq ($(CPU_ARCH),ppc)
+$(OBJDIR)/$(PROG_PREFIX)gcm-ppc$(OBJ_SUFFIX): CFLAGS += -mcrypto -maltivec
+endif
diff --git a/lib/freebl/altivec-types.h b/lib/freebl/altivec-types.h
new file mode 100644
index 000000000..807a44db4
--- /dev/null
+++ b/lib/freebl/altivec-types.h
@@ -0,0 +1,23 @@
+/*
+ * altivec-types.h - shorter vector typedefs
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _ALTIVEC_TYPES_H_
+#define _ALTIVEC_TYPES_H_ 1
+
+#include <altivec.h>
+
+typedef __vector unsigned char vec_u8;
+typedef __vector signed char vec_s8;
+typedef __vector unsigned short vec_u16;
+typedef __vector signed short vec_s16;
+typedef __vector unsigned int vec_u32;
+typedef __vector signed int vec_s32;
+typedef __vector unsigned long long vec_u64;
+typedef __vector signed long long vec_s64;
+typedef __vector float vec_f;
+
+#endif
diff --git a/lib/freebl/blapii.h b/lib/freebl/blapii.h
index 743a1168b..006c26977 100644
--- a/lib/freebl/blapii.h
+++ b/lib/freebl/blapii.h
@@ -86,5 +86,6 @@ PRBool arm_aes_support();
PRBool arm_pmull_support();
PRBool arm_sha1_support();
PRBool arm_sha2_support();
+PRBool ppc_crypto_support();
#endif /* _BLAPII_H_ */
diff --git a/lib/freebl/blinit.c b/lib/freebl/blinit.c
index 543206c12..5e1f4826c 100644
--- a/lib/freebl/blinit.c
+++ b/lib/freebl/blinit.c
@@ -29,6 +29,7 @@ static PRBool arm_aes_support_ = PR_FALSE;
static PRBool arm_sha1_support_ = PR_FALSE;
static PRBool arm_sha2_support_ = PR_FALSE;
static PRBool arm_pmull_support_ = PR_FALSE;
+static PRBool ppc_crypto_support_ = PR_FALSE;
#ifdef NSS_X86_OR_X64
/*
@@ -348,6 +349,32 @@ arm_sha2_support()
{
return arm_sha2_support_;
}
+PRBool
+ppc_crypto_support()
+{
+ return ppc_crypto_support_;
+}
+
+#if defined(__powerpc__)
+
+#include <sys/auxv.h>
+
+// Defines from cputable.h in Linux kernel - PPC, letting us build on older kernels
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#endif
+
+static void
+CheckPPCSupport()
+{
+ char *disable_hw_crypto = PR_GetEnvSecure("NSS_DISABLE_PPC_GHASH");
+
+ long hwcaps = getauxval(AT_HWCAP2);
+
+ ppc_crypto_support_ = hwcaps & PPC_FEATURE2_VEC_CRYPTO && disable_hw_crypto == NULL;
+}
+
+#endif /* __powerpc__ */
static PRStatus
FreeblInit(void)
@@ -356,6 +383,8 @@ FreeblInit(void)
CheckX86CPUSupport();
#elif (defined(__aarch64__) || defined(__arm__))
CheckARMSupport();
+#elif (defined(__powerpc__))
+ CheckPPCSupport();
#endif
return PR_SUCCESS;
}
diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp
index f12508136..7ea5c3750 100644
--- a/lib/freebl/freebl.gyp
+++ b/lib/freebl/freebl.gyp
@@ -133,6 +133,24 @@
]
},
{
+ 'target_name': 'gcm-aes-ppc_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'gcm-ppc.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'cflags': [
+ '-mcrypto',
+ '-maltivec'
+ ],
+ 'cflags_mozilla': [
+ '-mcrypto',
+ '-maltivec'
+ ]
+ },
+ {
'target_name': 'armv8_c_lib',
'type': 'static_library',
'sources': [
@@ -199,6 +217,11 @@
'gcm-aes-aarch64_c_lib',
],
}],
+ [ 'target_arch=="ppc64le"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ ],
+ }],
[ 'OS=="linux"', {
'defines!': [
'FREEBL_NO_DEPEND',
@@ -245,6 +268,11 @@
'gcm-aes-aarch64_c_lib',
],
}],
+ [ 'target_arch=="ppc64" or target_arch=="ppc64le"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ ],
+ }],
[ 'OS!="linux"', {
'conditions': [
[ 'moz_fold_libs==0', {
diff --git a/lib/freebl/gcm-ppc.c b/lib/freebl/gcm-ppc.c
new file mode 100644
index 000000000..3eedcbc57
--- /dev/null
+++ b/lib/freebl/gcm-ppc.c
@@ -0,0 +1,106 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "gcm.h"
+#include "secerr.h"
+
+#if defined(USE_PPC_CRYPTO)
+
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+ vec_xst_be((vec_u8) ghash->x, 0, outbuf);
+ return SECSuccess;
+}
+
+static vec_u64 vpmsumd(const vec_u64 a, const vec_u64 b)
+{
+#if defined(__clang__)
+ /* Clang uses a different name */
+ return __builtin_altivec_crypto_vpmsumd(a, b);
+#elif (__GNUC__ >= 10) || (__GNUC__ == 9 && __GNUC_MINOR__ >= 3) || \
+ (__GNUC__ == 8 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ == 7 && __GNUC_MINOR__ >= 5)
+ /* GCC versions not affected by https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91275 */
+ return __builtin_crypto_vpmsumd(a, b);
+#else
+ /* GCC versions where this builtin is buggy */
+ vec_u64 vr;
+ __asm("vpmsumd %0, %1, %2" : "=v"(vr) : "v"(a), "v"(b));
+ return vr;
+#endif
+}
+
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ const vec_u8 leftshift = vec_splat_u8(1);
+ const vec_u64 onebit = (vec_u64) {1, 0};
+ const unsigned long long pd = 0xc2LLU << 56;
+
+ vec_u64 ci, v, r0, r1;
+ vec_u64 hibit;
+ unsigned i;
+
+ ci = ghash->x;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ /* clang needs the following cast away from const; maybe a bug in 7.0.0 */
+ v = (vec_u64) vec_xl_be(0, (unsigned char *) buf);
+ ci ^= v;
+
+ /* Do binary mult ghash->X = C * ghash->H (Karatsuba). */
+ r0 = vpmsumd((vec_u64) {ci[0], 0}, (vec_u64) {ghash->h[0], 0});
+ r1 = vpmsumd((vec_u64) {ci[1], 0}, (vec_u64) {ghash->h[1], 0});
+ v = (vec_u64) {ci[0] ^ ci[1], ghash->h[0] ^ ghash->h[1]};
+ v = vpmsumd((vec_u64) {v[0], 0}, (vec_u64) {v[1], 0});
+ v ^= r0;
+ v ^= r1;
+ r0 ^= (vec_u64) {0, v[0]};
+ r1 ^= (vec_u64) {v[1], 0};
+
+ /* Shift one (multiply by x) as gcm spec is stupid. */
+ hibit = (vec_u64) vec_splat((vec_u8) r0, 15);
+ hibit = (vec_u64) vec_rl((vec_u8) hibit, leftshift);
+ hibit &= onebit;
+ r0 = vec_sll(r0, leftshift);
+ r1 = vec_sll(r1, leftshift);
+ r1 |= hibit;
+
+ /* Reduce */
+ v = vpmsumd((vec_u64) {r0[0], 0}, (vec_u64) {pd, 0});
+ r0 ^= (vec_u64) {0, v[0]};
+ r1 ^= (vec_u64) {v[1], 0};
+ v = vpmsumd((vec_u64) {r0[1], 0}, (vec_u64) {pd, 0});
+ r1 ^= v;
+ ci = r0 ^ r1;
+ }
+
+ ghash->x = ci;
+
+ return SECSuccess;
+}
+
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+ ghash->x = (vec_u64) vec_splat_u32(0);
+ ghash->h = (vec_u64) {ghash->h_low, ghash->h_high};
+ ghash->ghash_mul = gcm_HashMult_hw;
+ ghash->hw = PR_TRUE;
+ return SECSuccess;
+}
+
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+ ghash->x = (vec_u64) vec_splat_u32(0);
+ return SECSuccess;
+}
+
+#endif /* defined(USE_PPC_CRYPTO) */
diff --git a/lib/freebl/gcm.c b/lib/freebl/gcm.c
index 6edf0e8f3..737252eec 100644
--- a/lib/freebl/gcm.c
+++ b/lib/freebl/gcm.c
@@ -36,7 +36,7 @@ SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
/* Stub definitions for the above *_hw functions, which shouldn't be
* used unless NSS_X86_OR_X64 is defined */
-#if !defined(NSS_X86_OR_X64) && !defined(USE_ARM_GCM)
+#if !defined(NSS_X86_OR_X64) && !defined(USE_ARM_GCM) && !defined(USE_PPC_CRYPTO)
SECStatus
gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
{
@@ -65,7 +65,7 @@ gcm_HashZeroX_hw(gcmHashContext *ghash)
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
return SECFailure;
}
-#endif /* !NSS_X86_OR_X64 && !USE_ARM_GCM */
+#endif /* !NSS_X86_OR_X64 && !USE_ARM_GCM && !USE_PPC_CRYPTO */
uint64_t
get64(const unsigned char *bytes)
@@ -94,6 +94,8 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
ghash->h_high = get64(H);
#ifdef USE_ARM_GCM
if (arm_pmull_support() && !sw) {
+#elif defined(USE_PPC_CRYPTO)
+ if (ppc_crypto_support() && !sw) {
#else
if (clmul_support() && !sw) {
#endif
diff --git a/lib/freebl/gcm.h b/lib/freebl/gcm.h
index 49a9ec9fa..571b9ec55 100644
--- a/lib/freebl/gcm.h
+++ b/lib/freebl/gcm.h
@@ -30,6 +30,28 @@
#include <arm_neon.h>
#endif
+#ifdef __powerpc64__
+#include "altivec-types.h"
+
+/* The ghash freebl test tries to use this in C++, and gcc defines conflict. */
+#ifdef __cplusplus
+#undef pixel
+#undef vector
+#undef bool
+#endif
+
+/*
+ * PPC CRYPTO requires at least gcc 5 or clang. The LE check is purely
+ * because it's only been tested on LE. If you're interested in BE,
+ * please send a patch.
+ */
+#if (defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5)) && \
+ defined(IS_LITTLE_ENDIAN)
+#define USE_PPC_CRYPTO
+#endif
+
+#endif
+
SEC_BEGIN_PROTOS
#ifdef HAVE_INT128_SUPPORT
@@ -67,6 +89,8 @@ pre_align struct gcmHashContextStr {
__m128i x, h;
#elif defined(__aarch64__)
uint64x2_t x, h;
+#elif defined(USE_PPC_CRYPTO)
+ vec_u64 x, h;
#endif
uint64_t x_low, x_high, h_high, h_low;
unsigned char buffer[MAX_BLOCK_SIZE];