diff options
author | Lauri Kasanen <cand@gmx.com> | 2019-11-06 19:17:48 +1100 |
---|---|---|
committer | Lauri Kasanen <cand@gmx.com> | 2019-11-06 19:17:48 +1100 |
commit | a261783b1cf578ed1eb499f6ab664848b5809cca (patch) | |
tree | dd2389c38e3c1b6ed08bba3edf94ae432026e406 | |
parent | 6a6666f66dd44c6abf871f00844c760dabd04349 (diff) | |
download | nss-hg-a261783b1cf578ed1eb499f6ab664848b5809cca.tar.gz |
Bug 1566126 - freebl: POWER GHASH Vector Acceleration, r=mt
Implementation for POWER8 adapted from the ARM paper:
https://conradoplg.cryptoland.net/files/2010/12/gcm14.pdf
Benchmark of `bltest -E -m aes_gcm -i tests/aes_gcm/plaintext10 \
-v tests/aes_gcm/iv10 -k tests/aes_gcm/key10 -5 10` on POWER8 3.3GHz.
NSS_DISABLE_HW_CRYPTO=1
mode in symmkey opreps cxreps context op time(sec) thrgput
aes_gcm_e 309Mb 192 5M 0 0.000 10000.000 10.001 30Mb
mode in symmkey opreps cxreps context op time(sec) thrgput
aes_gcm_e 829Mb 192 14M 0 0.000 10000.000 10.001 82Mb
Notable operf results, sw:
samples % image name symbol name
226033 59.3991 libfreeblpriv3.so bmul
80606 21.1824 libfreeblpriv3.so rijndael_encryptBlock128
28851 7.5817 libfreeblpriv3.so gcm_HashMult_sftw
hw:
213899 56.2037 libfreeblpriv3.so rijndael_encryptBlock128
45233 11.8853 libfreeblpriv3.so gcm_HashMult_hw
So the ghash part is ~5.6x faster.
Signed-off-by: Lauri Kasanen <cand@gmx.com>
-rw-r--r-- | lib/freebl/Makefile | 5 | ||||
-rw-r--r-- | lib/freebl/altivec-types.h | 23 | ||||
-rw-r--r-- | lib/freebl/blapii.h | 1 | ||||
-rw-r--r-- | lib/freebl/blinit.c | 29 | ||||
-rw-r--r-- | lib/freebl/freebl.gyp | 28 | ||||
-rw-r--r-- | lib/freebl/gcm-ppc.c | 106 | ||||
-rw-r--r-- | lib/freebl/gcm.c | 6 | ||||
-rw-r--r-- | lib/freebl/gcm.h | 24 |
8 files changed, 220 insertions, 2 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile index 5943fb377..98a7c5d5c 100644 --- a/lib/freebl/Makefile +++ b/lib/freebl/Makefile @@ -263,6 +263,7 @@ ifeq ($(CPU_ARCH),arm) MPI_SRCS += mpi_arm.c endif ifeq ($(CPU_ARCH),ppc) + EXTRA_SRCS += gcm-ppc.c ifdef USE_64 DEFINES += -DNSS_NO_INIT_SUPPORT endif # USE_64 @@ -785,3 +786,7 @@ ifeq ($(CPU_ARCH),aarch64) $(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto $(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto endif + +ifeq ($(CPU_ARCH),ppc) +$(OBJDIR)/$(PROG_PREFIX)gcm-ppc$(OBJ_SUFFIX): CFLAGS += -mcrypto -maltivec +endif diff --git a/lib/freebl/altivec-types.h b/lib/freebl/altivec-types.h new file mode 100644 index 000000000..807a44db4 --- /dev/null +++ b/lib/freebl/altivec-types.h @@ -0,0 +1,23 @@ +/* + * altivec-types.h - shorter vector typedefs + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _ALTIVEC_TYPES_H_ +#define _ALTIVEC_TYPES_H_ 1 + +#include <altivec.h> + +typedef __vector unsigned char vec_u8; +typedef __vector signed char vec_s8; +typedef __vector unsigned short vec_u16; +typedef __vector signed short vec_s16; +typedef __vector unsigned int vec_u32; +typedef __vector signed int vec_s32; +typedef __vector unsigned long long vec_u64; +typedef __vector signed long long vec_s64; +typedef __vector float vec_f; + +#endif diff --git a/lib/freebl/blapii.h b/lib/freebl/blapii.h index 743a1168b..006c26977 100644 --- a/lib/freebl/blapii.h +++ b/lib/freebl/blapii.h @@ -86,5 +86,6 @@ PRBool arm_aes_support(); PRBool arm_pmull_support(); PRBool arm_sha1_support(); PRBool arm_sha2_support(); +PRBool ppc_crypto_support(); #endif /* _BLAPII_H_ */ diff --git a/lib/freebl/blinit.c b/lib/freebl/blinit.c index 543206c12..5e1f4826c 100644 --- a/lib/freebl/blinit.c +++ b/lib/freebl/blinit.c @@ -29,6 +29,7 @@ static PRBool arm_aes_support_ = PR_FALSE; static PRBool arm_sha1_support_ = PR_FALSE; static PRBool arm_sha2_support_ = PR_FALSE; static PRBool arm_pmull_support_ = PR_FALSE; +static PRBool ppc_crypto_support_ = PR_FALSE; #ifdef NSS_X86_OR_X64 /* @@ -348,6 +349,32 @@ arm_sha2_support() { return arm_sha2_support_; } +PRBool +ppc_crypto_support() +{ + return ppc_crypto_support_; +} + +#if defined(__powerpc__) + +#include <sys/auxv.h> + +// Defines from cputable.h in Linux kernel - PPC, letting us build on older kernels +#ifndef PPC_FEATURE2_VEC_CRYPTO +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + +static void +CheckPPCSupport() +{ + char *disable_hw_crypto = PR_GetEnvSecure("NSS_DISABLE_PPC_GHASH"); + + long hwcaps = getauxval(AT_HWCAP2); + + ppc_crypto_support_ = hwcaps & PPC_FEATURE2_VEC_CRYPTO && disable_hw_crypto == NULL; +} + +#endif /* __powerpc__ */ static PRStatus FreeblInit(void) @@ -356,6 +383,8 @@ FreeblInit(void) CheckX86CPUSupport(); #elif (defined(__aarch64__) || defined(__arm__)) CheckARMSupport(); +#elif (defined(__powerpc__)) + CheckPPCSupport(); #endif return PR_SUCCESS; } diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp index f12508136..7ea5c3750 100644 --- a/lib/freebl/freebl.gyp +++ b/lib/freebl/freebl.gyp @@ -133,6 +133,24 @@ ] }, { + 'target_name': 'gcm-aes-ppc_c_lib', + 'type': 'static_library', + 'sources': [ + 'gcm-ppc.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'cflags': [ + '-mcrypto', + '-maltivec' + ], + 'cflags_mozilla': [ + '-mcrypto', + '-maltivec' + ] + }, + { 'target_name': 'armv8_c_lib', 'type': 'static_library', 'sources': [ @@ -199,6 +217,11 @@ 'gcm-aes-aarch64_c_lib', ], }], + [ 'target_arch=="ppc64le"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + ], + }], [ 'OS=="linux"', { 'defines!': [ 'FREEBL_NO_DEPEND', @@ -245,6 +268,11 @@ 'gcm-aes-aarch64_c_lib', ], }], + [ 'target_arch=="ppc64" or target_arch=="ppc64le"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + ], + }], [ 'OS!="linux"', { 'conditions': [ [ 'moz_fold_libs==0', { diff --git a/lib/freebl/gcm-ppc.c b/lib/freebl/gcm-ppc.c new file mode 100644 index 000000000..3eedcbc57 --- /dev/null +++ b/lib/freebl/gcm-ppc.c @@ -0,0 +1,106 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "gcm.h" +#include "secerr.h" + +#if defined(USE_PPC_CRYPTO) + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + vec_xst_be((vec_u8) ghash->x, 0, outbuf); + return SECSuccess; +} + +static vec_u64 vpmsumd(const vec_u64 a, const vec_u64 b) +{ +#if defined(__clang__) + /* Clang uses a different name */ + return __builtin_altivec_crypto_vpmsumd(a, b); +#elif (__GNUC__ >= 10) || (__GNUC__ == 9 && __GNUC_MINOR__ >= 3) || \ + (__GNUC__ == 8 && __GNUC_MINOR__ >= 4) || \ + (__GNUC__ == 7 && __GNUC_MINOR__ >= 5) + /* GCC versions not affected by https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91275 */ + return __builtin_crypto_vpmsumd(a, b); +#else + /* GCC versions where this builtin is buggy */ + vec_u64 vr; + __asm("vpmsumd %0, %1, %2" : "=v"(vr) : "v"(a), "v"(b)); + return vr; +#endif +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + const vec_u8 leftshift = vec_splat_u8(1); + const vec_u64 onebit = (vec_u64) {1, 0}; + const unsigned long long pd = 0xc2LLU << 56; + + vec_u64 ci, v, r0, r1; + vec_u64 hibit; + unsigned i; + + ci = ghash->x; + + for (i = 0; i < count; i++, buf += 16) { + /* clang needs the following cast away from const; maybe a bug in 7.0.0 */ + v = (vec_u64) vec_xl_be(0, (unsigned char *) buf); + ci ^= v; + + /* Do binary mult ghash->X = C * ghash->H (Karatsuba). */ + r0 = vpmsumd((vec_u64) {ci[0], 0}, (vec_u64) {ghash->h[0], 0}); + r1 = vpmsumd((vec_u64) {ci[1], 0}, (vec_u64) {ghash->h[1], 0}); + v = (vec_u64) {ci[0] ^ ci[1], ghash->h[0] ^ ghash->h[1]}; + v = vpmsumd((vec_u64) {v[0], 0}, (vec_u64) {v[1], 0}); + v ^= r0; + v ^= r1; + r0 ^= (vec_u64) {0, v[0]}; + r1 ^= (vec_u64) {v[1], 0}; + + /* Shift one (multiply by x) as gcm spec is stupid. */ + hibit = (vec_u64) vec_splat((vec_u8) r0, 15); + hibit = (vec_u64) vec_rl((vec_u8) hibit, leftshift); + hibit &= onebit; + r0 = vec_sll(r0, leftshift); + r1 = vec_sll(r1, leftshift); + r1 |= hibit; + + /* Reduce */ + v = vpmsumd((vec_u64) {r0[0], 0}, (vec_u64) {pd, 0}); + r0 ^= (vec_u64) {0, v[0]}; + r1 ^= (vec_u64) {v[1], 0}; + v = vpmsumd((vec_u64) {r0[1], 0}, (vec_u64) {pd, 0}); + r1 ^= v; + ci = r0 ^ r1; + } + + ghash->x = ci; + + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + ghash->x = (vec_u64) vec_splat_u32(0); + ghash->h = (vec_u64) {ghash->h_low, ghash->h_high}; + ghash->ghash_mul = gcm_HashMult_hw; + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x = (vec_u64) vec_splat_u32(0); + return SECSuccess; +} + +#endif /* defined(USE_PPC_CRYPTO) */ diff --git a/lib/freebl/gcm.c b/lib/freebl/gcm.c index 6edf0e8f3..737252eec 100644 --- a/lib/freebl/gcm.c +++ b/lib/freebl/gcm.c @@ -36,7 +36,7 @@ SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, /* Stub definitions for the above *_hw functions, which shouldn't be * used unless NSS_X86_OR_X64 is defined */ -#if !defined(NSS_X86_OR_X64) && !defined(USE_ARM_GCM) +#if !defined(NSS_X86_OR_X64) && !defined(USE_ARM_GCM) && !defined(USE_PPC_CRYPTO) SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) { @@ -65,7 +65,7 @@ gcm_HashZeroX_hw(gcmHashContext *ghash) PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); return SECFailure; } -#endif /* !NSS_X86_OR_X64 && !USE_ARM_GCM */ +#endif /* !NSS_X86_OR_X64 && !USE_ARM_GCM && !USE_PPC_CRYPTO */ uint64_t get64(const unsigned char *bytes) @@ -94,6 +94,8 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) ghash->h_high = get64(H); #ifdef USE_ARM_GCM if (arm_pmull_support() && !sw) { +#elif defined(USE_PPC_CRYPTO) + if (ppc_crypto_support() && !sw) { #else if (clmul_support() && !sw) { #endif diff --git a/lib/freebl/gcm.h b/lib/freebl/gcm.h index 49a9ec9fa..571b9ec55 100644 --- a/lib/freebl/gcm.h +++ b/lib/freebl/gcm.h @@ -30,6 +30,28 @@ #include <arm_neon.h> #endif +#ifdef __powerpc64__ +#include "altivec-types.h" + +/* The ghash freebl test tries to use this in C++, and gcc defines conflict. */ +#ifdef __cplusplus +#undef pixel +#undef vector +#undef bool +#endif + +/* + * PPC CRYPTO requires at least gcc 5 or clang. The LE check is purely + * because it's only been tested on LE. If you're interested in BE, + * please send a patch. + */ +#if (defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5)) && \ + defined(IS_LITTLE_ENDIAN) +#define USE_PPC_CRYPTO +#endif + +#endif + SEC_BEGIN_PROTOS #ifdef HAVE_INT128_SUPPORT @@ -67,6 +89,8 @@ pre_align struct gcmHashContextStr { __m128i x, h; #elif defined(__aarch64__) uint64x2_t x, h; +#elif defined(USE_PPC_CRYPTO) + vec_u64 x, h; #endif uint64_t x_low, x_high, h_high, h_low; unsigned char buffer[MAX_BLOCK_SIZE]; |