summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>2020-07-29 21:49:09 +0000
committerMakoto Kato <m_kato@ga2.so-net.ne.jp>2020-07-29 21:49:09 +0000
commit2189ce5a648136354811a8a6dfc928fd956b2830 (patch)
treedd98121f8e554a87d9b5f845d61417efff3a0305
parent07f7eac3fc51a3872db72577d7a1efbadc677981 (diff)
downloadnss-hg-2189ce5a648136354811a8a6dfc928fd956b2830.tar.gz
Bug 1650702 - Use ARM's crypt extension for SHA1. r=kjacobs
ARM Crypto extension has SHA1 acceleration. Using this, SHA1 is 3 times faster on ARMv8 CPU. The following data is AWS's a1 instance (Cortex-A72). Before ====== ``` # mode in opreps cxreps context op time(sec) thrgput sha1_e 954Mb 31M 0 0.000 10000.000 10.000 95Mb ``` After ===== ``` # mode in opreps cxreps context op time(sec) thrgput sha1_e 2Gb 94M 0 0.000 10000.000 10.000 288Mb ``` Differential Revision: https://phabricator.services.mozilla.com/D84125
-rw-r--r--coreconf/config.gypi1
-rw-r--r--lib/freebl/Makefile18
-rw-r--r--lib/freebl/blinit.c2
-rw-r--r--lib/freebl/freebl.gyp13
-rw-r--r--lib/freebl/sha1-armv8.c260
-rw-r--r--lib/freebl/sha_fast.c51
-rw-r--r--lib/freebl/sha_fast.h12
7 files changed, 344 insertions, 13 deletions
diff --git a/coreconf/config.gypi b/coreconf/config.gypi
index 6d8654d27..d8bf7bcd2 100644
--- a/coreconf/config.gypi
+++ b/coreconf/config.gypi
@@ -97,6 +97,7 @@
'cc_use_gnu_ld%': '<(cc_use_gnu_ld)',
# Some defaults
'disable_arm_hw_aes%': 0,
+ 'disable_arm_hw_sha1%': 0,
'disable_arm_hw_sha2%': 0,
'disable_tests%': 0,
'disable_chachapoly%': 0,
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 0a437ce33..52a30927b 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -121,8 +121,8 @@ endif
endif
ifdef NS_USE_GCC
ifeq ($(CPU_ARCH),aarch64)
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c
endif
endif
ifeq ($(CPU_ARCH),arm)
@@ -130,17 +130,17 @@ ifndef NSS_DISABLE_ARM32_NEON
EXTRA_SRCS += gcm-arm32-neon.c
endif
ifdef CC_IS_CLANG
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
else ifeq (1,$(CC_IS_GCC))
# Old compiler doesn't support ARM AES.
ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
endif
ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
endif
endif
endif
@@ -715,6 +715,7 @@ ifeq ($(CPU_ARCH),arm)
# Confusingly, __SOFTFP__ is the name of the define for the softfloat ABI, not for the softfp ABI.
USES_SOFTFLOAT_ABI := $(shell $(CC) -o - -E -dM - $(CFLAGS) < /dev/null | grep __SOFTFP__ > /dev/null && echo 1)
$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
+$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
ifndef NSS_DISABLE_ARM32_NEON
$(OBJDIR)/$(PROG_PREFIX)gcm-arm32-neon$(OBJ_SUFFIX): CFLAGS += -mfpu=neon$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
@@ -725,6 +726,7 @@ ifdef NS_USE_GCC
ifeq ($(CPU_ARCH),aarch64)
$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
$(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
+$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
endif
endif
diff --git a/lib/freebl/blinit.c b/lib/freebl/blinit.c
index 4f2d2d492..3a04de887 100644
--- a/lib/freebl/blinit.c
+++ b/lib/freebl/blinit.c
@@ -232,6 +232,7 @@ CheckARMSupport()
arm_neon_support_ = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON") == NULL;
arm_aes_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_AES") == NULL;
arm_pmull_support_ &= PR_GetEnvSecure("NSS_DISABLE_PMULL") == NULL;
+ arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL;
arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL;
}
#endif /* defined(__aarch64__) */
@@ -356,6 +357,7 @@ CheckARMSupport()
arm_sha2_support_ = hwcaps & HWCAP2_SHA2;
}
arm_neon_support_ = GetNeonSupport();
+ arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL;
arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL;
}
#endif /* defined(__arm__) */
diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp
index d780b2217..2f5139809 100644
--- a/lib/freebl/freebl.gyp
+++ b/lib/freebl/freebl.gyp
@@ -329,6 +329,7 @@
'type': 'static_library',
'sources': [
'aes-armv8.c',
+ 'sha1-armv8.c',
'sha256-armv8.c',
],
'dependencies': [
@@ -385,7 +386,7 @@
'dependencies': [
'gcm-aes-x86_c_lib',
],
- }, '(disable_arm_hw_aes==0 or disable_arm_hw_sha2==0) and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ }, '(disable_arm_hw_aes==0 or disable_arm_hw_sha1==0 or disable_arm_hw_sha2==0) and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
'dependencies': [
'armv8_c_lib'
],
@@ -643,6 +644,11 @@
'USE_HW_AES',
],
}],
+ [ 'OS=="win" and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha1==0', {
+ 'defines': [
+ 'USE_HW_SHA1',
+ ],
+ }],
[ 'OS=="win" and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha2==0', {
'defines': [
'USE_HW_SHA2',
@@ -712,6 +718,11 @@
'USE_HW_AES',
],
}],
+ [ 'disable_arm_hw_sha1==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ 'defines': [
+ 'USE_HW_SHA1',
+ ],
+ }],
[ 'disable_arm_hw_sha2==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
'defines': [
'USE_HW_SHA2',
diff --git a/lib/freebl/sha1-armv8.c b/lib/freebl/sha1-armv8.c
new file mode 100644
index 000000000..a20804e1b
--- /dev/null
+++ b/lib/freebl/sha1-armv8.c
@@ -0,0 +1,260 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef __ARM_FEATURE_CRYPTO
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <arm_neon.h>
+#include <memory.h>
+#include "blapi.h"
+#include "sha_fast.h"
+
+#if !defined(SHA_PUT_W_IN_STACK)
+#define H2X 11
+#else
+#define H2X 0
+#endif
+
+static void shaCompress(SHA_HW_t *X, const PRUint32 *datain);
+
+void
+SHA1_Compress_Native(SHA1Context *ctx)
+{
+ shaCompress(&ctx->H[H2X], ctx->u.w);
+}
+
+/*
+ * SHA: Add data to context.
+ */
+void
+SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ unsigned int lenB;
+ unsigned int togo;
+
+ if (!len) {
+ return;
+ }
+
+ /* accumulate the byte count. */
+ lenB = (unsigned int)(ctx->size) & 63U;
+
+ ctx->size += len;
+
+ /*
+ * Read the data into W and process blocks as they get full
+ */
+ if (lenB > 0) {
+ togo = 64U - lenB;
+ if (len < togo) {
+ togo = len;
+ }
+ memcpy(ctx->u.b + lenB, dataIn, togo);
+ len -= togo;
+ dataIn += togo;
+ lenB = (lenB + togo) & 63U;
+ if (!lenB) {
+ shaCompress(&ctx->H[H2X], ctx->u.w);
+ }
+ }
+
+ while (len >= 64U) {
+ len -= 64U;
+ shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn);
+ dataIn += 64U;
+ }
+
+ if (len) {
+ memcpy(ctx->u.b, dataIn, len);
+ }
+}
+
+/*
+ * SHA: Compression function, unrolled.
+ */
+static void
+shaCompress(SHA_HW_t *X, const PRUint32 *inbuf)
+{
+#define XH(n) X[n - H2X]
+
+ const uint32x4_t K0 = vdupq_n_u32(0x5a827999);
+ const uint32x4_t K1 = vdupq_n_u32(0x6ed9eba1);
+ const uint32x4_t K2 = vdupq_n_u32(0x8f1bbcdc);
+ const uint32x4_t K3 = vdupq_n_u32(0xca62c1d6);
+
+ uint32x4_t abcd = vld1q_u32(&XH(0));
+ PRUint32 e = XH(4);
+
+ const uint32x4_t origABCD = abcd;
+ const PRUint32 origE = e;
+
+ uint32x4_t w0 = vld1q_u32(inbuf);
+ uint32x4_t w1 = vld1q_u32(inbuf + 4);
+ uint32x4_t w2 = vld1q_u32(inbuf + 8);
+ uint32x4_t w3 = vld1q_u32(inbuf + 12);
+
+ w0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w0)));
+ w1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w1)));
+ w2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w2)));
+ w3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w3)));
+
+ uint32x4_t t0 = vaddq_u32(w0, K0);
+ uint32x4_t t1 = vaddq_u32(w1, K0);
+
+ PRUint32 tmpE;
+
+ /*
+ * Using the following ARM instructions to accelerate SHA1
+ *
+ * sha1c for round 0 - 20
+ * sha1p for round 20 - 40
+ * sha1m for round 40 - 60
+ * sha1p for round 60 - 80
+ * sha1su0 and sha1su1 for message schedule
+ * sha1h for rotate left 30
+ */
+
+ /* Round 0-3 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K0);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 4-7 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K0);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 8-11 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K0);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 12-15 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K1);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 16-19 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K1);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 20-23 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K1);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 24-27 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K1);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 28-31 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K1);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 32-35 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K2);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 36-39 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K2);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 40-43 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K2);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 44-47 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K2);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 48-51 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K2);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 52-55 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K3);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 56-59 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K3);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 60-63 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K3);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 64-67 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K3);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 68-71 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K3);
+ w0 = vsha1su1q_u32(w0, w3);
+
+ /* Round 72-75 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+
+ /* Round 76-79 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+
+ e += origE;
+ abcd = vaddq_u32(origABCD, abcd);
+
+ vst1q_u32(&XH(0), abcd);
+ XH(4) = e;
+}
+
+#endif /* __ARM_FEATURE_CRYPTO */
diff --git a/lib/freebl/sha_fast.c b/lib/freebl/sha_fast.c
index 52071f0c9..2a8ac576c 100644
--- a/lib/freebl/sha_fast.c
+++ b/lib/freebl/sha_fast.c
@@ -10,6 +10,7 @@
#include "blapi.h"
#include "sha_fast.h"
#include "prerror.h"
+#include "secerr.h"
#ifdef TRACING_SSL
#include "ssl.h"
@@ -28,6 +29,28 @@ static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
+void SHA1_Compress_Native(SHA1Context *ctx);
+void SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len);
+
+static void SHA1_Compress_Generic(SHA1Context *ctx);
+static void SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len);
+
+#ifndef USE_HW_SHA1
+void
+SHA1_Compress_Native(SHA1Context *ctx)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+void
+SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+#endif
+
/*
* SHA: initialize context
*/
@@ -43,6 +66,18 @@ SHA1_Begin(SHA1Context *ctx)
ctx->H[2] = 0x98badcfeL;
ctx->H[3] = 0x10325476L;
ctx->H[4] = 0xc3d2e1f0L;
+
+#if defined(USE_HW_SHA1) && defined(IS_LITTLE_ENDIAN)
+ /* arm's implementation is tested on little endian only */
+ if (arm_sha1_support()) {
+ ctx->compress = SHA1_Compress_Native;
+ ctx->update = SHA1_Update_Native;
+ } else
+#endif
+ {
+ ctx->compress = SHA1_Compress_Generic;
+ ctx->update = SHA1_Update_Generic;
+ }
}
/* Explanation of H array and index values:
@@ -89,6 +124,12 @@ SHA1_Begin(SHA1Context *ctx)
void
SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
{
+ ctx->update(ctx, dataIn, len);
+}
+
+static void
+SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
register unsigned int lenB;
register unsigned int togo;
@@ -166,7 +207,7 @@ SHA1_End(SHA1Context *ctx, unsigned char *hashout,
size <<= 3;
ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32));
ctx->W[15] = SHA_HTONL((PRUint32)size);
- shaCompress(&ctx->H[H2X], ctx->W);
+ ctx->compress(ctx);
/*
* Output hash
@@ -460,6 +501,12 @@ shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
XH(4) += E;
}
+static void
+SHA1_Compress_Generic(SHA1Context *ctx)
+{
+ shaCompress(&ctx->H[H2X], ctx->u.w);
+}
+
/*************************************************************************
** Code below this line added to make SHA code support BLAPI interface
*/
@@ -491,7 +538,7 @@ SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
unsigned int outLen;
SHA1_Begin(&ctx);
- SHA1_Update(&ctx, src, src_length);
+ ctx.update(&ctx, src, src_length);
SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH);
memset(&ctx, 0, sizeof ctx);
return SECSuccess;
diff --git a/lib/freebl/sha_fast.h b/lib/freebl/sha_fast.h
index 1780ca8f4..c03c0637a 100644
--- a/lib/freebl/sha_fast.h
+++ b/lib/freebl/sha_fast.h
@@ -10,13 +10,19 @@
#define SHA1_INPUT_LEN 64
-#if defined(IS_64) && !defined(__sparc)
+#if defined(IS_64) && !defined(__sparc) && !defined(__aarch64__)
typedef PRUint64 SHA_HW_t;
#define SHA1_USING_64_BIT 1
#else
typedef PRUint32 SHA_HW_t;
#endif
+struct SHA1ContextStr;
+
+typedef void (*sha1_compress_t)(struct SHA1ContextStr *);
+typedef void (*sha1_update_t)(struct SHA1ContextStr *, const unsigned char *,
+ unsigned int);
+
struct SHA1ContextStr {
union {
PRUint32 w[16]; /* input buffer */
@@ -24,6 +30,8 @@ struct SHA1ContextStr {
} u;
PRUint64 size; /* count of hashed bytes. */
SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */
+ sha1_compress_t compress;
+ sha1_update_t update;
};
#if defined(_MSC_VER)
@@ -135,7 +143,7 @@ swap4b(PRUint32 value)
#define SHA_BYTESWAP(x) x = SHA_HTONL(x)
-#define SHA_STORE(n) ((PRUint32*)hashout)[n] = SHA_HTONL(ctx->H[n])
+#define SHA_STORE(n) ((PRUint32 *)hashout)[n] = SHA_HTONL(ctx->H[n])
#if defined(HAVE_UNALIGNED_ACCESS)
#define SHA_STORE_RESULT \
SHA_STORE(0); \