summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>2020-07-29 21:49:09 +0000
committerMakoto Kato <m_kato@ga2.so-net.ne.jp>2020-07-29 21:49:09 +0000
commit2189ce5a648136354811a8a6dfc928fd956b2830 (patch)
treedd98121f8e554a87d9b5f845d61417efff3a0305
parent07f7eac3fc51a3872db72577d7a1efbadc677981 (diff)
downloadnss-hg-2189ce5a648136354811a8a6dfc928fd956b2830.tar.gz
Bug 1650702 - Use ARM's crypt extension for SHA1. r=kjacobs
ARM Crypto extension has SHA1 acceleration. Using this, SHA1 is 3 times faster on ARMv8 CPU. The following data is AWS's a1 instance (Cortex-A72). Before ====== ``` # mode in opreps cxreps context op time(sec) thrgput sha1_e 954Mb 31M 0 0.000 10000.000 10.000 95Mb ``` After ===== ``` # mode in opreps cxreps context op time(sec) thrgput sha1_e 2Gb 94M 0 0.000 10000.000 10.000 288Mb ``` Differential Revision: https://phabricator.services.mozilla.com/D84125
-rw-r--r--coreconf/config.gypi1
-rw-r--r--lib/freebl/Makefile18
-rw-r--r--lib/freebl/blinit.c2
-rw-r--r--lib/freebl/freebl.gyp13
-rw-r--r--lib/freebl/sha1-armv8.c260
-rw-r--r--lib/freebl/sha_fast.c51
-rw-r--r--lib/freebl/sha_fast.h12
7 files changed, 344 insertions, 13 deletions
diff --git a/coreconf/config.gypi b/coreconf/config.gypi
index 6d8654d27..d8bf7bcd2 100644
--- a/coreconf/config.gypi
+++ b/coreconf/config.gypi
@@ -97,6 +97,7 @@
'cc_use_gnu_ld%': '<(cc_use_gnu_ld)',
# Some defaults
'disable_arm_hw_aes%': 0,
+ 'disable_arm_hw_sha1%': 0,
'disable_arm_hw_sha2%': 0,
'disable_tests%': 0,
'disable_chachapoly%': 0,
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 0a437ce33..52a30927b 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -121,8 +121,8 @@ endif
endif
ifdef NS_USE_GCC
ifeq ($(CPU_ARCH),aarch64)
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c
endif
endif
ifeq ($(CPU_ARCH),arm)
@@ -130,17 +130,17 @@ ifndef NSS_DISABLE_ARM32_NEON
EXTRA_SRCS += gcm-arm32-neon.c
endif
ifdef CC_IS_CLANG
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
else ifeq (1,$(CC_IS_GCC))
# Old compiler doesn't support ARM AES.
ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
endif
ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
- DEFINES += -DUSE_HW_AES -DUSE_HW_SHA2
- EXTRA_SRCS += aes-armv8.c sha256-armv8.c
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
endif
endif
endif
@@ -715,6 +715,7 @@ ifeq ($(CPU_ARCH),arm)
# Confusingly, __SOFTFP__ is the name of the define for the softfloat ABI, not for the softfp ABI.
USES_SOFTFLOAT_ABI := $(shell $(CC) -o - -E -dM - $(CFLAGS) < /dev/null | grep __SOFTFP__ > /dev/null && echo 1)
$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
+$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
ifndef NSS_DISABLE_ARM32_NEON
$(OBJDIR)/$(PROG_PREFIX)gcm-arm32-neon$(OBJ_SUFFIX): CFLAGS += -mfpu=neon$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
@@ -725,6 +726,7 @@ ifdef NS_USE_GCC
ifeq ($(CPU_ARCH),aarch64)
$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
$(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
+$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
endif
endif
diff --git a/lib/freebl/blinit.c b/lib/freebl/blinit.c
index 4f2d2d492..3a04de887 100644
--- a/lib/freebl/blinit.c
+++ b/lib/freebl/blinit.c
@@ -232,6 +232,7 @@ CheckARMSupport()
arm_neon_support_ = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON") == NULL;
arm_aes_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_AES") == NULL;
arm_pmull_support_ &= PR_GetEnvSecure("NSS_DISABLE_PMULL") == NULL;
+ arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL;
arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL;
}
#endif /* defined(__aarch64__) */
@@ -356,6 +357,7 @@ CheckARMSupport()
arm_sha2_support_ = hwcaps & HWCAP2_SHA2;
}
arm_neon_support_ = GetNeonSupport();
+ arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL;
arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL;
}
#endif /* defined(__arm__) */
diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp
index d780b2217..2f5139809 100644
--- a/lib/freebl/freebl.gyp
+++ b/lib/freebl/freebl.gyp
@@ -329,6 +329,7 @@
'type': 'static_library',
'sources': [
'aes-armv8.c',
+ 'sha1-armv8.c',
'sha256-armv8.c',
],
'dependencies': [
@@ -385,7 +386,7 @@
'dependencies': [
'gcm-aes-x86_c_lib',
],
- }, '(disable_arm_hw_aes==0 or disable_arm_hw_sha2==0) and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ }, '(disable_arm_hw_aes==0 or disable_arm_hw_sha1==0 or disable_arm_hw_sha2==0) and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
'dependencies': [
'armv8_c_lib'
],
@@ -643,6 +644,11 @@
'USE_HW_AES',
],
}],
+ [ 'OS=="win" and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha1==0', {
+ 'defines': [
+ 'USE_HW_SHA1',
+ ],
+ }],
[ 'OS=="win" and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha2==0', {
'defines': [
'USE_HW_SHA2',
@@ -712,6 +718,11 @@
'USE_HW_AES',
],
}],
+ [ 'disable_arm_hw_sha1==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ 'defines': [
+ 'USE_HW_SHA1',
+ ],
+ }],
[ 'disable_arm_hw_sha2==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
'defines': [
'USE_HW_SHA2',
diff --git a/lib/freebl/sha1-armv8.c b/lib/freebl/sha1-armv8.c
new file mode 100644
index 000000000..a20804e1b
--- /dev/null
+++ b/lib/freebl/sha1-armv8.c
@@ -0,0 +1,260 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef __ARM_FEATURE_CRYPTO
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <arm_neon.h>
+#include <memory.h>
+#include "blapi.h"
+#include "sha_fast.h"
+
+#if !defined(SHA_PUT_W_IN_STACK)
+#define H2X 11
+#else
+#define H2X 0
+#endif
+
+static void shaCompress(SHA_HW_t *X, const PRUint32 *datain);
+
+void
+SHA1_Compress_Native(SHA1Context *ctx)
+{
+ shaCompress(&ctx->H[H2X], ctx->u.w);
+}
+
+/*
+ * SHA: Add data to context.
+ */
+void
+SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ unsigned int lenB;
+ unsigned int togo;
+
+ if (!len) {
+ return;
+ }
+
+ /* accumulate the byte count. */
+ lenB = (unsigned int)(ctx->size) & 63U;
+
+ ctx->size += len;
+
+ /*
+ * Read the data into W and process blocks as they get full
+ */
+ if (lenB > 0) {
+ togo = 64U - lenB;
+ if (len < togo) {
+ togo = len;
+ }
+ memcpy(ctx->u.b + lenB, dataIn, togo);
+ len -= togo;
+ dataIn += togo;
+ lenB = (lenB + togo) & 63U;
+ if (!lenB) {
+ shaCompress(&ctx->H[H2X], ctx->u.w);
+ }
+ }
+
+ while (len >= 64U) {
+ len -= 64U;
+ shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn);
+ dataIn += 64U;
+ }
+
+ if (len) {
+ memcpy(ctx->u.b, dataIn, len);
+ }
+}
+
+/*
+ * SHA: Compression function, unrolled.
+ */
+static void
+shaCompress(SHA_HW_t *X, const PRUint32 *inbuf)
+{
+#define XH(n) X[n - H2X]
+
+ const uint32x4_t K0 = vdupq_n_u32(0x5a827999);
+ const uint32x4_t K1 = vdupq_n_u32(0x6ed9eba1);
+ const uint32x4_t K2 = vdupq_n_u32(0x8f1bbcdc);
+ const uint32x4_t K3 = vdupq_n_u32(0xca62c1d6);
+
+ uint32x4_t abcd = vld1q_u32(&XH(0));
+ PRUint32 e = XH(4);
+
+ const uint32x4_t origABCD = abcd;
+ const PRUint32 origE = e;
+
+ uint32x4_t w0 = vld1q_u32(inbuf);
+ uint32x4_t w1 = vld1q_u32(inbuf + 4);
+ uint32x4_t w2 = vld1q_u32(inbuf + 8);
+ uint32x4_t w3 = vld1q_u32(inbuf + 12);
+
+ w0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w0)));
+ w1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w1)));
+ w2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w2)));
+ w3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w3)));
+
+ uint32x4_t t0 = vaddq_u32(w0, K0);
+ uint32x4_t t1 = vaddq_u32(w1, K0);
+
+ PRUint32 tmpE;
+
+ /*
+ * Using the following ARM instructions to accelerate SHA1
+ *
+ * sha1c for round 0 - 20
+ * sha1p for round 20 - 40
+ * sha1m for round 40 - 60
+ * sha1p for round 60 - 80
+ * sha1su0 and sha1su1 for message schedule
+ * sha1h for rotate left 30
+ */
+
+ /* Round 0-3 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K0);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 4-7 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K0);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 8-11 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K0);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 12-15 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K1);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 16-19 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K1);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 20-23 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K1);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 24-27 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K1);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 28-31 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K1);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 32-35 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K2);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 36-39 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K2);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 40-43 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K2);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 44-47 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K2);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 48-51 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K2);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 52-55 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K3);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 56-59 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K3);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 60-63 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K3);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 64-67 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K3);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 68-71 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K3);
+ w0 = vsha1su1q_u32(w0, w3);
+
+ /* Round 72-75 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+
+ /* Round 76-79 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+
+ e += origE;
+ abcd = vaddq_u32(origABCD, abcd);
+
+ vst1q_u32(&XH(0), abcd);
+ XH(4) = e;
+}
+
+#endif /* __ARM_FEATURE_CRYPTO */
diff --git a/lib/freebl/sha_fast.c b/lib/freebl/sha_fast.c
index 52071f0c9..2a8ac576c 100644
--- a/lib/freebl/sha_fast.c
+++ b/lib/freebl/sha_fast.c
@@ -10,6 +10,7 @@
#include "blapi.h"
#include "sha_fast.h"
#include "prerror.h"
+#include "secerr.h"
#ifdef TRACING_SSL
#include "ssl.h"
@@ -28,6 +29,28 @@ static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
+void SHA1_Compress_Native(SHA1Context *ctx);
+void SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len);
+
+static void SHA1_Compress_Generic(SHA1Context *ctx);
+static void SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len);
+
+#ifndef USE_HW_SHA1
+void
+SHA1_Compress_Native(SHA1Context *ctx)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+void
+SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+#endif
+
/*
* SHA: initialize context
*/
@@ -43,6 +66,18 @@ SHA1_Begin(SHA1Context *ctx)
ctx->H[2] = 0x98badcfeL;
ctx->H[3] = 0x10325476L;
ctx->H[4] = 0xc3d2e1f0L;
+
+#if defined(USE_HW_SHA1) && defined(IS_LITTLE_ENDIAN)
+ /* arm's implementation is tested on little endian only */
+ if (arm_sha1_support()) {
+ ctx->compress = SHA1_Compress_Native;
+ ctx->update = SHA1_Update_Native;
+ } else
+#endif
+ {
+ ctx->compress = SHA1_Compress_Generic;
+ ctx->update = SHA1_Update_Generic;
+ }
}
/* Explanation of H array and index values:
@@ -89,6 +124,12 @@ SHA1_Begin(SHA1Context *ctx)
void
SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
{
+ ctx->update(ctx, dataIn, len);
+}
+
+static void
+SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
register unsigned int lenB;
register unsigned int togo;
@@ -166,7 +207,7 @@ SHA1_End(SHA1Context *ctx, unsigned char *hashout,
size <<= 3;
ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32));
ctx->W[15] = SHA_HTONL((PRUint32)size);
- shaCompress(&ctx->H[H2X], ctx->W);
+ ctx->compress(ctx);
/*
* Output hash
@@ -460,6 +501,12 @@ shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
XH(4) += E;
}
+static void
+SHA1_Compress_Generic(SHA1Context *ctx)
+{
+ shaCompress(&ctx->H[H2X], ctx->u.w);
+}
+
/*************************************************************************
** Code below this line added to make SHA code support BLAPI interface
*/
@@ -491,7 +538,7 @@ SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
unsigned int outLen;
SHA1_Begin(&ctx);
- SHA1_Update(&ctx, src, src_length);
+ ctx.update(&ctx, src, src_length);
SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH);
memset(&ctx, 0, sizeof ctx);
return SECSuccess;
diff --git a/lib/freebl/sha_fast.h b/lib/freebl/sha_fast.h
index 1780ca8f4..c03c0637a 100644
--- a/lib/freebl/sha_fast.h
+++ b/lib/freebl/sha_fast.h
@@ -10,13 +10,19 @@
#define SHA1_INPUT_LEN 64
-#if defined(IS_64) && !defined(__sparc)
+#if defined(IS_64) && !defined(__sparc) && !defined(__aarch64__)
typedef PRUint64 SHA_HW_t;
#define SHA1_USING_64_BIT 1
#else
typedef PRUint32 SHA_HW_t;
#endif
+struct SHA1ContextStr;
+
+typedef void (*sha1_compress_t)(struct SHA1ContextStr *);
+typedef void (*sha1_update_t)(struct SHA1ContextStr *, const unsigned char *,
+ unsigned int);
+
struct SHA1ContextStr {
union {
PRUint32 w[16]; /* input buffer */
@@ -24,6 +30,8 @@ struct SHA1ContextStr {
} u;
PRUint64 size; /* count of hashed bytes. */
SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */
+ sha1_compress_t compress;
+ sha1_update_t update;
};
#if defined(_MSC_VER)
@@ -135,7 +143,7 @@ swap4b(PRUint32 value)
#define SHA_BYTESWAP(x) x = SHA_HTONL(x)
-#define SHA_STORE(n) ((PRUint32*)hashout)[n] = SHA_HTONL(ctx->H[n])
+#define SHA_STORE(n) ((PRUint32 *)hashout)[n] = SHA_HTONL(ctx->H[n])
#if defined(HAVE_UNALIGNED_ACCESS)
#define SHA_STORE_RESULT \
SHA_STORE(0); \