Bug 1656981 - Use 64x64->128 multiply and MP_COMBA on x86_64 Mac. r=mt

This patch makes two MPI changes for macOS:

1. Rename `mpi_amd64_gas.s` to `mpi_amd64_common.S` and add defines for macho64, allowing Intel Macs to take advantage of the 64x64->128 multiply code.
2. Define and use `NSS_USE_COMBA` on Intel Macs.

Performance results with `rsaperf -n none -p 10 -e -x 65537` (default 2048-bit key):

- Before: `12629.12 operations/s. one operation every 79 microseconds`
- With 64x64->128 assembly: `29431.65 operations/s. one operation every 33 microseconds`
- With MP_COMBA and 64x64->128 assembly: `30332.99 operations/s. one operation every 32 microseconds`

Differential Revision: https://phabricator.services.mozilla.com/D85783
author: Kevin Jacobs <kjacobs@mozilla.com> 2020-08-07 15:31:25 +0000
committer: Kevin Jacobs <kjacobs@mozilla.com> 2020-08-07 15:31:25 +0000
commit: a60284539c9b12b28d0b407c4637d14b2a5447d3 (patch)
tree: 5cf06ceec90053ebfa91690f0695f4980269119b
parent: 84c6aa74c7884af09a3f1abdcddb631ad5e929d9 (diff)
download: nss-hg-a60284539c9b12b28d0b407c4637d14b2a5447d3.tar.gz
3 files changed, 45 insertions, 7 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 52a30927b..4a600417c 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -234,7 +234,12 @@ endif
 endif
 
 ifeq ($(OS_TARGET),Darwin)
-ifeq ($(CPU_ARCH),x86)
+ifeq ($(CPU_ARCH),x86_64)
+    ASFILES  = mpi_amd64_common.s
+    DEFINES += -DMPI_AMD64 -DMP_IS_LITTLE_ENDIAN
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DNSS_USE_COMBA
+    MPI_SRCS += mpi_amd64.c mp_comba.c
+else ifeq ($(CPU_ARCH),x86)
     ASFILES  = mpi_sse2.s
     DEFINES += -DMP_USE_UINT_DIGIT
     DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
@@ -244,7 +249,8 @@ endif # Darwin
 
 ifeq ($(OS_TARGET),Linux)
 ifeq ($(CPU_ARCH),x86_64)
-    ASFILES  = arcfour-amd64-gas.s mpi_amd64_gas.s
+    # Lower case s on mpi_amd64_common due to make implicit rules.
+    ASFILES  = arcfour-amd64-gas.s mpi_amd64_common.s
     ASFLAGS += -fPIC -Wa,--noexecstack
     DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
     DEFINES += -DNSS_USE_COMBA
@@ -484,7 +490,7 @@ else
     ifeq ($(USE_64),1)
 	# Solaris for AMD64
 	ifdef NS_USE_GCC
-	    ASFILES  = arcfour-amd64-gas.s mpi_amd64_gas.s
+	    ASFILES  = arcfour-amd64-gas.s mpi_amd64_common.s
 	    ASFLAGS += -march=opteron -m64 -fPIC
 	    MPI_SRCS += mp_comba.c
 	    # comment the next four lines to turn off Intel HW acceleration
diff --git a/lib/freebl/freebl_base.gypi b/lib/freebl/freebl_base.gypi
index b8333a7db..39ec14982 100644
--- a/lib/freebl/freebl_base.gypi
+++ b/lib/freebl/freebl_base.gypi
@@ -68,7 +68,7 @@
           'sources': [
             'arcfour-amd64-gas.s',
             'mpi/mpi_amd64.c',
-            'mpi/mpi_amd64_gas.s',
+            'mpi/mpi_amd64_common.S',
             'mpi/mp_comba.c',
           ],
           'conditions': [
@@ -202,6 +202,18 @@
             'MP_ASSEMBLY_SQUARE',
             'MP_ASSEMBLY_DIV_2DX1D',
           ],
+        }, 'target_arch=="x64"', {
+          'sources': [
+            'mpi/mpi_amd64.c',
+            'mpi/mpi_amd64_common.S',
+            'mpi/mp_comba.c',
+          ],
+          'defines': [
+            'MP_IS_LITTLE_ENDIAN',
+            'MPI_AMD64',
+            'MP_ASSEMBLY_MULTIPLY',
+            'NSS_USE_COMBA',
+          ],
         }],
       ],
     }],
diff --git a/lib/freebl/mpi/mpi_amd64_gas.s b/lib/freebl/mpi/mpi_amd64_common.S
index ad6e2b9d7..4000f2066 100644
--- a/lib/freebl/mpi/mpi_amd64_gas.s
+++ b/lib/freebl/mpi/mpi_amd64_common.S
@@ -18,7 +18,15 @@
 # s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
 #
 
-.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+.text; .align 16; .globl s_mpv_mul_set_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_set_vec64		_s_mpv_mul_set_vec64
+.private_extern s_mpv_mul_set_vec64
+s_mpv_mul_set_vec64:
+#else
+.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+#endif
 
 	xorq	%rax, %rax		# if (len == 0) return (0)
 	testq	%rdx, %rdx
@@ -169,7 +177,9 @@
 	movq	%r9, %rax
 	ret
 
+#ifndef DARWIN
 .size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+#endif
 
 # ------------------------------------------------------------------------
 #
@@ -186,7 +196,15 @@
 # s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
 #
 
-.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+.text; .align 16; .globl s_mpv_mul_add_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_add_vec64      _s_mpv_mul_add_vec64
+.private_extern s_mpv_mul_add_vec64
+s_mpv_mul_add_vec64:
+#else
+.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+#endif
 
 	xorq	%rax, %rax		# if (len == 0) return (0)
 	testq	%rdx, %rdx
@@ -381,9 +399,11 @@
 .L27:
 	movq	%r9, %rax
 	ret
-        
+
+#ifndef DARWIN
 .size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
 
 # Magic indicating no need for an executable stack
 .section .note.GNU-stack, "", @progbits
 .previous
+#endif
author	Kevin Jacobs <kjacobs@mozilla.com>	2020-08-07 15:31:25 +0000
committer	Kevin Jacobs <kjacobs@mozilla.com>	2020-08-07 15:31:25 +0000
commit	a60284539c9b12b28d0b407c4637d14b2a5447d3 (patch)
tree	5cf06ceec90053ebfa91690f0695f4980269119b
parent	84c6aa74c7884af09a3f1abdcddb631ad5e929d9 (diff)
download	nss-hg-a60284539c9b12b28d0b407c4637d14b2a5447d3.tar.gz