summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Jacobs <kjacobs@mozilla.com> 2020-08-07 15:31:25 +0000
committer: Kevin Jacobs <kjacobs@mozilla.com> 2020-08-07 15:31:25 +0000
commit: a60284539c9b12b28d0b407c4637d14b2a5447d3 (patch)
tree: 5cf06ceec90053ebfa91690f0695f4980269119b
parent: 84c6aa74c7884af09a3f1abdcddb631ad5e929d9 (diff)
download: nss-hg-a60284539c9b12b28d0b407c4637d14b2a5447d3.tar.gz
Bug 1656981 - Use 64x64->128 multiply and MP_COMBA on x86_64 Mac. r=mt
This patch makes two MPI changes for MacOS:

1. Rename `mpi_amd64_gas.s` to `mpi_amd64_common.S` and add defines for macho64, allowing Intel Macs to take advantage of the 64x64->128 multiply code.
2. Define and use `NSS_USE_COMBA` on Intel Macs.

Performance results with `rsaperf -n none -p 10 -e -x 65537` (default 2048-bit key):

Before: `12629.12 operations/s. one operation every 79 microseconds`
With 64x64->128 assembly: `29431.65 operations/s. one operation every 33 microseconds`
With MP_COMBA and 64x64->128 assembly: `30332.99 operations/s. one operation every 32 microseconds`

Differential Revision: https://phabricator.services.mozilla.com/D85783
-rw-r--r-- lib/freebl/Makefile | 12
-rw-r--r-- lib/freebl/freebl_base.gypi | 14
-rw-r--r-- lib/freebl/mpi/mpi_amd64_common.S (renamed from lib/freebl/mpi/mpi_amd64_gas.s) | 26
3 files changed, 45 insertions, 7 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 52a30927b..4a600417c 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -234,7 +234,12 @@ endif
endif
ifeq ($(OS_TARGET),Darwin)
-ifeq ($(CPU_ARCH),x86)
+ifeq ($(CPU_ARCH),x86_64)
+ ASFILES = mpi_amd64_common.s
+ DEFINES += -DMPI_AMD64 -DMP_IS_LITTLE_ENDIAN
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DNSS_USE_COMBA
+ MPI_SRCS += mpi_amd64.c mp_comba.c
+else ifeq ($(CPU_ARCH),x86)
ASFILES = mpi_sse2.s
DEFINES += -DMP_USE_UINT_DIGIT
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
@@ -244,7 +249,8 @@ endif # Darwin
ifeq ($(OS_TARGET),Linux)
ifeq ($(CPU_ARCH),x86_64)
- ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+ # Lower case s on mpi_amd64_common due to make implicit rules.
+ ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
ASFLAGS += -fPIC -Wa,--noexecstack
DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
DEFINES += -DNSS_USE_COMBA
@@ -484,7 +490,7 @@ else
ifeq ($(USE_64),1)
# Solaris for AMD64
ifdef NS_USE_GCC
- ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+ ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
ASFLAGS += -march=opteron -m64 -fPIC
MPI_SRCS += mp_comba.c
# comment the next four lines to turn off Intel HW acceleration
diff --git a/lib/freebl/freebl_base.gypi b/lib/freebl/freebl_base.gypi
index b8333a7db..39ec14982 100644
--- a/lib/freebl/freebl_base.gypi
+++ b/lib/freebl/freebl_base.gypi
@@ -68,7 +68,7 @@
'sources': [
'arcfour-amd64-gas.s',
'mpi/mpi_amd64.c',
- 'mpi/mpi_amd64_gas.s',
+ 'mpi/mpi_amd64_common.S',
'mpi/mp_comba.c',
],
'conditions': [
@@ -202,6 +202,18 @@
'MP_ASSEMBLY_SQUARE',
'MP_ASSEMBLY_DIV_2DX1D',
],
+ }, 'target_arch=="x64"', {
+ 'sources': [
+ 'mpi/mpi_amd64.c',
+ 'mpi/mpi_amd64_common.S',
+ 'mpi/mp_comba.c',
+ ],
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ 'MPI_AMD64',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'NSS_USE_COMBA',
+ ],
}],
],
}],
diff --git a/lib/freebl/mpi/mpi_amd64_gas.s b/lib/freebl/mpi/mpi_amd64_common.S
index ad6e2b9d7..4000f2066 100644
--- a/lib/freebl/mpi/mpi_amd64_gas.s
+++ b/lib/freebl/mpi/mpi_amd64_common.S
@@ -18,7 +18,15 @@
# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#
-.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+.text; .align 16; .globl s_mpv_mul_set_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64
+.private_extern s_mpv_mul_set_vec64
+s_mpv_mul_set_vec64:
+#else
+.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+#endif
xorq %rax, %rax # if (len == 0) return (0)
testq %rdx, %rdx
@@ -169,7 +177,9 @@
movq %r9, %rax
ret
+#ifndef DARWIN
.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+#endif
# ------------------------------------------------------------------------
#
@@ -186,7 +196,15 @@
# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#
-.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+.text; .align 16; .globl s_mpv_mul_add_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64
+.private_extern s_mpv_mul_add_vec64
+s_mpv_mul_add_vec64:
+#else
+.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+#endif
xorq %rax, %rax # if (len == 0) return (0)
testq %rdx, %rdx
@@ -381,9 +399,11 @@
.L27:
movq %r9, %rax
ret
-
+
+#ifndef DARWIN
.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
# Magic indicating no need for an executable stack
.section .note.GNU-stack, "", @progbits
.previous
+#endif