diff options
author | Kevin Jacobs <kjacobs@mozilla.com> | 2020-08-07 15:31:25 +0000 |
---|---|---|
committer | Kevin Jacobs <kjacobs@mozilla.com> | 2020-08-07 15:31:25 +0000 |
commit | a60284539c9b12b28d0b407c4637d14b2a5447d3 (patch) | |
tree | 5cf06ceec90053ebfa91690f0695f4980269119b | |
parent | 84c6aa74c7884af09a3f1abdcddb631ad5e929d9 (diff) | |
download | nss-hg-a60284539c9b12b28d0b407c4637d14b2a5447d3.tar.gz |
Bug 1656981 - Use 64x64->128 multiply and MP_COMBA on x86_64 Mac. r=mt
This patch makes two MPI changes for macOS:
1. Rename `mpi_amd64_gas.s` to `mpi_amd64_common.S` and add defines for macho64, allowing Intel Macs to take advantage of the 64x64->128 multiply code.
2. Define and use `NSS_USE_COMBA` on Intel Macs.
Performance results with `rsaperf -n none -p 10 -e -x 65537` (default 2048-bit key):
Before: `12629.12 operations/s. one operation every 79 microseconds`
With 64x64->128 assembly: `29431.65 operations/s. one operation every 33 microseconds`
With MP_COMBA and 64x64->128 assembly: `30332.99 operations/s. one operation every 32 microseconds`
Differential Revision: https://phabricator.services.mozilla.com/D85783
-rw-r--r-- | lib/freebl/Makefile | 12 | ||||
-rw-r--r-- | lib/freebl/freebl_base.gypi | 14 | ||||
-rw-r--r-- | lib/freebl/mpi/mpi_amd64_common.S (renamed from lib/freebl/mpi/mpi_amd64_gas.s) | 26 |
3 files changed, 45 insertions, 7 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 52a30927b..4a600417c 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -234,7 +234,12 @@ endif
 endif
 
 ifeq ($(OS_TARGET),Darwin)
-ifeq ($(CPU_ARCH),x86)
+ifeq ($(CPU_ARCH),x86_64)
+    ASFILES = mpi_amd64_common.s
+    DEFINES += -DMPI_AMD64 -DMP_IS_LITTLE_ENDIAN
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DNSS_USE_COMBA
+    MPI_SRCS += mpi_amd64.c mp_comba.c
+else ifeq ($(CPU_ARCH),x86)
     ASFILES = mpi_sse2.s
     DEFINES += -DMP_USE_UINT_DIGIT
     DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
@@ -244,7 +249,8 @@ endif # Darwin
 
 ifeq ($(OS_TARGET),Linux)
 ifeq ($(CPU_ARCH),x86_64)
-    ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+    # Lower case s on mpi_amd64_common due to make implicit rules.
+    ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
     ASFLAGS += -fPIC -Wa,--noexecstack
     DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
     DEFINES += -DNSS_USE_COMBA
@@ -484,7 +490,7 @@ else
     ifeq ($(USE_64),1)
         # Solaris for AMD64
         ifdef NS_USE_GCC
-            ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+            ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
             ASFLAGS += -march=opteron -m64 -fPIC
             MPI_SRCS += mp_comba.c
             # comment the next four lines to turn off Intel HW acceleration
diff --git a/lib/freebl/freebl_base.gypi b/lib/freebl/freebl_base.gypi
index b8333a7db..39ec14982 100644
--- a/lib/freebl/freebl_base.gypi
+++ b/lib/freebl/freebl_base.gypi
@@ -68,7 +68,7 @@
       'sources': [
         'arcfour-amd64-gas.s',
         'mpi/mpi_amd64.c',
-        'mpi/mpi_amd64_gas.s',
+        'mpi/mpi_amd64_common.S',
         'mpi/mp_comba.c',
       ],
       'conditions': [
@@ -202,6 +202,18 @@
             'MP_ASSEMBLY_SQUARE',
             'MP_ASSEMBLY_DIV_2DX1D',
           ],
+        }, 'target_arch=="x64"', {
+          'sources': [
+            'mpi/mpi_amd64.c',
+            'mpi/mpi_amd64_common.S',
+            'mpi/mp_comba.c',
+          ],
+          'defines': [
+            'MP_IS_LITTLE_ENDIAN',
+            'MPI_AMD64',
+            'MP_ASSEMBLY_MULTIPLY',
+            'NSS_USE_COMBA',
+          ],
         }],
       ],
     }],
diff --git a/lib/freebl/mpi/mpi_amd64_gas.s b/lib/freebl/mpi/mpi_amd64_common.S
index ad6e2b9d7..4000f2066 100644
--- a/lib/freebl/mpi/mpi_amd64_gas.s
+++ b/lib/freebl/mpi/mpi_amd64_common.S
@@ -18,7 +18,15 @@
 # s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
 #
 
-.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+.text; .align 16; .globl s_mpv_mul_set_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64
+.private_extern s_mpv_mul_set_vec64
+s_mpv_mul_set_vec64:
+#else
+.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+#endif
 
     xorq %rax, %rax # if (len == 0) return (0)
     testq %rdx, %rdx
@@ -169,7 +177,9 @@
     movq %r9, %rax
     ret
 
+#ifndef DARWIN
 .size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+#endif
 
 # ------------------------------------------------------------------------
 #
@@ -186,7 +196,15 @@
 # s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
 #
 
-.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+.text; .align 16; .globl s_mpv_mul_add_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64
+.private_extern s_mpv_mul_add_vec64
+s_mpv_mul_add_vec64:
+#else
+.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+#endif
 
     xorq %rax, %rax # if (len == 0) return (0)
     testq %rdx, %rdx
@@ -381,9 +399,11 @@
 .L27:
     movq %r9, %rax
     ret
-
+
+#ifndef DARWIN
 .size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
 
 # Magic indicating no need for an executable stack
 .section .note.GNU-stack, "", @progbits
 .previous
+#endif