diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_amd64_gas.s')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpi_amd64_gas.s | 418 |
1 file changed, 0 insertions, 418 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_gas.s b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s deleted file mode 100644 index 7515ac20a..000000000 --- a/security/nss/lib/freebl/mpi/mpi_amd64_gas.s +++ /dev/null @@ -1,418 +0,0 @@ -# ***** BEGIN LICENSE BLOCK ***** -# Version: MPL 1.1/GPL 2.0/LGPL 2.1 -# -# The contents of this file are subject to the Mozilla Public License Version -# 1.1 (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# http://www.mozilla.org/MPL/ -# -# Software distributed under the License is distributed on an "AS IS" basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. -# -# The Original Code is the Solaris software cryptographic token. -# -# The Initial Developer of the Original Code is -# Sun Microsystems, Inc. -# Portions created by the Initial Developer are Copyright (C) 2005 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Sun Microsystems, Inc. -# -# Alternatively, the contents of this file may be used under the terms of -# either the GNU General Public License Version 2 or later (the "GPL"), or -# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), -# in which case the provisions of the GPL or the LGPL are applicable instead -# of those above. If you wish to allow use of your version of this file only -# under the terms of either the GPL or the LGPL, and not to allow others to -# use your version of this file under the terms of the MPL, indicate your -# decision by deleting the provisions above and replace them with the notice -# and other provisions required by the GPL or the LGPL. If you do not delete -# the provisions above, a recipient may use your version of this file under -# the terms of any one of the MPL, the GPL or the LGPL. 
#
# ***** END LICENSE BLOCK ***** */


# ------------------------------------------------------------------------
#
# Implementation of s_mpv_mul_set_vec which exploits
# the 64X64->128 bit unsigned multiply instruction.
#
# ------------------------------------------------------------------------

# r = a * digit, r and a are vectors of length len
# returns the carry digit
# r and a are 64 bit aligned.
#
# uint64_t
# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#
# ABI:      System V AMD64 (GAS / AT&T syntax)
# In:       rdi = r (dst vector), rsi = a (src vector),
#           rdx = len (element count), rcx = digit (multiplier)
# Out:      rax = final carry digit
# Clobbers: rdx (high half of mulq), r8, r9, r11, flags.
#           Leaf function: no stack use, no callee-saved registers touched.
#
# Structure: main loop (.L15) processes 8 limbs per iteration with the
# next a[i] preloaded into r11 so the load overlaps the multiply; the
# tail (.L16) handles the remaining 0..7 limbs one at a time.  The carry
# is propagated in r9: each product p = a[i]*digit + cy fits in 128 bits
# because hi(p) can absorb the adcq without overflowing.

.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:

        xorq    %rax, %rax              # if (len == 0) return (0)
        testq   %rdx, %rdx
        jz      .L17

        movq    %rdx, %r8               # Use r8 for len; %rdx is used by mul
        xorq    %r9, %r9                # cy = 0

.L15:
        cmpq    $8, %r8                 # 8 - len
        jb      .L16                    # fewer than 8 limbs left -> tail loop
        movq    0(%rsi), %rax           # rax = a[0]
        movq    8(%rsi), %r11           # prefetch a[1]
        mulq    %rcx                    # p = a[0] * digit (rdx:rax)
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 0(%rdi)           # r[0] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    16(%rsi), %r11          # prefetch a[2]
        mulq    %rcx                    # p = a[1] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 8(%rdi)           # r[1] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    24(%rsi), %r11          # prefetch a[3]
        mulq    %rcx                    # p = a[2] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 16(%rdi)          # r[2] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    32(%rsi), %r11          # prefetch a[4]
        mulq    %rcx                    # p = a[3] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 24(%rdi)          # r[3] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    40(%rsi), %r11          # prefetch a[5]
        mulq    %rcx                    # p = a[4] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 32(%rdi)          # r[4] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    48(%rsi), %r11          # prefetch a[6]
        mulq    %rcx                    # p = a[5] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 40(%rdi)          # r[5] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    56(%rsi), %r11          # prefetch a[7]
        mulq    %rcx                    # p = a[6] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 48(%rdi)          # r[6] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax              # last limb of the group: no prefetch
        mulq    %rcx                    # p = a[7] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 56(%rdi)          # r[7] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        addq    $64, %rsi               # a += 8 limbs
        addq    $64, %rdi               # r += 8 limbs
        subq    $8, %r8                 # len -= 8

        jz      .L17
        jmp     .L15

.L16:                                   # tail: 1..7 remaining limbs
        movq    0(%rsi), %rax
        mulq    %rcx                    # p = a[0] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 0(%rdi)           # r[0] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17

        movq    8(%rsi), %rax
        mulq    %rcx                    # p = a[1] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 8(%rdi)           # r[1] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17

        movq    16(%rsi), %rax
        mulq    %rcx                    # p = a[2] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 16(%rdi)          # r[2] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17

        movq    24(%rsi), %rax
        mulq    %rcx                    # p = a[3] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 24(%rdi)          # r[3] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17

        movq    32(%rsi), %rax
        mulq    %rcx                    # p = a[4] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 32(%rdi)          # r[4] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17

        movq    40(%rsi), %rax
        mulq    %rcx                    # p = a[5] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 40(%rdi)          # r[5] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17

        movq    48(%rsi), %rax
        mulq    %rcx                    # p = a[6] * digit
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 48(%rdi)          # r[6] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L17
                                        # at most 7 limbs reach .L16, so the
                                        # 7th decq must hit zero: fall through

.L17:
        movq    %r9, %rax               # return final carry
        ret

.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]

# ------------------------------------------------------------------------
#
# Implementation of s_mpv_mul_add_vec which exploits
# the
# 64X64->128 bit unsigned multiply instruction.
#
# ------------------------------------------------------------------------

# r += a * digit, r and a are vectors of length len
# returns the carry digit
# r and a are 64 bit aligned.
#
# uint64_t
# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#
# ABI:      System V AMD64 (GAS / AT&T syntax)
# In:       rdi = r (dst/accumulator vector), rsi = a (src vector),
#           rdx = len (element count), rcx = digit (multiplier)
# Out:      rax = final carry digit
# Clobbers: rdx (high half of mulq), r8, r9, r10, r11, flags.
#           Leaf function: no stack use, no callee-saved registers touched.
#
# Same structure as s_mpv_mul_set_vec64, but each product is also added
# to the existing r[i] (held in r10, preloaded one limb ahead).  Two
# separate addq/adcq pairs fold in r[i] and then the carry; hi(p) cannot
# overflow because a[i]*digit + r[i] + cy < 2^128.

.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:

        xorq    %rax, %rax              # if (len == 0) return (0)
        testq   %rdx, %rdx
        jz      .L27

        movq    %rdx, %r8               # Use r8 for len; %rdx is used by mul
        xorq    %r9, %r9                # cy = 0

.L25:
        cmpq    $8, %r8                 # 8 - len
        jb      .L26                    # fewer than 8 limbs left -> tail loop
        movq    0(%rsi), %rax           # rax = a[0]
        movq    0(%rdi), %r10           # r10 = r[0]
        movq    8(%rsi), %r11           # prefetch a[1]
        mulq    %rcx                    # p = a[0] * digit (rdx:rax)
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[0]
        movq    8(%rdi), %r10           # prefetch r[1]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 0(%rdi)           # r[0] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    16(%rsi), %r11          # prefetch a[2]
        mulq    %rcx                    # p = a[1] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[1]
        movq    16(%rdi), %r10          # prefetch r[2]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 8(%rdi)           # r[1] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    24(%rsi), %r11          # prefetch a[3]
        mulq    %rcx                    # p = a[2] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[2]
        movq    24(%rdi), %r10          # prefetch r[3]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 16(%rdi)          # r[2] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    32(%rsi), %r11          # prefetch a[4]
        mulq    %rcx                    # p = a[3] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[3]
        movq    32(%rdi), %r10          # prefetch r[4]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 24(%rdi)          # r[3] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    40(%rsi), %r11          # prefetch a[5]
        mulq    %rcx                    # p = a[4] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[4]
        movq    40(%rdi), %r10          # prefetch r[5]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 32(%rdi)          # r[4] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    48(%rsi), %r11          # prefetch a[6]
        mulq    %rcx                    # p = a[5] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[5]
        movq    48(%rdi), %r10          # prefetch r[6]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 40(%rdi)          # r[5] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax
        movq    56(%rsi), %r11          # prefetch a[7]
        mulq    %rcx                    # p = a[6] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[6]
        movq    56(%rdi), %r10          # prefetch r[7]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 48(%rdi)          # r[6] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        movq    %r11, %rax              # last limb of the group: no prefetch
        mulq    %rcx                    # p = a[7] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[7]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 56(%rdi)          # r[7] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)

        addq    $64, %rsi               # a += 8 limbs
        addq    $64, %rdi               # r += 8 limbs
        subq    $8, %r8                 # len -= 8

        jz      .L27
        jmp     .L25

.L26:                                   # tail: 1..7 remaining limbs
        movq    0(%rsi), %rax
        movq    0(%rdi), %r10
        mulq    %rcx                    # p = a[0] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[0]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 0(%rdi)           # r[0] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27

        movq    8(%rsi), %rax
        movq    8(%rdi), %r10
        mulq    %rcx                    # p = a[1] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[1]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 8(%rdi)           # r[1] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27

        movq    16(%rsi), %rax
        movq    16(%rdi), %r10
        mulq    %rcx                    # p = a[2] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[2]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 16(%rdi)          # r[2] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27

        movq    24(%rsi), %rax
        movq    24(%rdi), %r10
        mulq    %rcx                    # p = a[3] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[3]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 24(%rdi)          # r[3] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27

        movq    32(%rsi), %rax
        movq    32(%rdi), %r10
        mulq    %rcx                    # p = a[4] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[4]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 32(%rdi)          # r[4] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27

        movq    40(%rsi), %rax
        movq    40(%rdi), %r10
        mulq    %rcx                    # p = a[5] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[5]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 40(%rdi)          # r[5] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27

        movq    48(%rsi), %rax
        movq    48(%rdi), %r10
        mulq    %rcx                    # p = a[6] * digit
        addq    %r10, %rax
        adcq    $0, %rdx                # p += r[6]
        addq    %r9, %rax
        adcq    $0, %rdx                # p += cy
        movq    %rax, 48(%rdi)          # r[6] = lo(p)
        movq    %rdx, %r9               # cy = hi(p)
        decq    %r8
        jz      .L27
                                        # at most 7 limbs reach .L26, so the
                                        # 7th decq must hit zero: fall through

.L27:
        movq    %r9, %rax               # return final carry
        ret

.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]