summaryrefslogtreecommitdiff
path: root/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_amd64_sun.s')
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_sun.s418
1 files changed, 0 insertions, 418 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
deleted file mode 100644
index ac7107415..000000000
--- a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
+++ /dev/null
@@ -1,418 +0,0 @@
-/ ***** BEGIN LICENSE BLOCK *****
-/ Version: MPL 1.1/GPL 2.0/LGPL 2.1
-/
-/ The contents of this file are subject to the Mozilla Public License Version
-/ 1.1 (the "License"); you may not use this file except in compliance with
-/ the License. You may obtain a copy of the License at
-/ http://www.mozilla.org/MPL/
-/
-/ Software distributed under the License is distributed on an "AS IS" basis,
-/ WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
-/ for the specific language governing rights and limitations under the
-/ License.
-/
-/ The Original Code is the Solaris software cryptographic token.
-/
-/ The Initial Developer of the Original Code is
-/ Sun Microsystems, Inc.
-/ Portions created by the Initial Developer are Copyright (C) 2005
-/ the Initial Developer. All Rights Reserved.
-/
-/ Contributor(s):
-/ Sun Microsystems, Inc.
-/
-/ Alternatively, the contents of this file may be used under the terms of
-/ either the GNU General Public License Version 2 or later (the "GPL"), or
-/ the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
-/ in which case the provisions of the GPL or the LGPL are applicable instead
-/ of those above. If you wish to allow use of your version of this file only
-/ under the terms of either the GPL or the LGPL, and not to allow others to
-/ use your version of this file under the terms of the MPL, indicate your
-/ decision by deleting the provisions above and replace them with the notice
-/ and other provisions required by the GPL or the LGPL. If you do not delete
-/ the provisions above, a recipient may use your version of this file under
-/ the terms of any one of the MPL, the GPL or the LGPL.
-/
-/ ***** END LICENSE BLOCK ***** */
-
-
-/ ------------------------------------------------------------------------
-/
-/ Implementation of s_mpv_mul_set_vec which exploits
-/ the 64X64->128 bit unsigned multiply instruction.
-/
-/ ------------------------------------------------------------------------
-
-/ r = a * digit, r and a are vectors of length len
-/ returns the carry digit
-/ r and a are 64 bit aligned.
-/
-/ uint64_t
-/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
-/
-
-.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
-
- xorq %rax, %rax / if (len == 0) return (0)
- testq %rdx, %rdx
- jz .L17
-
- movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
- xorq %r9, %r9 / cy = 0
-
-.L15:
- cmpq $8, %r8 / 8 - len
- jb .L16
- movq 0(%rsi), %rax / rax = a[0]
- movq 8(%rsi), %r11 / prefetch a[1]
- mulq %rcx / p = a[0] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 0(%rdi) / r[0] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 16(%rsi), %r11 / prefetch a[2]
- mulq %rcx / p = a[1] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 8(%rdi) / r[1] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 24(%rsi), %r11 / prefetch a[3]
- mulq %rcx / p = a[2] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 16(%rdi) / r[2] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 32(%rsi), %r11 / prefetch a[4]
- mulq %rcx / p = a[3] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 24(%rdi) / r[3] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 40(%rsi), %r11 / prefetch a[5]
- mulq %rcx / p = a[4] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 32(%rdi) / r[4] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 48(%rsi), %r11 / prefetch a[6]
- mulq %rcx / p = a[5] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 40(%rdi) / r[5] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 56(%rsi), %r11 / prefetch a[7]
- mulq %rcx / p = a[6] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 48(%rdi) / r[6] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- mulq %rcx / p = a[7] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 56(%rdi) / r[7] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- addq $64, %rsi
- addq $64, %rdi
- subq $8, %r8
-
- jz .L17
- jmp .L15
-
-.L16:
- movq 0(%rsi), %rax
- mulq %rcx / p = a[0] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 0(%rdi) / r[0] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
- movq 8(%rsi), %rax
- mulq %rcx / p = a[1] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 8(%rdi) / r[1] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
- movq 16(%rsi), %rax
- mulq %rcx / p = a[2] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 16(%rdi) / r[2] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
- movq 24(%rsi), %rax
- mulq %rcx / p = a[3] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 24(%rdi) / r[3] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
- movq 32(%rsi), %rax
- mulq %rcx / p = a[4] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 32(%rdi) / r[4] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
- movq 40(%rsi), %rax
- mulq %rcx / p = a[5] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 40(%rdi) / r[5] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
- movq 48(%rsi), %rax
- mulq %rcx / p = a[6] * digit
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 48(%rdi) / r[6] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L17
-
-
-.L17:
- movq %r9, %rax
- ret
-
-.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
-
-/ ------------------------------------------------------------------------
-/
-/ Implementation of s_mpv_mul_add_vec which exploits
-/ the 64X64->128 bit unsigned multiply instruction.
-/
-/ ------------------------------------------------------------------------
-
-/ r += a * digit, r and a are vectors of length len
-/ returns the carry digit
-/ r and a are 64 bit aligned.
-/
-/ uint64_t
-/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
-/
-
-.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
-
- xorq %rax, %rax / if (len == 0) return (0)
- testq %rdx, %rdx
- jz .L27
-
- movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
- xorq %r9, %r9 / cy = 0
-
-.L25:
- cmpq $8, %r8 / 8 - len
- jb .L26
- movq 0(%rsi), %rax / rax = a[0]
- movq 0(%rdi), %r10 / r10 = r[0]
- movq 8(%rsi), %r11 / prefetch a[1]
- mulq %rcx / p = a[0] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[0]
- movq 8(%rdi), %r10 / prefetch r[1]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 0(%rdi) / r[0] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 16(%rsi), %r11 / prefetch a[2]
- mulq %rcx / p = a[1] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[1]
- movq 16(%rdi), %r10 / prefetch r[2]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 8(%rdi) / r[1] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 24(%rsi), %r11 / prefetch a[3]
- mulq %rcx / p = a[2] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[2]
- movq 24(%rdi), %r10 / prefetch r[3]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 16(%rdi) / r[2] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 32(%rsi), %r11 / prefetch a[4]
- mulq %rcx / p = a[3] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[3]
- movq 32(%rdi), %r10 / prefetch r[4]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 24(%rdi) / r[3] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 40(%rsi), %r11 / prefetch a[5]
- mulq %rcx / p = a[4] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[4]
- movq 40(%rdi), %r10 / prefetch r[5]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 32(%rdi) / r[4] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 48(%rsi), %r11 / prefetch a[6]
- mulq %rcx / p = a[5] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[5]
- movq 48(%rdi), %r10 / prefetch r[6]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 40(%rdi) / r[5] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- movq 56(%rsi), %r11 / prefetch a[7]
- mulq %rcx / p = a[6] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[6]
- movq 56(%rdi), %r10 / prefetch r[7]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 48(%rdi) / r[6] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- movq %r11, %rax
- mulq %rcx / p = a[7] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[7]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 56(%rdi) / r[7] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
-
- addq $64, %rsi
- addq $64, %rdi
- subq $8, %r8
-
- jz .L27
- jmp .L25
-
-.L26:
- movq 0(%rsi), %rax
- movq 0(%rdi), %r10
- mulq %rcx / p = a[0] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[0]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 0(%rdi) / r[0] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
- movq 8(%rsi), %rax
- movq 8(%rdi), %r10
- mulq %rcx / p = a[1] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[1]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 8(%rdi) / r[1] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
- movq 16(%rsi), %rax
- movq 16(%rdi), %r10
- mulq %rcx / p = a[2] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[2]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 16(%rdi) / r[2] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
- movq 24(%rsi), %rax
- movq 24(%rdi), %r10
- mulq %rcx / p = a[3] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[3]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 24(%rdi) / r[3] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
- movq 32(%rsi), %rax
- movq 32(%rdi), %r10
- mulq %rcx / p = a[4] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[4]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 32(%rdi) / r[4] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
- movq 40(%rsi), %rax
- movq 40(%rdi), %r10
- mulq %rcx / p = a[5] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[5]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 40(%rdi) / r[5] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
- movq 48(%rsi), %rax
- movq 48(%rdi), %r10
- mulq %rcx / p = a[6] * digit
- addq %r10, %rax
- adcq $0, %rdx / p += r[6]
- addq %r9, %rax
- adcq $0, %rdx / p += cy
- movq %rax, 48(%rdi) / r[6] = lo(p)
- movq %rdx, %r9 / cy = hi(p)
- decq %r8
- jz .L27
-
-
-.L27:
- movq %r9, %rax
- ret
-
-.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]