diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2012-03-07 21:14:07 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2012-03-07 21:14:07 +0100 |
commit | 19fccc5b6a37f686dddfe8ee765b451e5eda24e6 (patch) | |
tree | 65ae819ba60ad36ac6af36a7b2084b5d6df873d2 /mpn/x86_64/bobcat | |
parent | df5c14df466fbbe1eb01e224f7fe1a5eea438239 (diff) | |
download | gmp-19fccc5b6a37f686dddfe8ee765b451e5eda24e6.tar.gz |
Add bobcat copyi/copyd.
Diffstat (limited to 'mpn/x86_64/bobcat')
-rw-r--r-- | mpn/x86_64/bobcat/copyd.asm | 80 | ||||
-rw-r--r-- | mpn/x86_64/bobcat/copyi.asm | 83 |
2 files changed, 163 insertions, 0 deletions
diff --git a/mpn/x86_64/bobcat/copyd.asm b/mpn/x86_64/bobcat/copyd.asm new file mode 100644 index 000000000..c6253a861 --- /dev/null +++ b/mpn/x86_64/bobcat/copyd.asm @@ -0,0 +1,80 @@ +dnl AMD64 mpn_copyd optimised for AMD bobcat. + +dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C AMD K8,K9 1 +C AMD K10 1-2 (alignment fluctuations) +C AMD bd1 ? +C AMD bobcat 1.5 +C Intel P4 2.8 +C Intel core2 1 +C Intel NHM 1-1.25 +C Intel SBR 1 +C Intel atom 2.87 +C VIA nano 2 + +C INPUT PARAMETERS +C rp rdi +C up rsi +C n rdx + +define(`rp',`%rdi') +define(`up',`%rsi') +define(`n',`%rdx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(STD64) + +ASM_START() + TEXT + ALIGN(16) +PROLOGUE(mpn_copyd) + DOS64_ENTRY(3) + sub $4, n + jl L(end) + ALIGN(16) +L(top): mov 24(up,n,8), %r8 + mov %r8, 24(rp,n,8) + mov 16(up,n,8), %r8 + mov %r8, 16(rp,n,8) + mov 8(up,n,8), %r8 + mov %r8, 8(rp,n,8) + mov (up,n,8), %r8 + mov %r8, (rp,n,8) +L(ent): sub $4, n + jge L(top) + +L(end): cmp $-4, R32(n) + jz L(ret) + mov 24(up,n,8), %r8 + mov %r8, 24(rp,n,8) + cmp $-3, R32(n) + jz L(ret) + mov 16(up,n,8), %r8 + mov %r8, 16(rp,n,8) + cmp $-2, R32(n) + jz L(ret) + mov 8(up,n,8), %r8 + mov %r8, 8(rp,n,8) + +L(ret): DOS64_EXIT() + ret +EPILOGUE() diff --git a/mpn/x86_64/bobcat/copyi.asm b/mpn/x86_64/bobcat/copyi.asm new file mode 100644 index 000000000..2433c7d55 --- /dev/null +++ b/mpn/x86_64/bobcat/copyi.asm @@ -0,0 +1,83 @@ +dnl AMD64 mpn_copyi optimised for AMD bobcat. + +dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C AMD K8,K9 1 +C AMD K10 1-2 (alignment fluctuations) +C AMD bd1 ? +C AMD bobcat 1.5 +C Intel P4 2.8 +C Intel core2 1 +C Intel NHM 1-1.25 +C Intel SBR 1 +C Intel atom 2.87 +C VIA nano 2 + +C INPUT PARAMETERS +C rp rdi +C up rsi +C n rdx + +define(`rp',`%rdi') +define(`up',`%rsi') +define(`n',`%rdx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(STD64) + +ASM_START() + TEXT + ALIGN(16) +PROLOGUE(mpn_copyi) + DOS64_ENTRY(3) + lea -32(up,n,8), up + lea -32(rp,n,8), rp + neg n + add $4, n + jg L(end) + ALIGN(16) +L(top): mov (up,n,8), %r8 + mov %r8, (rp,n,8) + mov 8(up,n,8), %r8 + mov %r8, 8(rp,n,8) + mov 16(up,n,8), %r8 + mov %r8, 16(rp,n,8) + mov 24(up,n,8), %r8 + mov %r8, 24(rp,n,8) +L(ent): add $4, n + jle L(top) + +L(end): cmp $4, R32(n) + jz L(ret) + mov (up,n,8), %r8 + mov %r8, (rp,n,8) + cmp $3, R32(n) + jz L(ret) + mov 8(up,n,8), %r8 + mov %r8, 8(rp,n,8) + cmp $2, R32(n) + jz L(ret) + mov 16(up,n,8), %r8 + mov %r8, 16(rp,n,8) + +L(ret): DOS64_EXIT() + ret +EPILOGUE() |