summaryrefslogtreecommitdiff
path: root/mpn
diff options
context:
space:
mode:
authortege <tege@gmplib.org>2006-11-02 16:36:46 +0100
committertege <tege@gmplib.org>2006-11-02 16:36:46 +0100
commit35a4ff9cf4ff6cff496f9eab3b63aded9bef1f76 (patch)
treeae9ea97e32d8ae7c82a107db5f5ab4cadb55e6bf /mpn
parent48f22cf7ca01f0c2ee8b559c1e81b78a9b70f2b6 (diff)
downloadgmp-35a4ff9cf4ff6cff496f9eab3b63aded9bef1f76.tar.gz
*** empty log message ***
Diffstat (limited to 'mpn')
-rw-r--r--mpn/x86_64/core2/lshift.asm78
-rw-r--r--mpn/x86_64/core2/rshift.asm80
2 files changed, 158 insertions, 0 deletions
diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm
new file mode 100644
index 000000000..9b66d6bbc
--- /dev/null
+++ b/mpn/x86_64/core2/lshift.asm
@@ -0,0 +1,78 @@
+dnl x86-64 mpn_lshift -- mpn left shift.
+
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002, 2006 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 51 Franklin Street,
+dnl Fifth Floor, Boston, MA 02110-1301, USA.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C K8: 4.5
+C P4: 16
+C P6-15: 1.75
+
+C This code was created from the generic x86 code, with minor improvements.
+C With more unrolling, it should be possible to approach 1 cycle/limb on P6-15.
+
+
+C INPUT PARAMETERS
+C rp rdi
+C up rsi
+C n rdx
+C cnt rcx
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_lshift)
+C Shift the n limbs at up left by the count in %cl, highest limb first, storing at rp.
+ sub $8, %rsi C bias src by one limb: (%rsi,%rdx,8) now addresses up[rdx-1]
+
+ mov (%rsi,%rdx,8), %r8 C read most significant limb, up[n-1]
+ xorl %eax, %eax C rax = 0 (a 32-bit write also clears the upper half)
+ shld %cl, %r8, %rax C compute carry limb: the bits shifted out of up[n-1]
+ dec %rdx C rdx = n-1
+ jz L(end) C n == 1: a single result limb, no loop needed
+ testb $1, %dl C parity of n-1 selects the loop entry point
+ jz L(1) C n-1 even: enter the 2-way unrolled loop at the top
+ inc %rdx C n-1 odd: restore rdx = n to suit the offsets used at L(mid)
+ jmp L(mid) C and enter the loop in the middle
+
+L(1): mov %r8, %r9 C L(top) expects the higher limb in r9
+
+ ALIGN(8)
+L(top): mov (%rsi,%rdx,8), %r8 C load next lower limb, up[rdx-1]
+ shld %cl, %r8, %r9 C compute result limb: r9 shifted left, filled from the top of r8
+ mov %r9, (%rdi,%rdx,8) C store it in rp[rdx]
+L(mid): mov -8(%rsi,%rdx,8), %r9 C load next lower limb, up[rdx-2]
+ shld %cl, %r9, %r8 C compute result limb: r8 shifted left, filled from the top of r9
+ mov %r8, -8(%rdi,%rdx,8) C store it in rp[rdx-1]
+ sub $2, %rdx C two limbs are handled per iteration
+ jnz L(top) C loop until rdx reaches 0
+C Loop exit: r9 holds up[0], whose result limb has not been stored yet.
+ shl %cl, %r9 C compute least significant limb
+ mov %r9, (%rdi) C store it in rp[0]
+ ret C rax still holds the carry limb computed above
+
+L(end): shl %cl, %r8 C n == 1: compute least significant limb
+ mov %r8, (%rdi) C store it in rp[0]
+ ret C rax still holds the carry limb computed above
+
+EPILOGUE()
diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm
new file mode 100644
index 000000000..0fe5ef3ec
--- /dev/null
+++ b/mpn/x86_64/core2/rshift.asm
@@ -0,0 +1,80 @@
+dnl x86-64 mpn_rshift -- mpn right shift.
+
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002, 2006 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 51 Franklin Street,
+dnl Fifth Floor, Boston, MA 02110-1301, USA.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C K8: 4.5
+C P4: 13
+C P6-15: 1.75
+
+C This code was created from the generic x86 code, with minor improvements.
+C With more unrolling, it should be possible to approach 1 cycle/limb on P6-15.
+
+
+C INPUT PARAMETERS
+C rp rdi
+C up rsi
+C n rdx
+C cnt rcx
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_rshift)
+C Shift the n limbs at up right by the count in %cl, lowest limb first, storing at rp.
+ lea -8(%rdi,%rdx,8), %rdi C rdi = address of rp[n-1]
+ lea (%rsi,%rdx,8), %rsi C rsi = address of up[n], one limb past the top
+ neg %rdx C rdx = -n, a negative index that counts up towards 0
+
+ mov (%rsi,%rdx,8), %r8 C read least significant limb, up[0]
+ xorl %eax, %eax C rax = 0 (a 32-bit write also clears the upper half)
+ shrd %cl, %r8, %rax C compute carry limb: bits shifted out of up[0], placed at the top of rax
+ inc %rdx C rdx = -(n-1)
+ jz L(end) C n == 1: a single result limb, no loop needed
+ testb $1, %dl C parity of n-1 selects the loop entry point
+ jz L(1) C n-1 even: enter the 2-way unrolled loop at the top
+ dec %rdx C n-1 odd: restore rdx = -n to suit the offsets used at L(mid)
+ jmp L(mid) C and enter the loop in the middle
+
+L(1): mov %r8, %r9 C L(top) expects the lower limb in r9
+
+ ALIGN(8)
+L(top): mov (%rsi,%rdx,8), %r8 C load next higher limb, up[n+rdx]
+ shrd %cl, %r8, %r9 C compute result limb: r9 shifted right, filled from the bottom of r8
+ mov %r9, (%rdi,%rdx,8) C store it in rp[n-1+rdx]
+L(mid): mov 8(%rsi,%rdx,8), %r9 C load next higher limb, up[n+1+rdx]
+ shrd %cl, %r9, %r8 C compute result limb: r8 shifted right, filled from the bottom of r9
+ mov %r8, 8(%rdi,%rdx,8) C store it in rp[n+rdx]
+ add $2, %rdx C two limbs are handled per iteration
+ jnz L(top) C loop until rdx reaches 0
+C Loop exit: r9 holds up[n-1], whose result limb has not been stored yet.
+ shr %cl, %r9 C compute most significant limb
+ mov %r9, (%rdi) C store it in rp[n-1]
+ ret C rax still holds the carry limb computed above
+
+L(end): shr %cl, %r8 C n == 1: compute most significant limb
+ mov %r8, (%rdi) C store it in rp[n-1], which is rp[0] here
+ ret C rax still holds the carry limb computed above
+
+EPILOGUE()