diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2011-11-15 00:53:06 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2011-11-15 00:53:06 +0100 |
commit | aebd2151218bded6e4278834b9f082808eef6590 (patch) | |
tree | 4a7f12237027bda254f48a12a5c674cd06c55417 /mpn/x86_64 | |
parent | e1d8e2b8173bbd8e9b034722206979eef782df2c (diff) | |
download | gmp-aebd2151218bded6e4278834b9f082808eef6590.tar.gz |
Add mpn_tabselect assembly support for powerpc64, x86, x86_64, ia64.
Diffstat (limited to 'mpn/x86_64')
-rw-r--r-- | mpn/x86_64/tabselect.asm | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/mpn/x86_64/tabselect.asm b/mpn/x86_64/tabselect.asm new file mode 100644 index 000000000..f7de6a85b --- /dev/null +++ b/mpn/x86_64/tabselect.asm @@ -0,0 +1,108 @@ +dnl AMD64 mpn_tabselect. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/limb +C AMD K8,K9 ? +C AMD K10 ? +C Intel P4 ? +C Intel core2 ? +C Intel NHM ? +C Intel SBR ? +C Intel atom ? +C VIA nano ? + +C NOTES +C * This has not been tuned for any specific processor. Its speed should not +C be too bad, though. +C * Using SSE2/AVX2 could result in many-fold speedup. + +C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which) +define(`rp', `%rdi') +define(`tp', `%rsi') +define(`n', `%rdx') +define(`nents', `%rcx') +define(`which', `%r8') + +define(`i', `%rbp') +define(`maskp', `%r11') +define(`maskn', `%r12') + +C rax rbx rcx rdx rdi rsi rbp (rsp) r8 r9 r10 r11 r12 r13 r14 r15 +C nents n rp tab which + +ASM_START() + TEXT + ALIGN(16) +PROLOGUE(mpn_tabselect) + push %rbx + push %rbp + push %r12 + + lea (rp,n,8), rp + lea (tp,n,8), tp + sub nents, which +L(outer): + lea (which,nents), %rax + neg %rax C set CF iff 'which' != k + sbb maskn, maskn + mov maskn, maskp + not maskp + + mov n, i + neg i + test $1, R32(n) + je L(top) + mov (tp,i,8), %rax + and maskp, %rax + mov (rp,i,8), %r9 + and maskn, %r9 + or %r9, %rax + mov %rax, (rp,i,8) + add $1, i + jns L(end) + + ALIGN(16) +L(top): mov (tp,i,8), %rax + mov 8(tp,i,8), %rbx + and maskp, %rax + and maskp, %rbx + mov (rp,i,8), %r9 + mov 8(rp,i,8), %r10 + and maskn, %r9 + and maskn, %r10 + or %r9, %rax + or %r10, %rbx + mov %rax, (rp,i,8) + mov %rbx, 8(rp,i,8) + add $2, i + js L(top) + +L(end): lea (tp,n,8), tp + dec nents + jne L(outer) + +L(outer_end): + pop %r12 + pop %rbp + pop %rbx + ret +EPILOGUE() |