summaryrefslogtreecommitdiff
path: root/mpn/alpha
diff options
context:
space:
mode:
authorTorbjorn Granlund <tege@gmplib.org>2013-04-18 18:44:51 +0200
committerTorbjorn Granlund <tege@gmplib.org>2013-04-18 18:44:51 +0200
commit91d23ac4381e67dc8add0634335d40f6b018d2bc (patch)
tree8b8dbf105e0e76c6e0ba51fe79894bbaabc3e878 /mpn/alpha
parent46578354d021d0159fb4cd17491b34c7af145c31 (diff)
downloadgmp-91d23ac4381e67dc8add0634335d40f6b018d2bc.tar.gz
New alpha tabselect.
Diffstat (limited to 'mpn/alpha')
-rw-r--r--mpn/alpha/tabselect.asm126
1 files changed, 126 insertions, 0 deletions
diff --git a/mpn/alpha/tabselect.asm b/mpn/alpha/tabselect.asm
new file mode 100644
index 000000000..117c01fe1
--- /dev/null
+++ b/mpn/alpha/tabselect.asm
@@ -0,0 +1,126 @@
+dnl Alpha mpn_tabselect.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: 2.25
+C EV6: 1.64
+
+define(`rp', `r16')
+define(`tp', `r17')
+define(`n', `r18')
+define(`nents', `r19')
+define(`which', `r20')
+
+define(`i', `r21')
+define(`j', `r22')
+define(`stride', `r23')
+define(`mask', `r24')
+define(`k', `r25')
+
+
+ASM_START()
+PROLOGUE(mpn_tabselect)
+ subq n, 4, j C outer loop induction variable
+
+ blt j, L(outer_end)
+L(outer_top):
+ mov tp, r8
+ lda r0, 0(r31)
+ lda r1, 0(r31)
+ lda r2, 0(r31)
+ lda r3, 0(r31)
+ subq j, 4, j C outer loop induction variable
+ subq nents, which, k
+ mov nents, i
+
+ ALIGN(16)
+L(top): ldq r4, 0(tp)
+ ldq r5, 8(tp)
+ cmpeq k, i, mask
+ subq i, 1, i
+ subq r31, mask, mask
+ ldq r6, 16(tp)
+ ldq r7, 24(tp)
+ and r4, mask, r4
+ and r5, mask, r5
+ or r0, r4, r0
+ or r1, r5, r1
+ and r6, mask, r6
+ and r7, mask, r7
+ or r2, r6, r2
+ or r3, r7, r3
+ s8addq n, tp, tp
+ bne i, L(top)
+
+ stq r0, 0(rp)
+ stq r1, 8(rp)
+ stq r2, 16(rp)
+ stq r3, 24(rp)
+ addq r8, 32, tp
+ addq rp, 32, rp
+ bge j, L(outer_top)
+L(outer_end):
+
+ and n, 2, r0
+ beq r0, L(b0x)
+L(b1x): mov tp, r8
+ lda r0, 0(r31)
+ lda r1, 0(r31)
+ subq nents, which, k
+ mov nents, i
+ ALIGN(16)
+L(tp2): ldq r4, 0(tp)
+ ldq r5, 8(tp)
+ cmpeq k, i, mask
+ subq i, 1, i
+ subq r31, mask, mask
+ and r4, mask, r4
+ and r5, mask, r5
+ or r0, r4, r0
+ or r1, r5, r1
+ s8addq n, tp, tp
+ bne i, L(tp2)
+ stq r0, 0(rp)
+ stq r1, 8(rp)
+ addq r8, 16, tp
+ addq rp, 16, rp
+
+L(b0x): and n, 1, r0
+ beq r0, L(b00)
+L(b01): lda r0, 0(r31)
+ subq nents, which, k
+ mov nents, i
+ ALIGN(16)
+L(tp1): ldq r4, 0(tp)
+ cmpeq k, i, mask
+ subq i, 1, i
+ subq r31, mask, mask
+ and r4, mask, r4
+ or r0, r4, r0
+ s8addq n, tp, tp
+ bne i, L(tp1)
+ stq r0, 0(rp)
+
+L(b00): ret r31, (r26), 1
+EPILOGUE()