summary | refs | log | tree | commit | diff
path: root/mpn/sparc64
diff options
context:
space:
mode:
author: Torbjorn Granlund <tege@gmplib.org> 2013-04-15 21:53:23 +0200
committer: Torbjorn Granlund <tege@gmplib.org> 2013-04-15 21:53:23 +0200
commit: dcedfd3be9fee5c66e8958caf8d39cfbc5ff30e6 (patch)
tree: 33bcb71411735db4adaf2abe57f5eba99b292f05 /mpn/sparc64
parent: 39829bbfa56350dd05fcda45b488f646a55b06cf (diff)
download: gmp-dcedfd3be9fee5c66e8958caf8d39cfbc5ff30e6.tar.gz
Add generic sparc64 tabselect.
Diffstat (limited to 'mpn/sparc64')
-rw-r--r-- mpn/sparc64/tabselect.asm 151
1 files changed, 151 insertions, 0 deletions
diff --git a/mpn/sparc64/tabselect.asm b/mpn/sparc64/tabselect.asm
new file mode 100644
index 000000000..a29d5ead8
--- /dev/null
+++ b/mpn/sparc64/tabselect.asm
@@ -0,0 +1,151 @@
+dnl SPARC v9 mpn_tabselect.
+
+dnl Contributed to the GNU project by Torbjörn Granlund and David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2 hopefully
+C UltraSPARC 3: 3
+C UltraSPARC T1: 17
+C UltraSPARC T3: ?
+C UltraSPARC T4/T5: 2.25 hopefully
+
+C INPUT PARAMETERS
+define(`rp', `%i0')       C destination pointer; the selected limbs are stored here
+define(`tp', `%i1')       C table pointer; entries are loaded from here
+define(`n', `%i2')        C limbs per entry; shifted left by 3 to form the byte stride
+define(`nents', `%i3')    C number of table entries scanned in each pass
+define(`which', `%i4')    C countdown seed; the entry reached when it hits zero is kept
+
+define(`i', `%g1')        C inner counter: entries left in the current pass
+define(`j', `%g3')        C outer counter: starts at n - 4 and is stepped down by 4
+define(`stride', `%g4')   C byte distance from one entry to the next, 8 * n
+define(`tporig', `%g5')   C table address of the limb group currently being processed
+define(`mask', `%o0')     C computed as 0 - 0 - carry: all ones if carry is set, else zero
+
+define(`data0', `%l0')    C data0..data3: OR-accumulators holding the output limbs
+define(`data1', `%l1')
+define(`data2', `%l2')
+define(`data3', `%l3')
+define(`t0', `%l4')       C t0..t3: limbs loaded from the entry being visited
+define(`t1', `%l5')
+define(`t2', `%l6')
+define(`t3', `%l7')
+
+ASM_START()
+ REGISTER(%g2,#scratch)                 C NOTE(review): REGISTER is defined outside this file; %g2 is not referenced again below
+ REGISTER(%g3,#scratch)                 C %g3 is the register behind the j alias
+PROLOGUE(mpn_tabselect)
+ save %sp, -176, %sp                    C new register window and a 176-byte frame; arguments are now in the %i registers
+
+ sllx n, 3, stride                      C stride = 8 * n, the byte step added to tp per entry
+ sub n, 4, j                            C j = n - 4
+ brlz j, L(outer_end)                   C fewer than 4 limbs: skip the 4-limb loop
+ mov tp, tporig                         C delay slot, runs on both paths: tporig = table base
+
+L(outer_loop):                          C each pass produces 4 output limbs
+ clr data0                              C start with zeroed accumulators
+ clr data1
+ clr data2
+ clr data3
+ mov tporig, tp                         C rewind to the first entry for this limb group
+ mov nents, i                           C visit every one of the nents entries
+ mov which, %o1                         C fresh countdown copy of which
+
+L(top): subcc %o1, 1, %o1 C set carry iff o1 = 0 (value before the decrement)
+ ldx [tp + 0], t0
+ subc %g0, %g0, mask                    C mask = 0 - 0 - carry: all ones for the wanted entry, else zero
+ ldx [tp + 8], t1
+ sub i, 1, i                            C one entry fewer to go
+ ldx [tp + 16], t2
+ ldx [tp + 24], t3
+ add tp, stride, tp                     C step to the same limbs of the next entry
+ and t0, mask, t0                       C keep the limb only when mask is all ones
+ and t1, mask, t1
+ or t0, data0, data0                    C fold the masked limb into its accumulator
+ and t2, mask, t2
+ or t1, data1, data1
+ and t3, mask, t3
+ or t2, data2, data2
+ brnz i, L(top)                         C repeat until all entries have been read
+ or t3, data3, data3                    C delay slot: last accumulate of this iteration
+
+ stx data0, [rp + 0]                    C write the 4 selected limbs
+ subcc j, 4, j                          C j -= 4; the brgez below tests the register j itself
+ stx data1, [rp + 8]
+ stx data2, [rp + 16]
+ stx data3, [rp + 24]
+ add tporig, (4 * 8), tporig            C move on to the next 4-limb group in the table
+
+ brgez j, L(outer_loop)                 C j >= 0 means at least 4 more limbs remain
+ add rp, (4 * 8), rp                    C delay slot: advance the destination by 4 limbs
+L(outer_end):
+
+
+ andcc n, 2, %g0                        C test bit 1 of n; result discarded, only flags kept
+ be L(b0x)                              C bit clear: no 2-limb tail
+ nop                                    C delay slot left empty
+L(b1x): clr data0                       C 2-limb tail, same scheme with two accumulators
+ clr data1
+ mov tporig, tp                         C rewind to the first entry
+ mov nents, i
+ mov which, %o1
+
+L(tp2): subcc %o1, 1, %o1               C carry set iff the countdown was 0 before this decrement
+ ldx [tp + 0], t0
+ subc %g0, %g0, mask                    C mask = 0 - 0 - carry
+ ldx [tp + 8], t1
+ sub i, 1, i
+ add tp, stride, tp                     C next entry
+ and t0, mask, t0
+ and t1, mask, t1
+ or t0, data0, data0
+ brnz i, L(tp2)                         C repeat until all entries have been read
+ or t1, data1, data1                    C delay slot
+
+ stx data0, [rp + 0]                    C write the 2 selected limbs
+ stx data1, [rp + 8]
+ add tporig, (2 * 8), tporig            C skip past the 2 limbs just handled
+ add rp, (2 * 8), rp
+
+
+L(b0x): andcc n, 1, %g0                 C test bit 0 of n
+ be L(b00)                              C bit clear: no single-limb tail
+ nop                                    C delay slot left empty
+L(b01): clr data0                       C single-limb tail
+ mov tporig, tp                         C rewind to the first entry
+ mov nents, i
+ mov which, %o1
+
+L(tp1): subcc %o1, 1, %o1               C carry set iff the countdown was 0 before this decrement
+ ldx [tp + 0], t0
+ subc %g0, %g0, mask                    C mask = 0 - 0 - carry
+ sub i, 1, i
+ add tp, stride, tp                     C next entry
+ and t0, mask, t0
+ brnz i, L(tp1)                         C repeat until all entries have been read
+ or t0, data0, data0                    C delay slot
+
+ stx data0, [rp + 0]                    C write the last selected limb
+
+L(b00): ret                             C return to the caller
+ restore                                C delay slot: restore the register window set up by save
+EPILOGUE()