summary | refs | log | tree | commit | diff
path: root/mpn
diff options
context:
space:
mode:
author: Torbjorn Granlund <tg@gmplib.org> 2021-11-02 01:47:16 +0100
committer: Torbjorn Granlund <tg@gmplib.org> 2021-11-02 01:47:16 +0100
commit: cc12fd3d8681d3aa29eaa6af9e7f6f9815f721a4 (patch)
tree: 873d8aed5e2c08aa03a4500e5eceddcc316a85e1 /mpn
parent: c86a8d38231dbe7e15425b1303168088b37ccd03 (diff)
download: gmp-cc12fd3d8681d3aa29eaa6af9e7f6f9815f721a4.tar.gz
Rewrite recently added s390_64 sec_tabselect.
Diffstat (limited to 'mpn')
-rw-r--r-- mpn/s390_64/sec_tabselect.asm 113
1 files changed, 80 insertions, 33 deletions
diff --git a/mpn/s390_64/sec_tabselect.asm b/mpn/s390_64/sec_tabselect.asm
index 4bfee09ed..f01cbd780 100644
--- a/mpn/s390_64/sec_tabselect.asm
+++ b/mpn/s390_64/sec_tabselect.asm
@@ -31,14 +31,14 @@ dnl see https://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C z900 ?
+C z900 - slfi unsupported
C z990 ?
C z9 ?
C z10 ?
C z196 ?
C z13 ?
C z14 ?
-C z15 ?
+C z15 1.6
dnl void
dnl mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
@@ -48,42 +48,89 @@ define(`rp', `%r2')
define(`tp', `%r3')
define(`n', `%r4')
define(`nents', `%r5')
-define(`which', `%r6')
+define(`which_arg',`%r6') C magicked to stack
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
- stmg %r7, %r8, 56(%r15)
- lgr %r8, n
- sllg n, n, 3
+dnl r0 r1 r2 r3 r4 r5 r6 r7
+dnl r8 r9 r10 r11 r12 r13 r14 r15
-L(cpy): lg %r0, 0(tp)
- stg %r0, 0(rp)
- aghi tp, 8
- aghi rp, 8
- brctg %r8, L(cpy)
+define(`mask', `%r14')
+define(`k', `%r1')
+define(`which', `%r0')
- aghi nents, -1
- jle L(ret)
- slfi which, 1
-
-L(outer):
- slfi which, 1
- slbgr %r0, %r0
- sgr rp, n
- srlg %r8, n, 3
+define(`FRAME', 64)
-L(top): lg %r1, 0(rp)
- lg %r7, 0(tp)
- xgr %r7, %r1
- ngr %r7, %r0
- xgr %r1, %r7
- stg %r1, 0(rp)
- aghi tp, 8
- aghi rp, 8
- brctg %r8, L(top)
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+ stmg %r5, %r15, 40(%r15)
+ aghi %r15, -FRAME
- brctg nents, L(outer)
+ sllg n, n, 3
+ msgr %r5, n
+ stg %r5, 16(%r15) C nents * n * LIMB_BYTES
-L(ret): lmg %r7, %r8, 56(%r15)
+ srlg %r5, n, 2+3
+ ngr %r5, %r5
+ je L(end4)
+L(outer):
+ lg which, eval(48+FRAME)(%r15)
+ lg k, eval(40+FRAME)(%r15) C nents
+ lghi %r6, 0
+ lghi %r7, 0
+ lghi %r8, 0
+ lghi %r9, 0
+L(tp4): slfi which, 1
+ slbgr mask, mask
+ lmg %r10, %r13, 0(tp)
+ ngr %r10, mask
+ ngr %r11, mask
+ ngr %r12, mask
+ ngr %r13, mask
+ agr %r6, %r10
+ agr %r7, %r11
+ agr %r8, %r12
+ agr %r9, %r13
+ agr tp, n
+ brctg k, L(tp4)
+ stmg %r6, %r9, 0(rp)
+ aghi rp, 32
+ slg tp, 16(%r15)
+ aghi tp, eval(4*8)
+ brctg %r5, L(outer)
+L(end4):
+ tmll n, 16
+ je L(end2)
+ lg which, eval(48+FRAME)(%r15)
+ lg k, eval(40+FRAME)(%r15) C nents
+ lghi %r6, 0
+ lghi %r7, 0
+L(tp2): slfi which, 1
+ slbgr mask, mask
+ lmg %r10, %r11, 0(tp)
+ ngr %r10, mask
+ ngr %r11, mask
+ agr %r6, %r10
+ agr %r7, %r11
+ agr tp, n
+ brctg k, L(tp2)
+ stmg %r6, %r7, 0(rp)
+ aghi rp, 16
+ slg tp, 16(%r15)
+ aghi tp, eval(2*8)
+L(end2):
+ tmll n, 8
+ je L(end1)
+ lg which, eval(48+FRAME)(%r15)
+ lg k, eval(40+FRAME)(%r15) C nents
+ lghi %r6, 0
+L(tp1): slfi which, 1
+ slbgr mask, mask
+ lg %r10, 0(tp)
+ ngr %r10, mask
+ agr %r6, %r10
+ agr tp, n
+ brctg k, L(tp1)
+ stg %r6, 0(rp)
+L(end1):
+ lmg %r5, %r15, eval(40+FRAME)(%r15)
br %r14
EPILOGUE()