From cc12fd3d8681d3aa29eaa6af9e7f6f9815f721a4 Mon Sep 17 00:00:00 2001
From: Torbjorn Granlund
Date: Tue, 2 Nov 2021 01:47:16 +0100
Subject: Rewrite recently added s390_64 sec_tabselect.

---
 mpn/s390_64/sec_tabselect.asm | 113 ++++++++++++++++++++++++++++++------------
 1 file changed, 80 insertions(+), 33 deletions(-)

(limited to 'mpn')

diff --git a/mpn/s390_64/sec_tabselect.asm b/mpn/s390_64/sec_tabselect.asm
index 4bfee09ed..f01cbd780 100644
--- a/mpn/s390_64/sec_tabselect.asm
+++ b/mpn/s390_64/sec_tabselect.asm
@@ -31,14 +31,14 @@ dnl see https://www.gnu.org/licenses/.
 include(`../config.m4')
 
 C            cycles/limb
-C z900		 ?
+C z900		 -	slfi unsupported
 C z990		 ?
 C z9		 ?
 C z10		 ?
 C z196		 ?
 C z13		 ?
 C z14		 ?
-C z15		 ?
+C z15		 1.6
 
 dnl void
 dnl mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
@@ -48,42 +48,89 @@ define(`rp',	`%r2')
 define(`tp',	`%r3')
 define(`n',	`%r4')
 define(`nents',	`%r5')
-define(`which',	`%r6')
+define(`which_arg',`%r6')	C magicked to stack
 
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
-	stmg	%r7, %r8, 56(%r15)
-	lgr	%r8, n
-	sllg	n, n, 3
+dnl r0 r1 r2 r3 r4 r5 r6 r7
+dnl r8 r9 r10 r11 r12 r13 r14 r15
 
-L(cpy):	lg	%r0, 0(tp)
-	stg	%r0, 0(rp)
-	aghi	tp, 8
-	aghi	rp, 8
-	brctg	%r8, L(cpy)
+define(`mask',	`%r14')		C 0 or all ones from slbgr; r14 is saved and restored around the body
+define(`k',	`%r1')		C inner loop counter, reloaded with nents before each inner loop
+define(`which',	`%r0')		C working copy of which_arg, reloaded from the stack
 
-	aghi	nents, -1
-	jle	L(ret)
-	slfi	which, 1
-
-L(outer):
-	slfi	which, 1
-	slbgr	%r0, %r0
-	sgr	rp, n
-	srlg	%r8, n, 3
+define(`FRAME', 64)		C bytes by which r15 is lowered in the prologue
 
-L(top):	lg	%r1, 0(rp)
-	lg	%r7, 0(tp)
-	xgr	%r7, %r1
-	ngr	%r7, %r0
-	xgr	%r1, %r7
-	stg	%r1, 0(rp)
-	aghi	tp, 8
-	aghi	rp, 8
-	brctg	%r8, L(top)
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+	stmg	%r5, %r15, 40(%r15)		C save r5-r15; nents lands at 40 and which_arg at 48
+	aghi	%r15, -FRAME			C lower the stack pointer by FRAME bytes
 
-	brctg	nents, L(outer)
+	sllg	n, n, 3				C n = n * 8, the byte stride between table entries
+	msgr	%r5, n				C r5 = nents * n in bytes
+	stg	%r5, 16(%r15)			C nents * n * LIMB_BYTES
 
-L(ret):	lmg	%r7, %r8, 56(%r15)
+	srlg	%r5, n, 2+3			C r5 = number of complete 4-limb groups per entry
+	ngr	%r5, %r5			C AND with itself only to set the condition code
+	je	L(end4)				C skip the 4-limb loop when there is no complete group
+L(outer):					C one pass per group of 4 limbs
+	lg	which, eval(48+FRAME)(%r15)	C reload which from the slot where stmg saved r6
+	lg	k, eval(40+FRAME)(%r15)		C nents
+	lghi	%r6, 0				C r6-r9 accumulate the 4 selected limbs
+	lghi	%r7, 0
+	lghi	%r8, 0
+	lghi	%r9, 0
+L(tp4):	slfi	which, 1			C count which down; borrows on the pass where it was 0
+	slbgr	mask, mask			C mask = -1 if slfi borrowed, else 0
+	lmg	%r10, %r13, 0(tp)		C load 4 limbs of the current entry
+	ngr	%r10, mask			C zero them unless this is the selected entry
+	ngr	%r11, mask
+	ngr	%r12, mask
+	ngr	%r13, mask
+	agr	%r6, %r10			C add into the accumulators; masked-off entries add 0
+	agr	%r7, %r11
+	agr	%r8, %r12
+	agr	%r9, %r13
+	agr	tp, n				C step to the same limbs of the next entry
+	brctg	k, L(tp4)			C visit every one of the nents entries
+	stmg	%r6, %r9, 0(rp)			C store the 4 accumulated limbs
+	aghi	rp, 32
+	slg	tp, 16(%r15)			C rewind tp by nents * n bytes
+	aghi	tp, eval(4*8)			C then move on to the next 4 limbs
+	brctg	%r5, L(outer)
+L(end4):
+	tmll	n, 16				C n is a byte count here; bit 16 set means 2 more limbs
+	je	L(end2)
+	lg	which, eval(48+FRAME)(%r15)	C reload which for a fresh countdown
+	lg	k, eval(40+FRAME)(%r15)		C nents
+	lghi	%r6, 0				C r6-r7 accumulate the 2 selected limbs
+	lghi	%r7, 0
+L(tp2):	slfi	which, 1			C same countdown and mask scheme as L(tp4)
+	slbgr	mask, mask
+	lmg	%r10, %r11, 0(tp)		C load 2 limbs of the current entry
+	ngr	%r10, mask
+	ngr	%r11, mask
+	agr	%r6, %r10
+	agr	%r7, %r11
+	agr	tp, n				C step to the next entry
+	brctg	k, L(tp2)
+	stmg	%r6, %r7, 0(rp)			C store the 2 accumulated limbs
+	aghi	rp, 16
+	slg	tp, 16(%r15)			C rewind tp by nents * n bytes
+	aghi	tp, eval(2*8)			C then move past the 2 limbs just handled
+L(end2):
+	tmll	n, 8				C bit 8 of the byte count set means one last limb
+	je	L(end1)
+	lg	which, eval(48+FRAME)(%r15)	C reload which for a fresh countdown
+	lg	k, eval(40+FRAME)(%r15)		C nents
+	lghi	%r6, 0				C r6 accumulates the selected limb
+L(tp1):	slfi	which, 1			C same countdown and mask scheme as L(tp4)
+	slbgr	mask, mask
+	lg	%r10, 0(tp)			C load 1 limb of the current entry
+	ngr	%r10, mask
+	agr	%r6, %r10
+	agr	tp, n				C step to the next entry
+	brctg	k, L(tp1)
+	stg	%r6, 0(rp)			C store the last limb
+L(end1):
+	lmg	%r5, %r15, eval(40+FRAME)(%r15)	C restore r5-r15; the reloaded r15 is the entry value
 	br	%r14
 EPILOGUE()
-- 
cgit v1.2.1