summary refs log tree commit diff
path: root/mpn/s390_64
diff options
context:
space:
mode:
author Torbjorn Granlund <tege@gmplib.org> 2011-10-22 19:24:58 +0200
committer Torbjorn Granlund <tege@gmplib.org> 2011-10-22 19:24:58 +0200
commit cf11f78d594ad8b099c0f2c35eb81e3eaadae48e (patch)
tree ed6fccbc462e66ecc6c4c1c21569b5775d6bc713 /mpn/s390_64
parent 39844cd46da5507d963e984e5e68354341d0554a (diff)
download gmp-cf11f78d594ad8b099c0f2c35eb81e3eaadae48e.tar.gz
Rewrite s390_64 add/sub code, move result to aors_n.asm.
Diffstat (limited to 'mpn/s390_64')
-rw-r--r-- mpn/s390_64/aors_n.asm (renamed from mpn/s390_64/add_n.asm) 72
-rw-r--r-- mpn/s390_64/sub_n.asm 105
2 files changed, 47 insertions, 130 deletions
diff --git a/mpn/s390_64/add_n.asm b/mpn/s390_64/aors_n.asm
index 3fa914e91..c39026f4f 100644
--- a/mpn/s390_64/add_n.asm
+++ b/mpn/s390_64/aors_n.asm
@@ -1,4 +1,4 @@
-dnl S/390-64 mpn_add_n
+dnl S/390-64 mpn_add_n and mpn_sub_n.
dnl Copyright 2011 Free Software Foundation, Inc.
@@ -20,8 +20,8 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C z900 6.5
-C z990 3.5
+C z900 5.5
+C z990 3
C z9 ?
C z10 ?
C z196 ?
@@ -36,36 +36,61 @@ define(`up', `%r3')
define(`vp', `%r4')
define(`n', `%r5')
+ifdef(`OPERATION_add_n', `
+ define(ADSB, alg)
+ define(ADSBCR, alcgr)
+ define(ADSBC, alcg)
+ define(RETVAL,`
+ lghi %r2, 0
+ alcgr %r2, %r2')
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADSB, slg)
+ define(ADSBCR, slbgr)
+ define(ADSBC, slbg)
+ define(RETVAL,`
+ slbgr %r2, %r2
+ lcgr %r2, %r2')
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
ASM_START()
-PROLOGUE(mpn_add_n)
+PROLOGUE(func)
stmg %r6, %r12, 48(%r15)
- la %r1, 3(n)
+ aghi n, 3
lghi %r7, 3
- srlg %r1, %r1, 2
+ srlg %r1, n, 2
ngr %r7, n C n mod 4
- je L(top) C The C flag is clear
+ je L(b1)
cghi %r7, 2
- jl L(b1)
- je L(b2)
+ jl L(b2)
+ jne L(b0)
L(b3): lmg %r5, %r7, 0(up)
la up, 24(up)
- lmg %r9, %r11, 0(vp)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 8(vp)
+ ADSBC %r7, 16(vp)
la vp, 24(vp)
- algr %r5, %r9
- alcgr %r6, %r10
- alcgr %r7, %r11
stmg %r5, %r7, 0(rp)
la rp, 24(rp)
brctg %r1, L(top)
j L(end)
+L(b0): lmg %r5, %r8, 0(up) C These redundant insns are no mistake,
+ la up, 32(up) C they are needed to make the main loop run
+ ADSB %r5, 0(vp) C fast for n = 0 (mod 4).
+ ADSBC %r6, 8(vp)
+ j L(m0)
+
L(b1): lg %r5, 0(up)
la up, 8(up)
- lg %r9, 0(vp)
+ ADSB %r5, 0(vp)
la vp, 8(vp)
- algr %r5, %r9
stg %r5, 0(rp)
la rp, 8(rp)
brctg %r1, L(top)
@@ -73,10 +98,9 @@ L(b1): lg %r5, 0(up)
L(b2): lmg %r5, %r6, 0(up)
la up, 16(up)
- lmg %r9, %r10, 0(vp)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 8(vp)
la vp, 16(vp)
- algr %r5, %r9
- alcgr %r6, %r10
stmg %r5, %r6, 0(rp)
la rp, 16(rp)
brctg %r1, L(top)
@@ -84,18 +108,16 @@ L(b2): lmg %r5, %r6, 0(up)
L(top): lmg %r5, %r8, 0(up)
la up, 32(up)
- lmg %r9, %r12, 0(vp)
+ ADSBC %r5, 0(vp)
+ ADSBC %r6, 8(vp)
+L(m0): ADSBC %r7, 16(vp)
+ ADSBC %r8, 24(vp)
la vp, 32(vp)
- alcgr %r5, %r9
- alcgr %r6, %r10
- alcgr %r7, %r11
- alcgr %r8, %r12
stmg %r5, %r8, 0(rp)
la rp, 32(rp)
brctg %r1, L(top)
-L(end): lghi %r2, 0
- alcgr %r2, %r2
+L(end): RETVAL
lmg %r6, %r12, 48(%r15)
br %r14
diff --git a/mpn/s390_64/sub_n.asm b/mpn/s390_64/sub_n.asm
deleted file mode 100644
index b1ab2d310..000000000
--- a/mpn/s390_64/sub_n.asm
+++ /dev/null
@@ -1,105 +0,0 @@
-dnl S/390-64 mpn_sub_n
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C z900 6.5
-C z990 3.5
-C z9 ?
-C z10 ?
-C z196 ?
-
-C TODO
-C * Optimise for small n
-C * Use r0 and save/restore one less register
-
-C INPUT PARAMETERS
-define(`rp', `%r2')
-define(`up', `%r3')
-define(`vp', `%r4')
-define(`n', `%r5')
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
- stmg %r6, %r12, 48(%r15)
-
- la %r1, 3(n)
- lghi %r7, 3
- srlg %r1, %r1, 2
- ngr %r7, n C n mod 4
- je L(b0)
- cghi %r7, 2
- jl L(b1)
- je L(b2)
-
-L(b3): lmg %r5, %r7, 0(up)
- la up, 24(up)
- lmg %r9, %r11, 0(vp)
- la vp, 24(vp)
- slgr %r5, %r9
- slbgr %r6, %r10
- slbgr %r7, %r11
- stmg %r5, %r7, 0(rp)
- la rp, 24(rp)
- brctg %r1, L(top)
- j L(end)
-
-L(b0): slgr %r5, %r5 C set C flag
- j L(top)
-
-L(b1): lg %r5, 0(up)
- la up, 8(up)
- lg %r9, 0(vp)
- la vp, 8(vp)
- slgr %r5, %r9
- stg %r5, 0(rp)
- la rp, 8(rp)
- brctg %r1, L(top)
- j L(end)
-
-L(b2): lmg %r5, %r6, 0(up)
- la up, 16(up)
- lmg %r9, %r10, 0(vp)
- la vp, 16(vp)
- slgr %r5, %r9
- slbgr %r6, %r10
- stmg %r5, %r6, 0(rp)
- la rp, 16(rp)
- brctg %r1, L(top)
- j L(end)
-
-L(top): lmg %r5, %r8, 0(up)
- la up, 32(up)
- lmg %r9, %r12, 0(vp)
- la vp, 32(vp)
- slbgr %r5, %r9
- slbgr %r6, %r10
- slbgr %r7, %r11
- slbgr %r8, %r12
- stmg %r5, %r8, 0(rp)
- la rp, 32(rp)
- brctg %r1, L(top)
-
-L(end): slbgr %r2, %r2
- lcgr %r2, %r2
-
- lmg %r6, %r12, 48(%r15)
- br %r14
-EPILOGUE()