summaryrefslogtreecommitdiff
path: root/mpn/s390_64
diff options
context:
space:
mode:
author Torbjorn Granlund <tege@gmplib.org> 2011-10-23 02:17:23 +0200
committer Torbjorn Granlund <tege@gmplib.org> 2011-10-23 02:17:23 +0200
commit b09dd37bf6a15c8c25be2e52dff0527d180631cb (patch)
tree eccb990dc0e26099bb2511c87d1839eee5e2a0b1 /mpn/s390_64
parent c87818b52c365dca1cea4cccbbb55fe985f6ae63 (diff)
download gmp-b09dd37bf6a15c8c25be2e52dff0527d180631cb.tar.gz
Save/restore only used registers.
Diffstat (limited to 'mpn/s390_64')
-rw-r--r-- mpn/s390_64/aors_n.asm 11
-rw-r--r-- mpn/s390_64/logops_n.asm 136
2 files changed, 74 insertions, 73 deletions
diff --git a/mpn/s390_64/aors_n.asm b/mpn/s390_64/aors_n.asm
index c39026f4f..6d22ff9ea 100644
--- a/mpn/s390_64/aors_n.asm
+++ b/mpn/s390_64/aors_n.asm
@@ -29,6 +29,8 @@ C z196 ?
C TODO
C * Optimise for small n
C * Use r0 and save/restore one less register
+C * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C faster, at the expense of highly alignment-dependent performance.
C INPUT PARAMETERS
define(`rp', `%r2')
@@ -40,7 +42,7 @@ ifdef(`OPERATION_add_n', `
define(ADSB, alg)
define(ADSBCR, alcgr)
define(ADSBC, alcg)
- define(RETVAL,`
+ define(RETVAL,`dnl
lghi %r2, 0
alcgr %r2, %r2')
define(func, mpn_add_n)
@@ -49,7 +51,7 @@ ifdef(`OPERATION_sub_n', `
define(ADSB, slg)
define(ADSBCR, slbgr)
define(ADSBC, slbg)
- define(RETVAL,`
+ define(RETVAL,`dnl
slbgr %r2, %r2
lcgr %r2, %r2')
define(func, mpn_sub_n)
@@ -59,7 +61,7 @@ MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
ASM_START()
PROLOGUE(func)
- stmg %r6, %r12, 48(%r15)
+ stmg %r6, %r8, 48(%r15)
aghi n, 3
lghi %r7, 3
@@ -118,7 +120,6 @@ L(m0): ADSBC %r7, 16(vp)
brctg %r1, L(top)
L(end): RETVAL
-
- lmg %r6, %r12, 48(%r15)
+ lmg %r6, %r8, 48(%r15)
br %r14
EPILOGUE()
diff --git a/mpn/s390_64/logops_n.asm b/mpn/s390_64/logops_n.asm
index 719007ed7..dc3652db5 100644
--- a/mpn/s390_64/logops_n.asm
+++ b/mpn/s390_64/logops_n.asm
@@ -71,7 +71,7 @@ MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n
ASM_START()
PROLOGUE(func)
ifdef(`VARIANT_1',`
- stmg %r6, %r13, 48(%r15)
+ stmg %r6, %r8, 48(%r15)
aghi n, 3
lghi %r7, 3
srlg %r0, n, 2
@@ -81,50 +81,50 @@ ifdef(`VARIANT_1',`
jl L(b2)
jne L(top)
-L(b3): lmg %r6, %r8, 0(up)
+L(b3): lmg %r5, %r7, 0(up)
la up, 24(up)
- LOGOP %r6, 0(vp)
- LOGOP %r7, 8(vp)
- LOGOP %r8, 16(vp)
- stmg %r6, %r8, 0(rp)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 8(vp)
+ LOGOP %r7, 16(vp)
+ stmg %r5, %r7, 0(rp)
la rp, 24(rp)
la vp, 24(vp)
j L(mid)
-L(b1): lg %r6, 0(up)
+L(b1): lg %r5, 0(up)
la up, 8(up)
- LOGOP %r6, 0(vp)
- stg %r6, 0(rp)
+ LOGOP %r5, 0(vp)
+ stg %r5, 0(rp)
la rp, 8(rp)
la vp, 8(vp)
j L(mid)
-L(b2): lmg %r6, %r7, 0(up)
+L(b2): lmg %r5, %r6, 0(up)
la up, 16(up)
- LOGOP %r6, 0(vp)
- LOGOP %r7, 8(vp)
- stmg %r6, %r7, 0(rp)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 8(vp)
+ stmg %r5, %r6, 0(rp)
la rp, 16(rp)
la vp, 16(vp)
j L(mid)
-L(top): lmg %r6, %r9, 0(up)
+L(top): lmg %r5, %r8, 0(up)
la up, 32(up)
- LOGOP %r6, 0(vp)
- LOGOP %r7, 8(vp)
- LOGOP %r8, 16(vp)
- LOGOP %r9, 24(vp)
- stmg %r6, %r9, 0(rp)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 8(vp)
+ LOGOP %r7, 16(vp)
+ LOGOP %r8, 24(vp)
+ stmg %r5, %r8, 0(rp)
la rp, 32(rp)
la vp, 32(vp)
L(mid): brctg %r0, L(top)
- lmg %r6, %r13, 48(%r15)
+ lmg %r6, %r8, 48(%r15)
br %r14
')
ifdef(`VARIANT_2',`
- stmg %r6, %r13, 48(%r15)
+ stmg %r6, %r8, 48(%r15)
lghi %r1, -1
aghi n, 3
@@ -136,60 +136,60 @@ ifdef(`VARIANT_2',`
jl L(b2)
jne L(top)
-L(b3): lmg %r6, %r8, 0(vp)
+L(b3): lmg %r5, %r7, 0(vp)
la vp, 24(vp)
+ xgr %r5, %r1
xgr %r6, %r1
xgr %r7, %r1
- xgr %r8, %r1
- LOGOP %r6, 0(up)
- LOGOP %r7, 8(up)
- LOGOP %r8, 16(up)
- stmg %r6, %r8, 0(rp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ LOGOP %r7, 16(up)
+ stmg %r5, %r7, 0(rp)
la rp, 24(rp)
la up, 24(up)
j L(mid)
-L(b1): lg %r6, 0(vp)
+L(b1): lg %r5, 0(vp)
la vp, 8(vp)
- xgr %r6, %r1
- LOGOP %r6, 0(up)
- stg %r6, 0(rp)
+ xgr %r5, %r1
+ LOGOP %r5, 0(up)
+ stg %r5, 0(rp)
la rp, 8(rp)
la up, 8(up)
j L(mid)
-L(b2): lmg %r6, %r7, 0(vp)
+L(b2): lmg %r5, %r6, 0(vp)
la vp, 16(vp)
+ xgr %r5, %r1
xgr %r6, %r1
- xgr %r7, %r1
- LOGOP %r6, 0(up)
- LOGOP %r7, 8(up)
- stmg %r6, %r7, 0(rp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ stmg %r5, %r6, 0(rp)
la rp, 16(rp)
la up, 16(up)
j L(mid)
-L(top): lmg %r6, %r9, 0(vp)
+L(top): lmg %r5, %r8, 0(vp)
la vp, 32(vp)
+ xgr %r5, %r1
xgr %r6, %r1
xgr %r7, %r1
xgr %r8, %r1
- xgr %r9, %r1
- LOGOP %r6, 0(up)
- LOGOP %r7, 8(up)
- LOGOP %r8, 16(up)
- LOGOP %r9, 24(up)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ LOGOP %r7, 16(up)
+ LOGOP %r8, 24(up)
la up, 32(up)
- stmg %r6, %r9, 0(rp)
+ stmg %r5, %r8, 0(rp)
la rp, 32(rp)
L(mid): brctg %r0, L(top)
- lmg %r6, %r13, 48(%r15)
+ lmg %r6, %r8, 48(%r15)
br %r14
')
ifdef(`VARIANT_3',`
- stmg %r6, %r13, 48(%r15)
+ stmg %r6, %r8, 48(%r15)
srlg %r0, n, 2
lghi %r1, -1
@@ -202,55 +202,55 @@ ifdef(`VARIANT_3',`
jl L(b2)
jne L(top)
-L(b3): lmg %r6, %r8, 0(vp)
+L(b3): lmg %r5, %r7, 0(vp)
la vp, 24(vp)
- LOGOP %r6, 0(up)
- LOGOP %r7, 8(up)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ xgr %r5, %r1
xgr %r6, %r1
+ LOGOP %r7, 16(up)
xgr %r7, %r1
- LOGOP %r8, 16(up)
- xgr %r8, %r1
- stmg %r6, %r8, 0(rp)
+ stmg %r5, %r7, 0(rp)
la rp, 24(rp)
la up, 24(up)
j L(mid)
-L(b1): lg %r6, 0(vp)
+L(b1): lg %r5, 0(vp)
la vp, 8(vp)
- LOGOP %r6, 0(up)
- xgr %r6, %r1
- stg %r6, 0(rp)
+ LOGOP %r5, 0(up)
+ xgr %r5, %r1
+ stg %r5, 0(rp)
la rp, 8(rp)
la up, 8(up)
j L(mid)
-L(b2): lmg %r6, %r7, 0(vp)
+L(b2): lmg %r5, %r6, 0(vp)
la vp, 16(vp)
- LOGOP %r6, 0(up)
- LOGOP %r7, 8(up)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ xgr %r5, %r1
xgr %r6, %r1
- xgr %r7, %r1
- stmg %r6, %r7, 0(rp)
+ stmg %r5, %r6, 0(rp)
la rp, 16(rp)
la up, 16(up)
j L(mid)
-L(top): lmg %r6, %r9, 0(vp)
+L(top): lmg %r5, %r8, 0(vp)
la vp, 32(vp)
- LOGOP %r6, 0(up)
- LOGOP %r7, 8(up)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ xgr %r5, %r1
xgr %r6, %r1
+ LOGOP %r7, 16(up)
+ LOGOP %r8, 24(up)
xgr %r7, %r1
- LOGOP %r8, 16(up)
- LOGOP %r9, 24(up)
xgr %r8, %r1
- xgr %r9, %r1
- stmg %r6, %r9, 0(rp)
+ stmg %r5, %r8, 0(rp)
la up, 32(up)
la rp, 32(rp)
L(mid): brctg %r0, L(top)
- lmg %r6, %r13, 48(%r15)
+ lmg %r6, %r8, 48(%r15)
br %r14
')