summaryrefslogtreecommitdiff
path: root/sysdeps/s390
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.ibm.com>2020-06-26 09:45:11 +0200
committerStefan Liebler <stli@linux.ibm.com>2020-06-26 09:45:11 +0200
commit1d21fb1061cbeb50414a8f371abb36548d90f150 (patch)
treef1bc239f508120eb85c1d8c53b51906edbe72500 /sysdeps/s390
parent0792c8ae1aebf538de45ff9a0e2e401a60525de2 (diff)
downloadglibc-1d21fb1061cbeb50414a8f371abb36548d90f150.tar.gz
S390: Optimize __memset_z196.
It turned out that an 256b-mvc instruction which depends on the result of a previous 256b-mvc instruction is counterproductive. Therefore this patch adjusts the 256b-loop by storing the first byte with stc and setting the remaining 255b with mvc. Now the 255b-mvc instruction depends on the stc instruction.
Diffstat (limited to 'sysdeps/s390')
-rw-r--r--sysdeps/s390/memset-z900.S19
1 files changed, 9 insertions, 10 deletions
diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
index ca3eac0522..1e0c334156 100644
--- a/sysdeps/s390/memset-z900.S
+++ b/sysdeps/s390/memset-z900.S
@@ -157,28 +157,27 @@ ENTRY(MEMSET_Z196)
# if !defined __s390x__
llgfr %r4,%r4
# endif /* !defined __s390x__ */
- ltgr %r4,%r4
- je .L_Z196_4
+ clgfi %r4,1
+ jl .L_Z196_4 # n == 0
stc %r3,0(%r2)
+ je .L_Z196_4 # n == 1
+ aghi %r4,-2
lgr %r1,%r2
- cghi %r4,1
- je .L_Z196_4
- aghi %r4,-2
- srlg %r5,%r4,8
- ltgr %r5,%r5
- jne .L_Z196_1
+ risbg %r5,%r4,8,128+63,56 # r5 = n / 256
+ jne .L_Z196_1 # Jump away if r5 != 0
.L_Z196_3:
exrl %r4,.L_Z196_17
.L_Z196_4:
br %r14
.L_Z196_1:
cgfi %r5,1048576
- jh __memset_mvcle # Switch to mvcle for >256MB
+ jh __memset_mvcle # Switch to mvcle for >256MB
.L_Z196_2:
pfd 2,1024(%r1)
- mvc 1(256,%r1),0(%r1)
+ mvc 1(255,%r1),0(%r1)
aghi %r5,-1
la %r1,256(%r1)
+ stc %r3,0(%r1)
jne .L_Z196_2
j .L_Z196_3
.L_Z196_17: