summaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc64/multiarch/memset.c
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-07-15 12:19:09 -0400
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-09-10 07:39:46 -0400
commit71ae86478edc7b21872464f43fb29ff650c1681a (patch)
treea75679fa464a1d19543020ef0c4f4f982d099d99 /sysdeps/powerpc/powerpc64/multiarch/memset.c
parent3b473fecdf4c52989cd915b649bb6d26c042d048 (diff)
downloadglibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.gz
PowerPC: memset optimization for POWER8/PPC64
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the POWER7-optimized one is used. For sizes larger than 255 bytes, two strategies are used: 1. If the constant is different from 0, the memory is written with Altivec vector instructions; 2. If the constant is 0, dcbz instructions are used. The loop is unrolled to clear 512 bytes at a time. Using vector instructions increases throughput considerably, doubling performance for sizes larger than 1024 bytes. Unrolling the dcbz loop also improves performance, doubling throughput for sizes larger than 8192 bytes.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch/memset.c')
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memset.c11
1 files changed, 7 insertions, 4 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset.c b/sysdeps/powerpc/powerpc64/multiarch/memset.c
index aa2ae7056e..9c7ed10c87 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memset.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memset.c
@@ -32,16 +32,19 @@ extern __typeof (__redirect_memset) __memset_ppc attribute_hidden;
extern __typeof (__redirect_memset) __memset_power4 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power6 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power7 attribute_hidden;
+extern __typeof (__redirect_memset) __memset_power8 attribute_hidden;
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
libc_ifunc (__libc_memset,
- (hwcap & PPC_FEATURE_HAS_VSX)
- ? __memset_power7 :
- (hwcap & PPC_FEATURE_ARCH_2_05)
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __memset_power8 :
+ (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __memset_power7 :
+ (hwcap & PPC_FEATURE_ARCH_2_05)
? __memset_power6 :
(hwcap & PPC_FEATURE_POWER4)
- ? __memset_power4
+ ? __memset_power4
: __memset_ppc);
#undef memset