summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Steve Ellcey <sellcey@caviumnetworks.com> 2018-02-22 08:38:47 -0800
committer: Steve Ellcey <sellcey@caviumnetworks.com> 2018-02-22 08:38:47 -0800
commit: e9537dddc7c7c7b60b55ed845542c8d586164488 (patch)
tree: ae653efab8d31c8d5056f0d29fef30c19c0fd260
parent: da81ae645d8ee89052f109c814a68a9489f562e6 (diff)
download: glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.gz
IFUNC for Cavium ThunderX2
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add memcpy_thunderx2.
* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC): Increment to 4.
  (__libc_ifunc_impl_list): Add __memcpy_thunderx2.
* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2 and IS_THUNDERX2PA checks.
* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2): Use macro to set name appropriately.
  (memcpy): Use USE_THUNDERX2 macro to modify prefetches.
* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA): New macro.
  (IS_THUNDERX2): New macro.
-rw-r--r-- ChangeLog 17
-rw-r--r-- sysdeps/aarch64/multiarch/Makefile 4
-rw-r--r-- sysdeps/aarch64/multiarch/ifunc-impl-list.c 3
-rw-r--r-- sysdeps/aarch64/multiarch/memcpy.c 5
-rw-r--r-- sysdeps/aarch64/multiarch/memcpy_thunderx.S 22
-rw-r--r-- sysdeps/aarch64/multiarch/memcpy_thunderx2.S 27
-rw-r--r-- sysdeps/unix/sysv/linux/aarch64/cpu-features.h 5
7 files changed, 73 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index d33b47053b..b47ef63266 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2018-02-22 Steve Ellcey <sellcey@cavium.com>
+
+ * sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
+ Add memcpy_thunderx2.
+ * sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
+ Increment to 4.
+ (__libc_ifunc_impl_list): Add __memcpy_thunderx2.
+ * sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2
+ and IS_THUNDERX2PA checks.
+ * sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2):
+ Use macro to set name appropriately.
+ (memcpy): Use USE_THUNDERX2 macro to modify prefetches.
+ * sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
+ * sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA):
+ New macro.
+ (IS_THUNDERX2): New macro.
+
2018-02-22 Stefan Liebler <stli@linux.vnet.ibm.com>
* sysdeps/s390/fpu/libm-test-ulps: Regenerated.
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index aa179c499e..57ffdf7238 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -1,4 +1,4 @@
ifeq ($(subdir),string)
-sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
- memmove_falkor memset_generic memset_falkor
+sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \
+ memcpy_falkor memmove_falkor memset_generic memset_falkor
endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index f84956c023..e55be80103 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -25,7 +25,7 @@
#include <stdio.h>
/* Maximum number of IFUNC implementations. */
-#define MAX_IFUNC 3
+#define MAX_IFUNC 4
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -40,6 +40,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */
IFUNC_IMPL (i, name, memcpy,
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
IFUNC_IMPL (i, name, memmove,
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 3efea2c644..b94c655f9d 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -30,6 +30,7 @@ extern __typeof (__redirect_memcpy) __libc_memcpy;
extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
libc_ifunc (__libc_memcpy,
@@ -37,7 +38,9 @@ libc_ifunc (__libc_memcpy,
? __memcpy_thunderx
: (IS_FALKOR (midr)
? __memcpy_falkor
- : __memcpy_generic)));
+ : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
+ ? __memcpy_thunderx2
+ : __memcpy_generic))));
# undef memcpy
strong_alias (__libc_memcpy, memcpy);
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 4f6921d680..de494d933d 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -74,11 +74,13 @@
#if IS_IN (libc)
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-# define USE_THUNDERX
+# ifndef USE_THUNDERX2
+# undef MEMCPY
+# define MEMCPY __memcpy_thunderx
+# undef MEMMOVE
+# define MEMMOVE __memmove_thunderx
+# define USE_THUNDERX
+# endif
ENTRY_ALIGN (MEMMOVE, 6)
@@ -180,7 +182,7 @@ L(copy96):
.p2align 4
L(copy_long):
-# ifdef USE_THUNDERX
+# if defined(USE_THUNDERX) || defined (USE_THUNDERX2)
/* On thunderx, large memcpy's are helped by software prefetching.
This loop is identical to the one below it but with prefetching
@@ -194,7 +196,11 @@ L(copy_long):
bic dst, dstin, 15
ldp D_l, D_h, [src]
sub src, src, tmp1
+# if defined(USE_THUNDERX)
prfm pldl1strm, [src, 384]
+# elif defined(USE_THUNDERX2)
+ prfm pldl1strm, [src, 256]
+# endif
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
@@ -204,9 +210,13 @@ L(copy_long):
subs count, count, 128 + 16 /* Test and readjust count. */
L(prefetch_loop64):
+# if defined(USE_THUNDERX)
tbz src, #6, 1f
prfm pldl1strm, [src, 512]
1:
+# elif defined(USE_THUNDERX2)
+ prfm pldl1strm, [src, 256]
+# endif
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
new file mode 100644
index 0000000000..8501abf725
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -0,0 +1,27 @@
+/* A Thunderx2 Optimized memcpy implementation for AARCH64.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* The actual code in this memcpy and memmove is in memcpy_thunderx.S.
+ The only real differences are with the prefetching instructions. */
+
+#define MEMCPY __memcpy_thunderx2
+#define MEMMOVE __memmove_thunderx2
+#define USE_THUNDERX2
+
+#include "memcpy_thunderx.S"
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index c646f9dad1..cde655b9bd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -41,6 +41,11 @@
#define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \
&& MIDR_PARTNUM(midr) == 0x0a1)
+#define IS_THUNDERX2PA(midr) (MIDR_IMPLEMENTOR(midr) == 'B' \
+ && MIDR_PARTNUM(midr) == 0x516)
+#define IS_THUNDERX2(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \
+ && MIDR_PARTNUM(midr) == 0xaf)
+
#define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q' \
&& MIDR_PARTNUM(midr) == 0xc00)