summaryrefslogtreecommitdiff
path: root/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
diff options
context:
space:
mode:
author Andrew Senkevich <andrew.senkevich@intel.com> 2014-12-29 14:39:46 +0300
committer H.J. Lu <hjl.tools@gmail.com> 2014-12-30 07:19:38 -0800
commit 8b4416d83c79ba77b0669203741c712880a09ae4 (patch)
tree c0701090d02b9e3c9ddcb840e2ad62084c498b4a /sysdeps/i386/i686/multiarch/ifunc-impl-list.c
parent 6d6d7fde04c8ef830205a9900bf101597a2f4b18 (diff)
download glibc-8b4416d83c79ba77b0669203741c712880a09ae4.tar.gz
i386: memcpy functions with SSE2 unaligned load/store
These new memcpy functions are the 32-bit version of the x86_64 SSE2 unaligned memcpy. The average memcpy performance benefit is 18% on Silvermont; other platforms also improved by about 35%. Benchmarked on Silvermont, Haswell, Ivy Bridge, Sandy Bridge and Westmere; performance results are attached in https://sourceware.org/ml/libc-alpha/2014-07/msg00157.html * sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S: New file. * sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S: Likewise. * sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S: Likewise. * sysdeps/i386/i686/multiarch/bcopy.S: Select the sse2_unaligned version if bit_Fast_Unaligned_Load is set. * sysdeps/i386/i686/multiarch/memcpy.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise. * sysdeps/i386/i686/multiarch/memmove.S: Likewise. * sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise. * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add bcopy-sse2-unaligned, memcpy-sse2-unaligned, memmove-sse2-unaligned and mempcpy-sse2-unaligned. * sysdeps/i386/i686/multiarch/ifunc-impl-list.c (MAX_IFUNC): Set to 4. (__libc_ifunc_impl_list): Test __bcopy_sse2_unaligned, __memmove_chk_sse2_unaligned, __memmove_sse2_unaligned, __memcpy_chk_sse2_unaligned, __memcpy_sse2_unaligned, __mempcpy_chk_sse2_unaligned, and __mempcpy_sse2_unaligned.
Diffstat (limited to 'sysdeps/i386/i686/multiarch/ifunc-impl-list.c')
-rw-r--r-- sysdeps/i386/i686/multiarch/ifunc-impl-list.c 16
1 files changed, 15 insertions, 1 deletions
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index e47577690d..4efa9c5ef9 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -23,7 +23,7 @@
#include "init-arch.h"
/* Maximum number of IFUNC implementations. */
-#define MAX_IFUNC 3
+#define MAX_IFUNC 4
/* Fill ARRAY of MAX elements with IFUNC implementations for function
NAME and return the number of valid entries. */
@@ -41,6 +41,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
__bcopy_ssse3_rep)
IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
+ IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSE2,
+ __bcopy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
/* Support sysdeps/i386/i686/multiarch/bzero.S. */
@@ -69,6 +71,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memmove_chk_ssse3_rep)
IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
__memmove_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSE2,
+ __memmove_chk_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
__memmove_chk_ia32))
@@ -78,6 +82,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memmove_ssse3_rep)
IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
__memmove_ssse3)
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_SSE2,
+ __memmove_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
/* Support sysdeps/i386/i686/multiarch/memrchr.S. */
@@ -268,6 +274,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memcpy_chk_ssse3_rep)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
__memcpy_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSE2,
+ __memcpy_chk_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
__memcpy_chk_ia32))
@@ -276,6 +284,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
__memcpy_ssse3_rep)
IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSE2,
+ __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
@@ -284,6 +294,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__mempcpy_chk_ssse3_rep)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
__mempcpy_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSE2,
+ __mempcpy_chk_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
__mempcpy_chk_ia32))
@@ -293,6 +305,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__mempcpy_ssse3_rep)
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
__mempcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSE2,
+ __mempcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strlen.S. */