Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r--  sysdeps/x86_64/multiarch/Makefile                 |  23
-rw-r--r--  sysdeps/x86_64/multiarch/Versions                 |  13
-rw-r--r--  sysdeps/x86_64/multiarch/bcopy.S                  |   5
-rw-r--r--  sysdeps/x86_64/multiarch/ifunc-impl-list.c        |   6
-rw-r--r--  sysdeps/x86_64/multiarch/memcmp-sse4.S            | 339
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy-c.c               |  80
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S    |   5
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy-ssse3-back.S      |  26
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy.S                 |  35
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy_chk-c.c           |   1
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy_chk.S             |  14
-rw-r--r--  sysdeps/x86_64/multiarch/memmove-c.c              | 118
-rw-r--r--  sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S   |   6
-rw-r--r--  sysdeps/x86_64/multiarch/memmove-ssse3-back.S     |   8
-rw-r--r--  sysdeps/x86_64/multiarch/memmove.c                |  34
-rw-r--r--  sysdeps/x86_64/multiarch/memmove_chk-c.c          |   1
-rw-r--r--  sysdeps/x86_64/multiarch/memmove_chk.c            |  10
-rw-r--r--  sysdeps/x86_64/multiarch/mempcpy-c.c              |  36
-rw-r--r--  sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S   |   6
-rw-r--r--  sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S     |  12
-rw-r--r--  sysdeps/x86_64/multiarch/mempcpy.S                |  37
-rw-r--r--  sysdeps/x86_64/multiarch/mempcpy_chk-c.c          |   1
-rw-r--r--  sysdeps/x86_64/multiarch/mempcpy_chk.S            |  14
-rw-r--r--  sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S  |  76
-rw-r--r--  sysdeps/x86_64/multiarch/strchr.S                 |  22
-rw-r--r--  sysdeps/x86_64/multiarch/strcmp-sse42.S           | 238
-rw-r--r--  sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S  | 174
-rw-r--r--  sysdeps/x86_64/multiarch/strrchr.S                |  16
-rw-r--r--  sysdeps/x86_64/multiarch/wcscpy-ssse3.S           | 171
29 files changed, 1456 insertions(+), 71 deletions(-)
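
The pattern repeated throughout this patch: when glibc is built for Intel MPX (__CHKP__), pointer arguments arrive with their bounds in the %bnd0/%bnd1 registers, and each block of loads is preceded by bndcl (lower-bound check) and bndcu (upper-bound check) instructions that raise #BR on a violation. A minimal C model of what one such check pair does; the struct and function below are illustrative, not part of the patch:

#include <stdint.h>
#include <stdlib.h>

/* Illustrative model of an MPX bounds register: [lb, ub], inclusive.  */
struct bnd { uintptr_t lb, ub; };

/* What a bndcl/bndcu pair verifies before an access at address P.  */
static void
check (const void *p, struct bnd b)
{
  uintptr_t a = (uintptr_t) p;
  if (a < b.lb)     /* bndcl would raise #BR here */
    abort ();
  if (a > b.ub)     /* bndcu would raise #BR here */
    abort ();
}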
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 203d16eed3..bdf7964d14 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -26,6 +26,29 @@ CFLAGS-strstr.c += -msse4
CFLAGS-strcasestr.c += -msse4
CFLAGS-strcasestr-nonascii.c += -msse4
endif
+
+ifeq ($(enable-mpx), yes)
+sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \
+ memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c
+# These are C versions written with intrinsics; bounds checks must be added manually as intrinsics.
+CFLAGS-varshift.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcspn-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strpbrk-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strspn-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strstr.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcasestr.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcasestr-nonascii.c += -fno-chkp-check-read -fno-chkp-check-write
+# Checks are inserted manually in these routines.
+CFLAGS-memcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-mempcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-memmove-c.c += -fno-chkp-check-read -fno-chkp-check-write
+endif
+
+ifeq ($(enable-mpx-write-only), yes)
+CFLAGS-memcpy-c.c += -D__CHKWR__
+CFLAGS-memmove-c.c += -D__CHKWR__
+endif
+
endif
ifeq ($(subdir),wcsmbs)
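
The -fno-chkp-check-read/-fno-chkp-check-write flags suppress the MPX checks the compiler would otherwise instrument these files with; the checks are instead written out explicitly with the checker intrinsics, as memcpy-c.c below does. A minimal sketch of that manual-check style, assuming the __bnd_chk_ptr_lbounds/__bnd_chk_ptr_ubounds intrinsics of an MPX-enabled compiler and a hypothetical fill() helper:

#include <stddef.h>

/* Validate both ends of the accessed range once, then run an
   unchecked loop; the compiler emits no per-access checks because
   the file is built with -fno-chkp-check-read/-write.  */
static void
fill (char *buf, size_t n)
{
  if (n == 0)
    return;
  __bnd_chk_ptr_lbounds (buf);
  __bnd_chk_ptr_ubounds (buf + n - 1);
  while (n--)
    *buf++ = 0;
}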
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
index 59b185ac8d..5325bdece6 100644
--- a/sysdeps/x86_64/multiarch/Versions
+++ b/sysdeps/x86_64/multiarch/Versions
@@ -2,4 +2,17 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ chkp_memcpy_nobnd;
+ chkp_memmove_nobnd;
+ chkp_mempcpy_nobnd;
+ chkp_memcpy_nobnd_nochk;
+ chkp_memmove_nobnd_nochk;
+ chkp_mempcpy_nobnd_nochk;
+ chkp_memcpy_nochk;
+ chkp_memmove_nochk;
+ chkp_mempcpy_nochk;
+ }
+%endif
}
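
The nine chkp_* exports follow one naming scheme: _nobnd variants (built from the SSSE3 assembly below) do not propagate pointer bounds stored in the copied data, _nochk variants (the C implementations) skip the entry bounds checks, and _nobnd_nochk variants do neither. A sketch of the resulting declarations, following memcpy's prototype:

#include <stddef.h>

/* Checked copy that does not propagate stored pointer bounds.  */
extern void *chkp_memcpy_nobnd (void *dst, const void *src, size_t n);
/* Bounds-propagating copy with the entry checks skipped.  */
extern void *chkp_memcpy_nochk (void *dst, const void *src, size_t n);
/* Neither entry checks nor bounds propagation.  */
extern void *chkp_memcpy_nobnd_nochk (void *dst, const void *src, size_t n);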
diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S
index 639f02bde3..9809d471ba 100644
--- a/sysdeps/x86_64/multiarch/bcopy.S
+++ b/sysdeps/x86_64/multiarch/bcopy.S
@@ -3,5 +3,10 @@
.text
ENTRY(bcopy)
xchg %rdi, %rsi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
jmp __libc_memmove /* Branch to IFUNC memmove. */
END(bcopy)
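
bcopy takes (src, dst, n) while memmove takes (dst, src, n), so the xchg swaps the pointer registers; under __CHKP__ the three bndmov instructions perform the matching swap of the bounds registers through %bnd2 as scratch, keeping each pointer paired with its own bounds. In C terms, using the illustrative struct bnd from the note above:

/* Three-move swap mirroring the bndmov sequence in bcopy.  */
static void
swap_bounds (struct bnd *b0, struct bnd *b1)
{
  struct bnd tmp = *b0;   /* bndmov %bnd0, %bnd2 */
  *b0 = *b1;              /* bndmov %bnd1, %bnd0 */
  *b1 = tmp;              /* bndmov %bnd2, %bnd1 */
}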
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d0992e113f..e3a4163c5b 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
+#ifndef __CHKP__
+  /* We use a specific version when glibc is built with MPX.  */
/* Support sysdeps/x86_64/multiarch/memmove_chk.S. */
IFUNC_IMPL (i, name, __memmove_chk,
IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
@@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
#ifdef SHARED
+#ifndef __CHKP__
+  /* We use specific versions of memcpy, memcpy_chk and mempcpy when Intel MPX is enabled.  */
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
IFUNC_IMPL (i, name, __memcpy_chk,
IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
@@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
__mempcpy_ssse3)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200f4c..b5c6675d31 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -48,6 +48,13 @@ ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
shl $2, %rdx
# endif
+# ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+L(NoEntryCheck):
+# endif
pxor %xmm0, %xmm0
cmp $79, %rdx
ja L(79bytesormore)
@@ -70,6 +77,10 @@ L(firstbyte):
ALIGN (4)
L(79bytesormore):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
pxor %xmm1, %xmm2
@@ -90,21 +101,37 @@ L(79bytesormore):
L(less128bytes):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -112,11 +139,19 @@ L(less128bytes):
cmp $32, %rdx
jb L(less32bytesin64)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -139,41 +174,73 @@ L(128bytesormore):
L(less256bytes):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -188,11 +255,19 @@ L(less256bytes):
cmp $32, %rdx
jb L(less32bytesin128)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -207,81 +282,145 @@ L(less32bytesin128):
L(less512bytes):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqu 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqu 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqu 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqu 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqu 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqu 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqu 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqu 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -299,11 +438,19 @@ L(less512bytes):
cmp $32, %rdx
jb L(less32bytesin256)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -331,18 +478,34 @@ L(512bytesormore):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loop):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined):
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -403,21 +582,37 @@ L(2aligned):
L(less128bytesin2aligned):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -425,11 +620,19 @@ L(less128bytesin2aligned):
cmp $32, %rdx
jb L(less32bytesin64in2alinged)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -453,41 +656,73 @@ L(128bytesormorein2aligned):
L(less256bytesin2alinged):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -502,11 +737,19 @@ L(less256bytesin2alinged):
cmp $32, %rdx
jb L(less32bytesin128in2aligned)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -523,81 +766,145 @@ L(less32bytesin128in2aligned):
L(256bytesormorein2aligned):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqa 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqa 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqa 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqa 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqa 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqa 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqa 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqa 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -648,18 +955,34 @@ L(512bytesormorein2aligned):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loopin2aligned):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined):
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
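
memcmp may legitimately stop before reading all n bytes (at the first difference), so the full [p, p+n) range cannot be validated up front without turning a conforming call into a spurious #BR; instead the patch checks each 16-byte block just before loading it, and a zero-length call bypasses the entry check via L(NoEntryCheck). A byte-at-a-time C model of this incremental-check discipline, again assuming the MPX checker intrinsics:

#include <stddef.h>

/* Check each address immediately before it is read, never past the
   first difference.  */
static int
cmp_checked (const unsigned char *a, const unsigned char *b, size_t n)
{
  for (size_t i = 0; i < n; i++)
    {
      __bnd_chk_ptr_ubounds (a + i);   /* bndcu i(%rdi), %bnd0 */
      __bnd_chk_ptr_ubounds (b + i);   /* bndcu i(%rsi), %bnd1 */
      if (a[i] != b[i])
        return a[i] - b[i];
    }
  return 0;
}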
diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c
new file mode 100644
index 0000000000..6fa50eada1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-c.c
@@ -0,0 +1,80 @@
+/* C version of memcpy for use when Intel MPX is enabled,
+   in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+
+void *chkp_memcpy_nochk (void *dst, const void *src, size_t n);
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ if (!n) return dst;
+
+ __bnd_chk_ptr_lbounds(dst);
+ __bnd_chk_ptr_ubounds(dst+n-1);
+#ifndef __CHKWR__
+ __bnd_chk_ptr_lbounds(src);
+ __bnd_chk_ptr_ubounds(src+n-1);
+#endif
+
+ return chkp_memcpy_nochk(dst, src, n);
+}
+
+void *
+chkp_memcpy_nochk (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+
+# if defined SHARED && !defined NOT_IN_libc && !defined IA32
+# include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
+# else
+weak_alias (__memcpy, memcpy)
+# endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
new file mode 100644
index 0000000000..7fedbeef8e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
@@ -0,0 +1,5 @@
+/* Optimized version of memcpy without any checks or bounds copying.  */
+#define MEMCPY chkp_memcpy_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef27d..16b4e680a1 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -27,7 +27,11 @@
#include "asm-syntax.h"
#ifndef MEMCPY
-# define MEMCPY __memcpy_ssse3_back
+# if defined __CHKP__ || defined __CHKWR__
+# define MEMCPY chkp_memcpy_nobnd
+# else
+# define MEMCPY __memcpy_ssse3_back
+# endif
# define MEMCPY_CHK __memcpy_chk_ssse3_back
#endif
@@ -48,7 +52,7 @@
ud2
.section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && defined MEMCPY_CHK
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
@@ -56,6 +60,15 @@ END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
+#ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
+
mov %rdi, %rax
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
@@ -87,6 +100,15 @@ L(bk_write):
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
+#ifdef __CHKP__
+L(NoEntryCheck):
+ mov %rdi, %rax
+# ifdef USE_AS_MEMPCPY
+ add %rdx, %rax
+# endif
+ ret
+#endif
+
ALIGN (4)
L(144bytesormore):
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index a1e5031376..fc5ab2da03 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <shlib-compat.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need memcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
.text
ENTRY(__new_memcpy)
.type __new_memcpy, @gnu_indirect_function
@@ -43,37 +44,39 @@ ENTRY(__new_memcpy)
3: ret
END(__new_memcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __memcpy_sse2, @function; \
.globl __memcpy_sse2; \
.hidden __memcpy_sse2; \
.p2align 4; \
__memcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __memcpy_chk_sse2, @function; \
.globl __memcpy_chk_sse2; \
.p2align 4; \
__memcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
-# undef libc_hidden_builtin_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
+# endif
+
+# include "../memcpy.S"
-#include "../memcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
new file mode 100644
index 0000000000..1eee86c639
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memcpy_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index ad01d8cd9f..6f87f2686d 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch memcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__memcpy_chk)
leaq __memcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__memcpy_chk)
-# else
-# include "../memcpy_chk.S"
+# else
+# include "../memcpy_chk.S"
+# endif
# endif
#endif
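
memcpy.S and memcpy_chk.S above, and memmove.c, memmove_chk.c, mempcpy.S and mempcpy_chk.S below, all receive the same mechanical change: the ifunc dispatch code is compiled out when MPX checks are enabled (guarded by #if !defined __CHKP__ && !defined __CHKWR__, or #ifndef __CHKP__ for the C dispatchers), with the body reindented one level, so that every call resolves to the bounds-preserving C implementations added by this patch rather than going through multiarch selection.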
diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c
new file mode 100644
index 0000000000..7111128e75
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-c.c
@@ -0,0 +1,118 @@
+/* C version of memmove for use when Intel MPX is enabled,
+   in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+
+void *chkp_memmove_nochk (void *dst, const void *src, size_t n);
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ if (n == 0) return dst;
+
+ __bnd_chk_ptr_lbounds(dst);
+ __bnd_chk_ptr_ubounds(dst+n-1);
+#ifndef __CHKWR__
+ __bnd_chk_ptr_lbounds(src);
+ __bnd_chk_ptr_ubounds(src+n-1);
+#endif
+ return chkp_memmove_nochk(dst, src, n);
+}
+
+
+void *
+chkp_memmove_nochk (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ /* forward copying */
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+ offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1);
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ /* forward copying */
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __libc_memmove)
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
+
+# if defined SHARED && !defined NOT_IN_libc
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
+# endif
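
__memmove picks the copy direction from the pointer order: when the destination starts above the source, the tail of the source could be overwritten before it is read, so the copy runs backward; otherwise it runs forward. A hypothetical example of the case the backward path protects:

#include <string.h>

/* Shift 15 bytes right by one within the same buffer.  The regions
   overlap with dst above src, so a forward byte copy would smear
   buf[0] across the range; memmove must copy backward here.  */
void
shift_right_by_one (char *buf)
{
  memmove (buf + 1, buf, 15);
}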
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
new file mode 100644
index 0000000000..2a1f3e67b7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
@@ -0,0 +1,6 @@
+/* Optimized version of memmove without any checks or bounds copying.  */
+#define USE_AS_MEMMOVE
+#define MEMCPY chkp_memmove_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
index f9a4e9aff9..478141b14a 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
@@ -1,4 +1,10 @@
#define USE_AS_MEMMOVE
-#define MEMCPY __memmove_ssse3_back
+#if defined __CHKP__ || defined __CHKWR__
+/* Version of memmove that does not copy bounds even if there
+   are pointers in the source buffer.  */
+# define MEMCPY chkp_memmove_nobnd
+#else
+# define MEMCPY __memmove_ssse3_back
+#endif
#define MEMCPY_CHK __memmove_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8149c487d5..0d2c6f0266 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -17,31 +17,32 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifndef NOT_IN_libc
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
+#ifndef __CHKP__
+# ifndef NOT_IN_libc
+# define MEMMOVE __memmove_sse2
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
__hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
+# endif
/* Redefine memmove so that the compiler won't complain about the type
mismatch with the IFUNC selector in strong_alias, below. */
-# undef memmove
-# define memmove __redirect_memmove
-# include <string.h>
-# undef memmove
+# undef memmove
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-#endif
+# endif
-#include "string/memmove.c"
+# include "string/memmove.c"
-#ifndef NOT_IN_libc
-# include <shlib-compat.h>
-# include "init-arch.h"
+# ifndef NOT_IN_libc
+# include <shlib-compat.h>
+# include "init-arch.h"
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
@@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove,
strong_alias (__libc_memmove, memmove)
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c
new file mode 100644
index 0000000000..bbf53d00d3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 17ed460324..c1b0b9304b 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -17,19 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <string.h>
-#include "init-arch.h"
+#ifndef __CHKP__
+# include <string.h>
+# include "init-arch.h"
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define MEMMOVE_CHK __memmove_chk_sse2
extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
-#include "debug/memmove_chk.c"
+# include "debug/memmove_chk.c"
libc_ifunc (__memmove_chk,
HAS_SSSE3
? (HAS_FAST_COPY_BACKWARD
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
: __memmove_chk_sse2);
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c
new file mode 100644
index 0000000000..522fb86e3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-c.c
@@ -0,0 +1,36 @@
+/* C version of mempcpy for use when Intel MPX is enabled,
+   in order to process an array of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+#include <string.h>
+
+void *chkp_memcpy_nochk (void *dst, const void *src, size_t n);
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n) + n;
+}
+
+void *
+chkp_mempcpy_nochk (void *dst, const void *src, size_t n)
+{
+ return chkp_memcpy_nochk(dst, src, n) + n;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __GI___mempcpy)
+weak_alias (mempcpy, __mempcpy)
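
mempcpy is memcpy that returns one past the last byte written, which makes chained appends natural:

#define _GNU_SOURCE
#include <string.h>

/* Append two byte ranges and return the new end pointer.  */
char *
join (char *out, const char *a, size_t na, const char *b, size_t nb)
{
  out = mempcpy (out, a, na);   /* returns out + na */
  out = mempcpy (out, b, nb);
  return out;
}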
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
new file mode 100644
index 0000000000..eb929f4182
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
@@ -0,0 +1,6 @@
+/* Optimized version of mempcpy without any checks or bounds copying.  */
+#define USE_AS_MEMPCPY
+#define MEMCPY chkp_mempcpy_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
index 82ffacb8fb..f32ecfc76e 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
@@ -1,4 +1,12 @@
#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_ssse3_back
-#define MEMCPY_CHK __mempcpy_chk_ssse3_back
+
+#if defined __CHKP__ || defined __CHKWR__
+/* Version of mempcpy that does not copy bounds even if there
+   are pointers in the source buffer.  */
+# define MEMCPY chkp_mempcpy_nobnd
+#else
+# define MEMCPY __mempcpy_ssse3_back
+#endif
+
+#define MEMCPY_CHK __mempcpy_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index b8b7fcd121..4ec5825989 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -18,13 +18,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need mempcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
@@ -40,38 +41,40 @@ ENTRY(__mempcpy)
2: ret
END(__mempcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __mempcpy_sse2, @function; \
.p2align 4; \
.globl __mempcpy_sse2; \
.hidden __mempcpy_sse2; \
__mempcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __mempcpy_chk_sse2, @function; \
.globl __mempcpy_chk_sse2; \
.p2align 4; \
__mempcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
+# undef libc_hidden_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_def(name) \
+# define libc_hidden_def(name) \
.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
+# endif
+
+# include "../mempcpy.S"
-#include "../mempcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
new file mode 100644
index 0000000000..ba170784c3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/mempcpy_chk.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 3801db399b..98acf9691c 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch mempcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk)
leaq __mempcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__mempcpy_chk)
-# else
-# include "../mempcpy_chk.S"
+# else
+# include "../mempcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 028c6d3d74..a3535ad500 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -25,6 +25,14 @@
# define STRCAT __strcat_sse2_unaligned
# endif
+# ifdef __CHKP__
+# define RETURN \
+ bndcu -1(%rdi, %rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
# define USE_AS_STRCAT
.text
@@ -37,6 +45,10 @@ ENTRY (STRCAT)
/* Inline corresponding strlen file, temporary until new strcpy
implementation gets merged. */
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %rax, %rax
mov %edi, %ecx
and $0x3f, %ecx
@@ -67,84 +79,132 @@ L(align16_start):
pxor %xmm1, %xmm1
pxor %xmm2, %xmm2
pxor %xmm3, %xmm3
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -153,6 +213,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $80, %rax
pmovmskb %xmm0, %edx
@@ -162,6 +225,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm1
add $16, %rax
pmovmskb %xmm1, %edx
@@ -171,6 +237,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm2
add $16, %rax
pmovmskb %xmm2, %edx
@@ -180,6 +249,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm3
add $16, %rax
pmovmskb %xmm3, %edx
@@ -187,8 +259,12 @@ L(align16_start):
jnz L(exit)
add $16, %rax
+
.p2align 4
L(align64_loop):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movaps (%rax), %xmm4
pminub 16(%rax), %xmm4
movaps 32(%rax), %xmm5
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index f170238b55..4311e8689c 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -91,6 +91,10 @@ __strchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
pxor %xmm2, %xmm2
movd %esi, %xmm1
movl %edi, %ecx
@@ -124,6 +128,9 @@ __strchr_sse42:
ja L(return_null)
L(unaligned_match):
addq %rdi, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
.p2align 4
@@ -135,15 +142,27 @@ L(unaligned_no_match):
L(loop):
addq $16, %r8
L(aligned_start):
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
jmp L(loop)
@@ -159,6 +178,9 @@ L(return_null):
.p2align 4
L(loop_exit):
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
cfi_endproc
.size __strchr_sse42, .-__strchr_sse42
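
For the string routines the length is not known on entry, so strcat and strchr check as they scan: each 16-byte probe is preceded by a bndcu at the address about to be read, and the result is checked too: strcat's RETURN macro re-checks the last byte written (bndcu -1(%rdi,%rax)) and strchr checks the match address (bndcu (%rax)) before handing the pointer back. A C model of check-as-you-scan, assuming the checker intrinsics:

#include <stddef.h>

/* Byte-at-a-time model of the checked scan loops above: validate
   each address just before reading it, since the string length is
   unknown in advance.  */
static size_t
checked_strlen (const char *s)
{
  size_t i = 0;
  for (;;)
    {
      __bnd_chk_ptr_ubounds (s + i);   /* bndcu before the load */
      if (s[i] == '\0')
        return i;
      i++;
    }
}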
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c84f1c2b31..edfa915707 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -127,6 +127,14 @@ STRCMP_SSE42:
je LABEL(Byte0)
mov %rdx, %r11
#endif
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -210,6 +218,10 @@ LABEL(touppermask):
#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte
@@ -231,6 +243,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -280,6 +297,10 @@ LABEL(ashr_0):
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
.p2align 4
LABEL(ashr_0_use):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -295,6 +316,10 @@ LABEL(ashr_0_use):
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use):
jbe LABEL(strcmp_exitz)
#endif
lea -16(%rdx, %rcx), %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movzbl (%rdi, %rcx), %eax
movzbl (%rsi, %rcx), %edx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -362,6 +391,15 @@ LABEL(ashr_1):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_1_use)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_1_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_1_use):
@@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $14, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_1_check)
+#else
ja LABEL(nibble_ashr_1_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -450,6 +492,15 @@ LABEL(ashr_2):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_2_use)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_2_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_2_use):
@@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $13, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_2_check)
+#else
ja LABEL(nibble_ashr_2_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -539,6 +594,15 @@ LABEL(ashr_3):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_3_use)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_3_restart_use)
+#endif
LABEL(loop_ashr_3_use):
add $16, %r10
@@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $12, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_3_check)
+#else
ja LABEL(nibble_ashr_3_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -627,6 +695,15 @@ LABEL(ashr_4):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_4_use)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_4_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_4_use):
@@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $11, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_4_check)
+#else
ja LABEL(nibble_ashr_4_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -716,6 +797,15 @@ LABEL(ashr_5):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_5_use)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_5_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_5_use):
@@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $10, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_5_check)
+#else
ja LABEL(nibble_ashr_5_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -806,6 +900,15 @@ LABEL(ashr_6):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_6_use)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_6_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_6_use):
@@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $9, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_6_check)
+#else
ja LABEL(nibble_ashr_6_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -895,6 +1002,15 @@ LABEL(ashr_7):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_7_use)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_7_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_7_use):
@@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $8, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_7_check)
+#else
ja LABEL(nibble_ashr_7_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -984,6 +1104,15 @@ LABEL(ashr_8):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_8_use)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_8_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_8_use):
@@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $7, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_8_check)
+#else
ja LABEL(nibble_ashr_8_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1073,6 +1206,15 @@ LABEL(ashr_9):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_9_use)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_9_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_9_use):
@@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $6, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_9_check)
+#else
ja LABEL(nibble_ashr_9_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1163,6 +1309,15 @@ LABEL(ashr_10):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_10_use)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_10_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_10_use):
@@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $5, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_10_check)
+#else
ja LABEL(nibble_ashr_10_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1252,6 +1411,15 @@ LABEL(ashr_11):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_11_use)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_11_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_11_use):
@@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $4, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_11_check)
+#else
ja LABEL(nibble_ashr_11_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1341,6 +1513,15 @@ LABEL(ashr_12):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_12_use)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_12_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_12_use):
@@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $3, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_12_check)
+#else
ja LABEL(nibble_ashr_12_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1431,6 +1616,15 @@ LABEL(ashr_13):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_13_use)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_13_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_13_use):
@@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $2, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_13_check)
+#else
ja LABEL(nibble_ashr_13_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1521,6 +1719,15 @@ LABEL(ashr_14):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_14_use)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_14_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_14_use):
@@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $1, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_14_check)
+#else
ja LABEL(nibble_ashr_14_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1613,6 +1824,15 @@ LABEL(ashr_15):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_15_use)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_15_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_15_use):
@@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $0, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_15_check)
+#else
ja LABEL(nibble_ashr_15_restart_use)
+#endif
LABEL(nibble_ashr_exit_use):
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1691,6 +1915,11 @@ LABEL(exit_use):
test %r8d, %r8d
jz LABEL(ret_use)
xchg %eax, %edx
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(ret_use):
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
@@ -1707,6 +1936,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -1717,6 +1951,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
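
On the exit paths, strcmp restores the original argument order with xchg when %r8d records that the two strings were swapped at entry; under __CHKP__ the bounds registers must follow the same swap, and because bndmov moves only one register at a time the patch rotates through %bnd2 as scratch. A sketch of that three-instruction exchange, assuming %bnd0 and %bnd1 track the first and second string argument respectively:

#ifdef __CHKP__
	bndmov	%bnd0, %bnd2	/* save the first string's bounds */
	bndmov	%bnd1, %bnd0	/* %bnd0 now matches the exchanged pointer */
	bndmov	%bnd2, %bnd1	/* %bnd1 picks up the saved bounds */
#endif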
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 7710173c68..e6baee92db 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -33,7 +33,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
lea (%r11, %rcx), %rcx; \
- jmp *%rcx
+ jmp *%rcx
# ifndef USE_AS_STRCAT
@@ -51,6 +51,16 @@ ENTRY (STRCPY)
# endif
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+ bndcu -1(%rdi, %rdx), %bnd0
+# endif
+# endif
+
and $63, %rcx
cmp $32, %rcx
jbe L(SourceStringAlignmentLess32)
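
For strcpy and its variants the checks start at the entry point: bndcl/bndcu on the first byte confirm that both pointers begin inside their objects, and the strncpy build additionally probes the last byte the count allows, so an oversized n faults before a single byte is copied. Roughly, with %rdi/%rsi as destination/source and %rdx as the strncpy count, as in the hunk above:

# ifdef __CHKP__
	bndcl	(%rdi), %bnd0		/* destination lower bound */
	bndcu	(%rdi), %bnd0		/* first destination byte vs. upper bound */
	bndcl	(%rsi), %bnd1		/* source lower bound */
	bndcu	(%rsi), %bnd1		/* first source byte vs. upper bound */
#  if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	bndcu	-1(%rdi, %rdx), %bnd0	/* last byte strncpy may write */
#  endif
# endif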
@@ -79,6 +89,9 @@ ENTRY (STRCPY)
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+# endif
pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx
@@ -91,6 +104,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm1, (%rdi)
/* If source address alignment != destination address alignment */
@@ -101,6 +117,10 @@ L(Unalign16Both):
add %rcx, %r8
# endif
mov $16, %rcx
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqa (%rsi, %rcx), %xmm1
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
@@ -118,6 +138,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -134,6 +158,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm4
movdqu %xmm3, (%rdi, %rcx)
pcmpeqb %xmm4, %xmm0
@@ -150,6 +178,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm1
movdqu %xmm4, (%rdi, %rcx)
pcmpeqb %xmm1, %xmm0
@@ -166,6 +198,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
pcmpeqb %xmm2, %xmm0
@@ -182,6 +218,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -198,6 +238,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqu %xmm3, (%rdi, %rcx)
mov %rsi, %rdx
lea 16(%rsi, %rcx), %rsi
@@ -208,6 +252,9 @@ L(Unalign16Both):
lea 128(%r8, %rdx), %r8
# endif
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%rsi), %bnd1
+# endif
movaps (%rsi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rsi), %xmm5
@@ -229,6 +276,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %rdi
add $64, %rsi
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+ bndcu (%rdi), %bnd0
+# endif
movdqu %xmm4, -64(%rdi)
movaps (%rsi), %xmm2
movdqa %xmm2, %xmm4
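
Note the check density in the 64-byte loop: a single bndcu on 48(%rsi) (plus one on each post-increment pointer inside the loop) covers four 16-byte loads, probing only where the highest chunk starts. That appears deliberate: for string routines the in-bounds NUL terminator stops the loop, so one probe per iteration is enough to catch a runaway pointer without paying for four checks. A sketch of the iteration head under that assumption:

L(Unaligned64Loop):
# ifdef __CHKP__
	bndcu	48(%rsi), %bnd1	/* start of the last chunk read below */
# endif
	movaps	(%rsi), %xmm2	/* ...loads continue up to 48(%rsi) */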
@@ -271,16 +322,28 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%rdi, %rdx), %bnd0
+# endif
lea 48(%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 63(%rdi), %bnd0
+# endif
movdqu %xmm7, 48(%rdi)
add $15, %r8
sub %rdx, %r8
+# ifdef __CHKP__
+ bndcu 49(%rdi, %rdx), %bnd0
+# endif
lea 49(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
# else
@@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+ bndcu 15(%rdi), %bnd0
+# endif
pcmpeqb %xmm2, %xmm0
movdqu %xmm1, (%rdi)
pmovmskb %xmm0, %rdx
@@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
add $63, %r8
sub %rdx, %r8
@@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0):
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
@@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16):
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
@@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
mov %dh, (%rdi)
# ifdef USE_AS_STPCPY
lea (%rdi), %rax
@@ -543,6 +622,9 @@ L(Exit1):
.p2align 4
L(Exit2):
mov (%rsi), %dx
+# ifdef __CHKP__
+ bndcu 1(%rdi), %bnd0
+# endif
mov %dx, (%rdi)
# ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
@@ -557,6 +639,9 @@ L(Exit2):
.p2align 4
L(Exit3):
mov (%rsi), %cx
+# ifdef __CHKP__
+ bndcu 2(%rdi), %bnd0
+# endif
mov %cx, (%rdi)
mov %dh, 2(%rdi)
# ifdef USE_AS_STPCPY
@@ -572,6 +657,9 @@ L(Exit3):
.p2align 4
L(Exit4):
mov (%rsi), %edx
+# ifdef __CHKP__
+ bndcu 3(%rdi), %bnd0
+# endif
mov %edx, (%rdi)
# ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
@@ -586,6 +674,9 @@ L(Exit4):
.p2align 4
L(Exit5):
mov (%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+# endif
mov %dh, 4(%rdi)
mov %ecx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -602,6 +693,9 @@ L(Exit5):
L(Exit6):
mov (%rsi), %ecx
mov 4(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 5(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
# ifdef USE_AS_STPCPY
@@ -618,6 +712,9 @@ L(Exit6):
L(Exit7):
mov (%rsi), %ecx
mov 3(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 6(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
# ifdef USE_AS_STPCPY
@@ -633,6 +730,9 @@ L(Exit7):
.p2align 4
L(Exit8):
mov (%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 7(%rdi), %bnd0
+# endif
mov %rdx, (%rdi)
# ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
@@ -647,6 +747,9 @@ L(Exit8):
.p2align 4
L(Exit9):
mov (%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+# endif
mov %dh, 8(%rdi)
mov %rcx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -663,6 +766,9 @@ L(Exit9):
L(Exit10):
mov (%rsi), %rcx
mov 8(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 9(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -679,6 +785,9 @@ L(Exit10):
L(Exit11):
mov (%rsi), %rcx
mov 7(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 10(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -695,6 +804,9 @@ L(Exit11):
L(Exit12):
mov (%rsi), %rcx
mov 8(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 11(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -711,6 +823,9 @@ L(Exit12):
L(Exit13):
mov (%rsi), %rcx
mov 5(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
# ifdef USE_AS_STPCPY
@@ -727,6 +842,9 @@ L(Exit13):
L(Exit14):
mov (%rsi), %rcx
mov 6(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 13(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
# ifdef USE_AS_STPCPY
@@ -743,6 +861,9 @@ L(Exit14):
L(Exit15):
mov (%rsi), %rcx
mov 7(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 14(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -758,6 +879,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
# ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
@@ -772,6 +896,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %dh, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -788,6 +915,9 @@ L(Exit17):
L(Exit18):
movdqu (%rsi), %xmm0
mov 16(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 17(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -804,6 +934,9 @@ L(Exit18):
L(Exit19):
movdqu (%rsi), %xmm0
mov 15(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -820,6 +953,9 @@ L(Exit19):
L(Exit20):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -836,6 +972,9 @@ L(Exit20):
L(Exit21):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dh, 20(%rdi)
@@ -853,6 +992,9 @@ L(Exit21):
L(Exit22):
movdqu (%rsi), %xmm0
mov 14(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 21(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -869,6 +1011,9 @@ L(Exit22):
L(Exit23):
movdqu (%rsi), %xmm0
mov 15(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 22(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -885,6 +1030,9 @@ L(Exit23):
L(Exit24):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 23(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -901,6 +1049,9 @@ L(Exit24):
L(Exit25):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
mov %dh, 24(%rdi)
@@ -919,6 +1070,9 @@ L(Exit26):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 25(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
@@ -937,6 +1091,9 @@ L(Exit27):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 23(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
@@ -955,6 +1112,9 @@ L(Exit28):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
@@ -972,6 +1132,9 @@ L(Exit28):
L(Exit29):
movdqu (%rsi), %xmm0
movdqu 13(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY
@@ -988,6 +1151,9 @@ L(Exit29):
L(Exit30):
movdqu (%rsi), %xmm0
movdqu 14(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -1004,6 +1170,9 @@ L(Exit30):
L(Exit31):
movdqu (%rsi), %xmm0
movdqu 15(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -1020,6 +1189,9 @@ L(Exit31):
L(Exit32):
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
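
Every Exit-N block in this file stores exactly N bytes at the destination, and the inserted bndcu probes byte N-1, the last one written, against %bnd0; the source reads stay unchecked here, presumably because the preceding pcmpeqb scan already found the terminator within the source bounds. The recurring shape, shown for the 8-byte case with names from the patch:

	.p2align 4
L(Exit8):			/* terminator found at byte 7 */
	mov	(%rsi), %rdx	/* 8-byte read ending at the NUL */
# ifdef __CHKP__
	bndcu	7(%rdi), %bnd0	/* last destination byte written */
# endif
	mov	%rdx, (%rdi)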
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 3f92a41ef9..1fed105bf0 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -97,6 +97,10 @@ __strrchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %eax,%eax /* RAX has the last occurrence of s. */
movd %esi, %xmm1
punpcklbw %xmm1, %xmm1
@@ -135,6 +139,9 @@ L(unaligned_no_byte):
contain the NULL terminator. */
jg L(exit)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
/* Loop start on aligned string. */
.p2align 4
@@ -142,6 +149,9 @@ L(loop):
pcmpistri $0x4a, (%r8), %xmm1
jbe L(match_or_eos)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(match_or_eos):
@@ -149,11 +159,17 @@ L(match_or_eos):
L(match_no_eos):
leaq (%r8,%rcx), %rax
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(had_eos):
jnc L(exit)
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
.p2align 4
L(exit):
ret
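
In strrchr the scan advances 16 bytes per pcmpistri iteration, so the patch re-probes the upper bound each time %r8 is bumped, keeping the faulting check one step ahead of the load. The loop as it reads with the hunk applied:

L(loop):
	pcmpistri $0x4a, (%r8), %xmm1	/* scan 16 bytes for c or NUL */
	jbe	L(match_or_eos)
	addq	$16, %r8
# ifdef __CHKP__
	bndcu	(%r8), %bnd0		/* next chunk must start in bounds */
# endif
	jmp	L(loop)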
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index b7de092228..77889dd555 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3)
mov %rsi, %rcx
mov %rdi, %rdx
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
cmpl $0, (%rcx)
jz L(Exit4)
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
cmpl $0, 4(%rcx)
jz L(Exit8)
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
cmpl $0, 8(%rcx)
jz L(Exit12)
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
cmpl $0, 12(%rcx)
jz L(Exit16)
@@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
mov (%rcx), %r9
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %r9, (%rdx)
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+# endif
pcmpeqd (%rsi), %xmm0
mov 8(%rcx), %r9
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %r9, 8(%rdx)
pmovmskb %xmm0, %rax
@@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 16(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm1
movaps 16(%rcx), %xmm2
movaps %xmm1, (%rdx)
@@ -82,6 +109,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -91,6 +122,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm4
movaps %xmm3, (%rdx, %rsi)
pcmpeqd %xmm4, %xmm0
@@ -100,6 +135,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm1
movaps %xmm4, (%rdx, %rsi)
pcmpeqd %xmm1, %xmm0
@@ -109,6 +148,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm2
movaps %xmm1, (%rdx, %rsi)
pcmpeqd %xmm2, %xmm0
@@ -118,6 +161,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -127,6 +174,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps %xmm3, (%rdx, %rsi)
mov %rcx, %rax
lea 16(%rcx, %rsi), %rcx
@@ -138,6 +189,10 @@ L(Align16Both):
.p2align 4
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 63(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rcx), %xmm5
@@ -168,6 +223,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -49(%rdx), %bnd0
+# endif
movaps %xmm4, -64(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
@@ -176,11 +234,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -33(%rdx), %bnd0
+# endif
movaps %xmm5, -48(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%rdx), %bnd0
+# endif
movaps %xmm6, -32(%rdx)
pcmpeqd %xmm7, %xmm0
@@ -190,11 +254,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %rsi
+# ifdef __CHKP__
+ bndcu -1(%rdx), %bnd0
+# endif
movaps %xmm7, -16(%rdx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps -4(%rcx), %xmm1
movaps 12(%rcx), %xmm2
L(Shl4Start):
@@ -206,6 +276,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -219,6 +293,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -232,6 +310,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -244,6 +326,9 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -258,6 +343,9 @@ L(Shl4Start):
.p2align 4
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -279,6 +367,9 @@ L(Shl4LoopStart):
lea 64(%rcx), %rcx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -287,6 +378,10 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
+# ifdef __CHKP__
+ bndcu -4(%rcx), %bnd1
+ bndcu 11(%rdx), %bnd0
+# endif
movdqu -4(%rcx), %xmm1
mov $12, %rsi
movdqu %xmm1, -4(%rdx)
@@ -294,6 +389,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps -8(%rcx), %xmm1
movaps 8(%rcx), %xmm2
L(Shl8Start):
@@ -305,6 +403,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -318,6 +420,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -331,6 +437,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -343,6 +453,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -357,6 +471,9 @@ L(Shl8Start):
.p2align 4
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -378,6 +495,9 @@ L(Shl8LoopStart):
lea 64(%rcx), %rcx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -386,6 +506,10 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 7(%rdx), %bnd0
+# endif
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
@@ -393,6 +517,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps -12(%rcx), %xmm1
movaps 4(%rcx), %xmm2
L(Shl12Start):
@@ -404,6 +531,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -417,6 +548,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -430,6 +565,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -442,6 +581,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -456,6 +599,9 @@ L(Shl12Start):
.p2align 4
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -476,6 +622,9 @@ L(Shl12LoopStart):
lea 64(%rcx), %rcx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -484,6 +633,10 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 3(%rdx), %bnd0
+# endif
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
@@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes):
jnz L(Exit4)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -510,6 +666,9 @@ L(ExitHigh):
jnz L(Exit12)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
@@ -519,6 +678,9 @@ L(ExitHigh):
.p2align 4
L(Exit4):
movl (%rcx), %eax
+# ifdef __CHKP__
+ bndcu 3(%rdx), %bnd0
+# endif
movl %eax, (%rdx)
mov %rdi, %rax
ret
@@ -526,6 +688,9 @@ L(Exit4):
.p2align 4
L(Exit8):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -533,6 +698,9 @@ L(Exit8):
.p2align 4
L(Exit12):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 11(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
@@ -542,6 +710,9 @@ L(Exit12):
.p2align 4
L(Exit16):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
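
wcscpy operates on 4-byte wchar_t units, so its probes step by 4: the entry sequence checks 4(%rcx), 8(%rcx) and 12(%rcx) immediately before the corresponding cmpl, and the ExitN blocks probe the last byte of the 4-, 8-, 12- or 16-byte store. A sketch of the entry, with %rcx/%rdx being the copies of %rsi/%rdi the function makes first:

# ifdef __CHKP__
	bndcl	(%rdi), %bnd0	/* destination starts in bounds */
	bndcl	(%rsi), %bnd1	/* source lower bound */
	bndcu	(%rsi), %bnd1	/* first source byte vs. upper bound */
# endif
	cmpl	$0, (%rcx)	/* empty string: copy just the terminator */
	jz	L(Exit4)
# ifdef __CHKP__
	bndcu	4(%rcx), %bnd1	/* first byte of the next wchar_t */
# endif
	cmpl	$0, 4(%rcx)
	jz	L(Exit8)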