Diffstat (limited to 'sysdeps/x86_64/multiarch')
29 files changed, 1456 insertions, 71 deletions
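The Makefile hunk below compiles the C implementations with -fno-chkp-check-read and -fno-chkp-check-write, i.e. it turns off the compiler-generated MPX checks and relies on checks written by hand with the Pointer Checker intrinsics instead (see memcpy-c.c further down in this patch). As a rough illustration of that manual pattern — check_buffer is a hypothetical name, not part of the patch, and a GCC build with MPX/Pointer Checker support is assumed:

#include <stddef.h>

/* Hypothetical helper showing the manual check pattern used by
   memcpy-c.c and memmove-c.c below.  The __bnd_chk_* intrinsics
   raise #BR if the pointer lies outside the bounds associated
   with it.  */
static inline void
check_buffer (const void *p, size_t n)
{
  if (n == 0)
    return;                                         /* nothing is accessed */
  __bnd_chk_ptr_lbounds (p);                        /* first byte */
  __bnd_chk_ptr_ubounds ((const char *) p + n - 1); /* last byte */
}

Under the write-only mode added by the second hunk (-D__CHKWR__), the same files compile out the source-side checks and validate only the destination.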
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 203d16eed3..bdf7964d14 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -26,6 +26,29 @@ CFLAGS-strstr.c += -msse4 CFLAGS-strcasestr.c += -msse4 CFLAGS-strcasestr-nonascii.c += -msse4 endif + +ifeq ($(enable-mpx), yes) +sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \ + memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c +# These are C versions written with intrinsics.  We need to add the checks manually using intrinsics. +CFLAGS-varshift.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strcspn-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strpbrk-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strspn-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strstr.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strcasestr.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strcasestr-nonascii.c += -fno-chkp-check-read -fno-chkp-check-write +# Checks are inserted manually in these routines. +CFLAGS-memcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-mempcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-memmove-c.c += -fno-chkp-check-read -fno-chkp-check-write +endif + +ifeq ($(enable-mpx-write-only), yes) +CFLAGS-memcpy-c.c += -D__CHKWR__ +CFLAGS-memmove-c.c += -D__CHKWR__ +endif + endif ifeq ($(subdir),wcsmbs) diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions index 59b185ac8d..5325bdece6 100644 --- a/sysdeps/x86_64/multiarch/Versions +++ b/sysdeps/x86_64/multiarch/Versions @@ -2,4 +2,17 @@ libc { GLIBC_PRIVATE { __get_cpu_features; } +%ifdef __CHKP__ + GLIBC_2.17 { + chkp_memcpy_nobnd; + chkp_memmove_nobnd; + chkp_mempcpy_nobnd; + chkp_memcpy_nobnd_nochk; + chkp_memmove_nobnd_nochk; + chkp_mempcpy_nobnd_nochk; + chkp_memcpy_nochk; + chkp_memmove_nochk; + chkp_mempcpy_nochk; + } +%endif } diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S index 639f02bde3..9809d471ba 100644 --- a/sysdeps/x86_64/multiarch/bcopy.S +++ b/sysdeps/x86_64/multiarch/bcopy.S @@ -3,5 +3,10 @@ .text ENTRY(bcopy) xchg %rdi, %rsi +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif jmp __libc_memmove /* Branch to IFUNC memmove. */ END(bcopy) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index d0992e113f..e3a4163c5b 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3) IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2)) +#ifndef __CHKP__ + /* We use a specific version for MPX glibc.  */ /* Support sysdeps/x86_64/multiarch/memmove_chk.S. */ IFUNC_IMPL (i, name, __memmove_chk, IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, @@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) +#endif /* Support sysdeps/x86_64/multiarch/stpncpy.S.
*/ IFUNC_IMPL (i, name, stpncpy, @@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2)) #ifdef SHARED +#ifndef __CHKP__ + /* We use specific version of memcpy, memcpy_chk, mempcpy if Intel MPX is enabled. */ /* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */ IFUNC_IMPL (i, name, __memcpy_chk, IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, @@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, __mempcpy_ssse3) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2)) +#endif /* Support sysdeps/x86_64/multiarch/strncmp.S. */ IFUNC_IMPL (i, name, strncmp, diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S index 1ed4200f4c..b5c6675d31 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse4.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S @@ -48,6 +48,13 @@ ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP shl $2, %rdx # endif +# ifdef __CHKP__ + testq %rdx, %rdx + jz L(NoEntryCheck) + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 +L(NoEntryCheck): +# endif pxor %xmm0, %xmm0 cmp $79, %rdx ja L(79bytesormore) @@ -70,6 +77,10 @@ L(firstbyte): ALIGN (4) L(79bytesormore): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rsi), %xmm1 movdqu (%rdi), %xmm2 pxor %xmm1, %xmm2 @@ -90,21 +101,37 @@ L(79bytesormore): L(less128bytes): sub $64, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -112,11 +139,19 @@ L(less128bytes): cmp $32, %rdx jb L(less32bytesin64) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqu 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqu 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -139,41 +174,73 @@ L(128bytesormore): L(less256bytes): sub $128, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqu 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqu 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqu 
96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqu 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -188,11 +255,19 @@ L(less256bytes): cmp $32, %rdx jb L(less32bytesin128) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -207,81 +282,145 @@ L(less32bytesin128): L(less512bytes): sub $256, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqu 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqu 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqu 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqu 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(128bytesin256) +# ifdef __CHKP__ + bndcu 128(%rdi), %bnd0 + bndcu 128(%rsi), %bnd1 +# endif movdqu 128(%rdi), %xmm2 pxor 128(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(144bytesin256) +# ifdef __CHKP__ + bndcu 144(%rdi), %bnd0 + bndcu 144(%rsi), %bnd1 +# endif movdqu 144(%rdi), %xmm2 pxor 144(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(160bytesin256) +# ifdef __CHKP__ + bndcu 160(%rdi), %bnd0 + bndcu 160(%rsi), %bnd1 +# endif movdqu 160(%rdi), %xmm2 pxor 160(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(176bytesin256) +# ifdef __CHKP__ + bndcu 176(%rdi), %bnd0 + bndcu 176(%rsi), %bnd1 +# endif movdqu 176(%rdi), %xmm2 pxor 176(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(192bytesin256) +# ifdef __CHKP__ + bndcu 192(%rdi), %bnd0 + bndcu 192(%rsi), %bnd1 +# endif movdqu 192(%rdi), %xmm2 pxor 192(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(208bytesin256) +# ifdef __CHKP__ + bndcu 208(%rdi), %bnd0 + bndcu 208(%rsi), %bnd1 +# endif movdqu 208(%rdi), %xmm2 pxor 208(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(224bytesin256) +# ifdef __CHKP__ + bndcu 224(%rdi), %bnd0 + bndcu 224(%rsi), %bnd1 +# endif movdqu 224(%rdi), %xmm2 pxor 224(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(240bytesin256) +# ifdef __CHKP__ + bndcu 240(%rdi), %bnd0 + bndcu 240(%rsi), %bnd1 +# endif movdqu 240(%rdi), %xmm2 pxor 240(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -299,11 +438,19 @@ L(less512bytes): cmp $32, %rdx jb L(less32bytesin256) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest 
%xmm2, %xmm0 @@ -331,18 +478,34 @@ L(512bytesormore): sub $64, %rdx ALIGN (4) L(64bytesormore_loop): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 @@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined): L(L2_L3_unaligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 @@ -403,21 +582,37 @@ L(2aligned): L(less128bytesin2aligned): sub $64, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -425,11 +620,19 @@ L(less128bytesin2aligned): cmp $32, %rdx jb L(less32bytesin64in2alinged) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqa 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqa 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -453,41 +656,73 @@ L(128bytesormorein2aligned): L(less256bytesin2alinged): sub $128, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqa 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqa 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqa 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc 
L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqa 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -502,11 +737,19 @@ L(less256bytesin2alinged): cmp $32, %rdx jb L(less32bytesin128in2aligned) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -523,81 +766,145 @@ L(less32bytesin128in2aligned): L(256bytesormorein2aligned): sub $256, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqa 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqa 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqa 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqa 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(128bytesin256) +# ifdef __CHKP__ + bndcu 128(%rdi), %bnd0 + bndcu 128(%rsi), %bnd1 +# endif movdqa 128(%rdi), %xmm2 pxor 128(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(144bytesin256) +# ifdef __CHKP__ + bndcu 144(%rdi), %bnd0 + bndcu 144(%rsi), %bnd1 +# endif movdqa 144(%rdi), %xmm2 pxor 144(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(160bytesin256) +# ifdef __CHKP__ + bndcu 160(%rdi), %bnd0 + bndcu 160(%rsi), %bnd1 +# endif movdqa 160(%rdi), %xmm2 pxor 160(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(176bytesin256) +# ifdef __CHKP__ + bndcu 176(%rdi), %bnd0 + bndcu 176(%rsi), %bnd1 +# endif movdqa 176(%rdi), %xmm2 pxor 176(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(192bytesin256) +# ifdef __CHKP__ + bndcu 192(%rdi), %bnd0 + bndcu 192(%rsi), %bnd1 +# endif movdqa 192(%rdi), %xmm2 pxor 192(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(208bytesin256) +# ifdef __CHKP__ + bndcu 208(%rdi), %bnd0 + bndcu 208(%rsi), %bnd1 +# endif movdqa 208(%rdi), %xmm2 pxor 208(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(224bytesin256) +# ifdef __CHKP__ + bndcu 224(%rdi), %bnd0 + bndcu 224(%rsi), %bnd1 +# endif movdqa 224(%rdi), %xmm2 pxor 224(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(240bytesin256) +# ifdef __CHKP__ + bndcu 240(%rdi), %bnd0 + bndcu 240(%rsi), %bnd1 +# endif movdqa 240(%rdi), %xmm2 pxor 240(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -648,18 +955,34 @@ L(512bytesormorein2aligned): sub $64, %rdx ALIGN (4) L(64bytesormore_loopin2aligned): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, 
%xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 @@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined): L(L2_L3_aligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c new file mode 100644 index 0000000000..6fa50eada1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy-c.c @@ -0,0 +1,80 @@ +/* C version of memcpy for use when Intel MPX is on, + in order to process a buffer of pointers correctly. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <stddef.h> + +void * +__memcpy (void *dst, const void *src, size_t n) +{ + if (!n) return dst; + + __bnd_chk_ptr_lbounds(dst); + __bnd_chk_ptr_ubounds(dst+n-1); +#ifndef __CHKWR__ + __bnd_chk_ptr_lbounds(src); + __bnd_chk_ptr_ubounds(src+n-1); +#endif + + return chkp_memcpy_nochk(dst, src, n); +} + +void * +chkp_memcpy_nochk (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = dst; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + while (n--) + *d++ = *s++; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + return ret; +} + +weak_alias (__memcpy, __GI_memcpy) + +# if defined SHARED && !defined NOT_IN_libc && !defined IA32 +# include <shlib-compat.h> +versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14); +# else +weak_alias (__memcpy, memcpy) +# endif diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S new file mode 100644 index 0000000000..7fedbeef8e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S @@ -0,0 +1,5 @@ +/* optimized version of memcpy without any checks or copying bounds. */ +#define MEMCPY chkp_memcpy_nobnd_nochk +#undef __CHKP__ +#undef __CHKWR__ +#include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S index fc9fcef27d..16b4e680a1 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S @@ -27,7 +27,11 @@ #include "asm-syntax.h" #ifndef MEMCPY -# define MEMCPY __memcpy_ssse3_back +# if defined __CHKP__ || defined __CHKWR__ +# define MEMCPY chkp_memcpy_nobnd +# else +# define MEMCPY __memcpy_ssse3_back +# endif # define MEMCPY_CHK __memcpy_chk_ssse3_back #endif @@ -48,7 +52,7 @@ ud2 .section .text.ssse3,"ax",@progbits -#if !defined USE_AS_BCOPY +#if !defined USE_AS_BCOPY && defined MEMCPY_CHK ENTRY (MEMCPY_CHK) cmpq %rdx, %rcx jb HIDDEN_JUMPTARGET (__chk_fail) @@ -56,6 +60,15 @@ END (MEMCPY_CHK) #endif ENTRY (MEMCPY) +#ifdef __CHKP__ + testq %rdx, %rdx + jz L(NoEntryCheck) + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rdx), %bnd0 + bndcl (%rsi), %bnd1 + bndcu -1(%rsi, %rdx), %bnd1 +#endif + mov %rdi, %rax #ifdef USE_AS_MEMPCPY add %rdx, %rax @@ -87,6 +100,15 @@ L(bk_write): BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) #endif +#ifdef __CHKP__ +L(NoEntryCheck): + mov %rdi, %rax +# ifdef USE_AS_MEMPCPY + add %rdx, %rax +# endif + ret +#endif + ALIGN (4) L(144bytesormore): diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index a1e5031376..fc5ab2da03 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -18,14 +18,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <shlib-compat.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <shlib-compat.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. In static binaries we need memcpy before the initialization happened. 
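A note on why chkp_memcpy_nochk above switches to copying through void ** once the source and destination alignments agree: when this file is built with the MPX Pointer Checker, pointer-typed loads and stores are instrumented so that the bounds recorded for each stored pointer travel with it — exactly what a raw assembly memcpy cannot do. A hedged illustration; copy_pointer_table is an invented name, not part of the patch:

#include <string.h>
#include <stddef.h>

/* Illustrative only: copying a table of pointers.  In an MPX build,
   the instrumented C memcpy also moves each element's bounds, so the
   copies stay dereferenceable; the chkp_memcpy_nobnd variant would
   copy raw bytes only, leaving the copied pointers without usable
   bounds.  */
void
copy_pointer_table (void **dst, void *const *src, size_t count)
{
  memcpy (dst, src, count * sizeof (void *));
}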
*/ -#if defined SHARED && !defined NOT_IN_libc +# if defined SHARED && !defined NOT_IN_libc .text ENTRY(__new_memcpy) .type __new_memcpy, @gnu_indirect_function @@ -43,37 +44,39 @@ ENTRY(__new_memcpy) 3: ret END(__new_memcpy) -# undef ENTRY -# define ENTRY(name) \ +# undef ENTRY +# define ENTRY(name) \ .type __memcpy_sse2, @function; \ .globl __memcpy_sse2; \ .hidden __memcpy_sse2; \ .p2align 4; \ __memcpy_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END -# define END(name) \ +# undef END +# define END(name) \ cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2 -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ .type __memcpy_chk_sse2, @function; \ .globl __memcpy_chk_sse2; \ .p2align 4; \ __memcpy_chk_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ +# undef END_CHK +# define END_CHK(name) \ cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2 -# undef libc_hidden_builtin_def +# undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal memcpy calls through a PLT. The speedup we get from using SSSE3 instruction is likely eaten away by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ +# define libc_hidden_builtin_def(name) \ .globl __GI_memcpy; __GI_memcpy = __memcpy_sse2 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); -#endif +# endif + +# include "../memcpy.S" -#include "../memcpy.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c new file mode 100644 index 0000000000..1eee86c639 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c @@ -0,0 +1 @@ +#include <debug/memcpy_chk.c> diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S index ad01d8cd9f..6f87f2686d 100644 --- a/sysdeps/x86_64/multiarch/memcpy_chk.S +++ b/sysdeps/x86_64/multiarch/memcpy_chk.S @@ -18,14 +18,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. There are no multiarch memcpy functions for static binaries. */ -#ifndef NOT_IN_libc -# ifdef SHARED +# ifndef NOT_IN_libc +# ifdef SHARED .text ENTRY(__memcpy_chk) .type __memcpy_chk, @gnu_indirect_function @@ -41,7 +42,8 @@ ENTRY(__memcpy_chk) leaq __memcpy_chk_ssse3_back(%rip), %rax 2: ret END(__memcpy_chk) -# else -# include "../memcpy_chk.S" +# else +# include "../memcpy_chk.S" +# endif # endif #endif diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c new file mode 100644 index 0000000000..7111128e75 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-c.c @@ -0,0 +1,118 @@ +/* C version of memmove for use when Intel MPX is enabled, + in order to process a buffer of pointers correctly. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +void * +__memmove (void *dst, const void *src, size_t n) +{ + if (n == 0) return dst; + + __bnd_chk_ptr_lbounds(dst); + __bnd_chk_ptr_ubounds(dst+n-1); +#ifndef __CHKWR__ + __bnd_chk_ptr_lbounds(src); + __bnd_chk_ptr_ubounds(src+n-1); +#endif + return chkp_memmove_nochk(dst, src, n); +} + + +void * +chkp_memmove_nochk (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = dst; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + if (s < d) + { + /* backward copying */ + d += n; + s += n; + while (n--) + *--d = *--s; + } + else + /* forward copying */ + while (n--) + *d++ = *s++; + } + else + { + if (s < d) + { + offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1); + /* backward copying */ + d += n; + s += n; + while (n-- && offset_src--) + *--d = *--s; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *--d1 = *--s1; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *--d = *--s; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + /* forward copying */ + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + } + return ret; +} + +weak_alias (__memmove, __libc_memmove) +weak_alias (__memmove, __GI_memmove) +weak_alias (__memmove, memmove) + +# if defined SHARED && !defined NOT_IN_libc +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14) +compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5); +# endif +# endif diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S new file mode 100644 index 0000000000..2a1f3e67b7 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S @@ -0,0 +1,6 @@ +/* optimized version of memmove without any checks or copying bounds. */ +#define USE_AS_MEMMOVE +#define MEMCPY chkp_memmove_nobnd_nochk +#undef __CHKP__ +#undef __CHKWR__ +#include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S index f9a4e9aff9..478141b14a 100644 --- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S @@ -1,4 +1,10 @@ #define USE_AS_MEMMOVE -#define MEMCPY __memmove_ssse3_back +#if defined __CHKP__ || defined __CHKWR__ +/* version of memmove with no copying of bounds support + if there are pointers in the source buffer. */ +# define MEMCPY chkp_memmove_nobnd +# else +# define MEMCPY __memmove_ssse3_back +#endif #define MEMCPY_CHK __memmove_chk_ssse3_back #include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c index 8149c487d5..0d2c6f0266 100644 --- a/sysdeps/x86_64/multiarch/memmove.c +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -17,31 +17,32 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
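chkp_memmove_nochk above picks the copy direction from the operand order so that overlapping regions are handled safely. Stripped of the alignment and pointer-sized-word machinery, the rule reduces to the following sketch; move_bytes is a hypothetical name, shown byte-wise for brevity:

#include <stddef.h>

/* Hypothetical reduction of the direction choice in memmove-c.c:
   when the destination starts above the source the regions may
   overlap at the tail, so copy backward; otherwise copy forward, so
   no byte is overwritten before it has been read.  */
static void
move_bytes (char *d, const char *s, size_t n)
{
  if (s < d)
    while (n--)
      d[n] = s[n];      /* backward: last byte first */
  else
    for (size_t i = 0; i < n; i++)
      d[i] = s[i];      /* forward */
}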
*/ -#ifndef NOT_IN_libc -# define MEMMOVE __memmove_sse2 -# ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ +#ifndef __CHKP__ +# ifndef NOT_IN_libc +# define MEMMOVE __memmove_sse2 +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ __hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2); -# endif +# endif /* Redefine memmove so that the compiler won't complain about the type mismatch with the IFUNC selector in strong_alias, below. */ -# undef memmove -# define memmove __redirect_memmove -# include <string.h> -# undef memmove +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# undef memmove extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden; -#endif +# endif -#include "string/memmove.c" +# include "string/memmove.c" -#ifndef NOT_IN_libc -# include <shlib-compat.h> -# include "init-arch.h" +# ifndef NOT_IN_libc +# include <shlib-compat.h> +# include "init-arch.h" /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ @@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove, strong_alias (__libc_memmove, memmove) -# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14) +# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14) compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5); +# endif # endif #endif diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c new file mode 100644 index 0000000000..bbf53d00d3 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c @@ -0,0 +1 @@ +#include <debug/memmove_chk.c> diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c index 17ed460324..c1b0b9304b 100644 --- a/sysdeps/x86_64/multiarch/memmove_chk.c +++ b/sysdeps/x86_64/multiarch/memmove_chk.c @@ -17,19 +17,21 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <string.h> -#include "init-arch.h" +#ifndef __CHKP__ +# include <string.h> +# include "init-arch.h" -#define MEMMOVE_CHK __memmove_chk_sse2 +# define MEMMOVE_CHK __memmove_chk_sse2 extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden; -#include "debug/memmove_chk.c" +# include "debug/memmove_chk.c" libc_ifunc (__memmove_chk, HAS_SSSE3 ? (HAS_FAST_COPY_BACKWARD ? __memmove_chk_ssse3_back : __memmove_chk_ssse3) : __memmove_chk_sse2); +#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c new file mode 100644 index 0000000000..522fb86e3e --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy-c.c @@ -0,0 +1,36 @@ +/* C version of mempcpy for use when Intel MPX is enabled, + in order to process an array of pointers correctly. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +void * +mempcpy (void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n) + n; +} + +void * +chkp_mempcpy_nochk (void *dst, const void *src, size_t n) +{ + return chkp_memcpy_nochk(dst, src, n) + n; +} + +weak_alias (mempcpy, __GI_mempcpy) +weak_alias (mempcpy, __GI___mempcpy) +weak_alias (mempcpy, __mempcpy) diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S new file mode 100644 index 0000000000..eb929f4182 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S @@ -0,0 +1,6 @@ +/* optimized version of mempcpy without any checks or copying bounds. */ +#define USE_AS_MEMPCPY +#define MEMCPY chkp_mempcpy_nobnd_nochk +#undef __CHKP__ +#undef __CHKWR__ +#include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S index 82ffacb8fb..f32ecfc76e 100644 --- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S @@ -1,4 +1,12 @@ #define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_ssse3_back -#define MEMCPY_CHK __mempcpy_chk_ssse3_back + +#if defined __CHKP__ || defined __CHKWR__ +/* version of mempcpy with no copying of bounds support + if there are pointers in the source buffer. */ +# define MEMCPY chkp_mempcpy_nobnd +#else +# define MEMCPY __mempcpy_ssse3_back +#endif + +#define MEMCPY_CHK __mempcpy_chk_ssse3_back #include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S index b8b7fcd121..4ec5825989 100644 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ b/sysdeps/x86_64/multiarch/mempcpy.S @@ -18,13 +18,14 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. In static binaries we need mempcpy before the initialization happened. 
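Taken together with the Versions hunk near the top, the patch provides three families per copy routine: chkp_*_nobnd (the SSSE3 copy, which in an MPX build still performs the entry bndcl/bndcu checks but does not transfer bounds), chkp_*_nochk (the C copy, which transfers bounds but skips the entry checks), and chkp_*_nobnd_nochk (the plain SSSE3 copy with neither, built by the *-1.S wrappers above). A hedged sketch of how MPX-aware code might choose among them; the declarations are written out by hand here because the patch installs no public header for these symbols, and copy_samples is an invented name:

#include <stddef.h>

/* Exported at GLIBC_2.17 by the Versions hunk; declared manually for
   this illustration.  */
extern void *chkp_memcpy_nobnd (void *dst, const void *src, size_t n);
extern void *chkp_memcpy_nobnd_nochk (void *dst, const void *src, size_t n);

void *
copy_samples (void *dst, const void *src, size_t n)
{
  /* The buffer holds plain data, never pointers, so skipping bounds
     propagation is safe and keeps the fast SSSE3 path; the bounds of
     dst and src themselves are still checked on entry.  */
  return chkp_memcpy_nobnd (dst, src, n);
}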
*/ -#if defined SHARED && !defined NOT_IN_libc +# if defined SHARED && !defined NOT_IN_libc ENTRY(__mempcpy) .type __mempcpy, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) @@ -40,38 +41,40 @@ ENTRY(__mempcpy) 2: ret END(__mempcpy) -# undef ENTRY -# define ENTRY(name) \ +# undef ENTRY +# define ENTRY(name) \ .type __mempcpy_sse2, @function; \ .p2align 4; \ .globl __mempcpy_sse2; \ .hidden __mempcpy_sse2; \ __mempcpy_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END -# define END(name) \ +# undef END +# define END(name) \ cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2 -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ .type __mempcpy_chk_sse2, @function; \ .globl __mempcpy_chk_sse2; \ .p2align 4; \ __mempcpy_chk_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ +# undef END_CHK +# define END_CHK(name) \ cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2 -# undef libc_hidden_def -# undef libc_hidden_builtin_def +# undef libc_hidden_def +# undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal mempcpy calls through a PLT. The speedup we get from using SSSE3 instruction is likely eaten away by the indirect call in the PLT. */ -# define libc_hidden_def(name) \ +# define libc_hidden_def(name) \ .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2 -# define libc_hidden_builtin_def(name) \ +# define libc_hidden_builtin_def(name) \ .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2 -#endif +# endif + +# include "../mempcpy.S" -#include "../mempcpy.S" +#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c new file mode 100644 index 0000000000..ba170784c3 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c @@ -0,0 +1 @@ +#include <debug/mempcpy_chk.c> diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S index 3801db399b..98acf9691c 100644 --- a/sysdeps/x86_64/multiarch/mempcpy_chk.S +++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S @@ -18,14 +18,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. There are no multiarch mempcpy functions for static binaries. */ -#ifndef NOT_IN_libc -# ifdef SHARED +# ifndef NOT_IN_libc +# ifdef SHARED .text ENTRY(__mempcpy_chk) .type __mempcpy_chk, @gnu_indirect_function @@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk) leaq __mempcpy_chk_ssse3_back(%rip), %rax 2: ret END(__mempcpy_chk) -# else -# include "../mempcpy_chk.S" +# else +# include "../mempcpy_chk.S" +# endif # endif #endif diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S index 028c6d3d74..a3535ad500 100644 --- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S @@ -25,6 +25,14 @@ # define STRCAT __strcat_sse2_unaligned # endif +# ifdef __CHKP__ +# define RETURN \ + bndcu -1(%rdi, %rax), %bnd0; \ + ret +# else +# define RETURN ret +# endif + # define USE_AS_STRCAT .text @@ -37,6 +45,10 @@ ENTRY (STRCAT) /* Inline corresponding strlen file, temporary until new strcpy implementation gets merged. 
*/ +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif xor %rax, %rax mov %edi, %ecx and $0x3f, %ecx @@ -67,84 +79,132 @@ L(align16_start): pxor %xmm1, %xmm1 pxor %xmm2, %xmm2 pxor %xmm3, %xmm3 +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx jnz L(exit64) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $64, %rax pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx jnz L(exit64) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $64, %rax pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx jnz L(exit64) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $64, %rax pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -153,6 +213,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $80, %rax pmovmskb %xmm0, %edx @@ -162,6 +225,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm1 add $16, %rax pmovmskb %xmm1, %edx @@ -171,6 +237,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm2 add $16, %rax pmovmskb %xmm2, %edx @@ -180,6 +249,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm3 add $16, %rax pmovmskb %xmm3, %edx @@ -187,8 +259,12 @@ L(align16_start): jnz L(exit) add $16, %rax + .p2align 4 L(align64_loop): +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif movaps (%rax), %xmm4 pminub 16(%rax), %xmm4 movaps 32(%rax), %xmm5 diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S index f170238b55..4311e8689c 100644 --- a/sysdeps/x86_64/multiarch/strchr.S +++ b/sysdeps/x86_64/multiarch/strchr.S @@ -91,6 +91,10 @@ __strchr_sse42: CALL_MCOUNT testb %sil, %sil je __strend_sse4 +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif 
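The string routines differ from the mem* ones in that no length is known at entry, so the patch validates the lower bound once (the bndcl/bndcu pair on the first byte above) and then issues a bndcu immediately before each further block is loaded. In C terms the idea is roughly the following; chkp_strlen_sketch is an invented illustration, checking per byte where the assembly checks per 16-byte block:

#include <stddef.h>

/* Hypothetical illustration of the incremental upper-bound checking
   used by the string functions: each address is validated just
   before it is read, since the end of the string is unknown until
   the terminating NUL is found.  */
static size_t
chkp_strlen_sketch (const char *s)
{
  size_t i = 0;
  __bnd_chk_ptr_lbounds (s);         /* entry check, like bndcl */
  for (;; i++)
    {
      __bnd_chk_ptr_ubounds (s + i); /* like bndcu before each load */
      if (s[i] == '\0')
        return i;
    }
}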
pxor %xmm2, %xmm2 movd %esi, %xmm1 movl %edi, %ecx @@ -124,6 +128,9 @@ __strchr_sse42: ja L(return_null) L(unaligned_match): addq %rdi, %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif ret .p2align 4 @@ -135,15 +142,27 @@ L(unaligned_no_match): L(loop): addq $16, %r8 L(aligned_start): +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) jmp L(loop) @@ -159,6 +178,9 @@ L(return_null): .p2align 4 L(loop_exit): leaq (%r8,%rcx), %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif ret cfi_endproc .size __strchr_sse42, .-__strchr_sse42 diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S index c84f1c2b31..edfa915707 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S @@ -127,6 +127,14 @@ STRCMP_SSE42: je LABEL(Byte0) mov %rdx, %r11 #endif + +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +#endif + mov %esi, %ecx mov %edi, %eax /* Use 64bit AND here to avoid long NOP padding. */ @@ -210,6 +218,10 @@ LABEL(touppermask): #endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +#endif /* * Determine source and destination string offsets from 16-byte @@ -231,6 +243,11 @@ LABEL(crosscache): mov %edx, %r8d /* r8d is offset flag for exit tail */ xchg %ecx, %eax xchg %rsi, %rdi +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif LABEL(bigger): movdqa (%rdi), %xmm2 movdqa (%rsi), %xmm1 @@ -280,6 +297,10 @@ LABEL(ashr_0): mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ .p2align 4 LABEL(ashr_0_use): +#ifdef __CHKP__ + bndcu -1(%rdi, %rdx), %bnd0 + bndcu -1(%rsi, %rdx), %bnd1 +#endif movdqa (%rdi,%rdx), %xmm0 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 @@ -295,6 +316,10 @@ LABEL(ashr_0_use): jbe LABEL(strcmp_exitz) #endif +#ifdef __CHKP__ + bndcu -1(%rdi, %rdx), %bnd0 + bndcu -1(%rsi, %rdx), %bnd1 +#endif movdqa (%rdi,%rdx), %xmm0 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 @@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use): jbe LABEL(strcmp_exitz) #endif lea -16(%rdx, %rcx), %rcx +#ifdef __CHKP__ + bndcu -1(%rdi, %rcx), %bnd0 + bndcu -1(%rsi, %rcx), %bnd1 +#endif movzbl (%rdi, %rcx), %eax movzbl (%rsi, %rcx), %edx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -362,6 +391,15 @@ LABEL(ashr_1): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_1_use) +LABEL(ashr_1_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_1_restart_use) +#endif .p2align 4 LABEL(loop_ashr_1_use): @@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $14, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_1_check) +#else ja LABEL(nibble_ashr_1_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ 
-450,6 +492,15 @@ LABEL(ashr_2): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_2_use) +LABEL(ashr_2_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_2_restart_use) +#endif .p2align 4 LABEL(loop_ashr_2_use): @@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $13, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_2_check) +#else ja LABEL(nibble_ashr_2_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -539,6 +594,15 @@ LABEL(ashr_3): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_3_use) +LABEL(ashr_3_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_3_restart_use) +#endif LABEL(loop_ashr_3_use): add $16, %r10 @@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $12, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_3_check) +#else ja LABEL(nibble_ashr_3_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -627,6 +695,15 @@ LABEL(ashr_4): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_4_use) +LABEL(ashr_4_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_4_restart_use) +#endif .p2align 4 LABEL(loop_ashr_4_use): @@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $11, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_4_check) +#else ja LABEL(nibble_ashr_4_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -716,6 +797,15 @@ LABEL(ashr_5): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_5_use) +LABEL(ashr_5_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_5_restart_use) +#endif .p2align 4 LABEL(loop_ashr_5_use): @@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $10, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_5_check) +#else ja LABEL(nibble_ashr_5_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -806,6 +900,15 @@ LABEL(ashr_6): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_6_use) +LABEL(ashr_6_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_6_restart_use) +#endif .p2align 4 LABEL(loop_ashr_6_use): @@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $9, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_6_check) +#else ja LABEL(nibble_ashr_6_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -895,6 +1002,15 @@ LABEL(ashr_7): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of 
sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_7_use) +LABEL(ashr_7_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_7_restart_use) +#endif .p2align 4 LABEL(loop_ashr_7_use): @@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $8, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_7_check) +#else ja LABEL(nibble_ashr_7_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -984,6 +1104,15 @@ LABEL(ashr_8): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_8_use) +LABEL(ashr_8_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_8_restart_use) +#endif .p2align 4 LABEL(loop_ashr_8_use): @@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $7, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_8_check) +#else ja LABEL(nibble_ashr_8_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1073,6 +1206,15 @@ LABEL(ashr_9): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_9_use) +LABEL(ashr_9_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_9_restart_use) +#endif .p2align 4 LABEL(loop_ashr_9_use): @@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $6, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_9_check) +#else ja LABEL(nibble_ashr_9_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1163,6 +1309,15 @@ LABEL(ashr_10): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_10_use) +LABEL(ashr_10_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_10_restart_use) +#endif .p2align 4 LABEL(loop_ashr_10_use): @@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $5, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_10_check) +#else ja LABEL(nibble_ashr_10_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1252,6 +1411,15 @@ LABEL(ashr_11): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_11_use) +LABEL(ashr_11_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_11_restart_use) +#endif .p2align 4 LABEL(loop_ashr_11_use): @@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $4, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_11_check) +#else ja LABEL(nibble_ashr_11_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1341,6 +1513,15 @@ LABEL(ashr_12): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_12_use) 
+LABEL(ashr_12_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_12_restart_use) +#endif .p2align 4 LABEL(loop_ashr_12_use): @@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $3, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_12_check) +#else ja LABEL(nibble_ashr_12_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1431,6 +1616,15 @@ LABEL(ashr_13): sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_13_use) +LABEL(ashr_13_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_13_restart_use) +#endif .p2align 4 LABEL(loop_ashr_13_use): @@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $2, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_13_check) +#else ja LABEL(nibble_ashr_13_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1521,6 +1719,15 @@ LABEL(ashr_14): sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_14_use) +LABEL(ashr_14_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_14_restart_use) +#endif .p2align 4 LABEL(loop_ashr_14_use): @@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $1, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_14_check) +#else ja LABEL(nibble_ashr_14_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1613,6 +1824,15 @@ LABEL(ashr_15): sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_15_use) +LABEL(ashr_15_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_15_restart_use) +#endif .p2align 4 LABEL(loop_ashr_15_use): @@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $0, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_15_check) +#else ja LABEL(nibble_ashr_15_restart_use) +#endif LABEL(nibble_ashr_exit_use): #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1691,6 +1915,11 @@ LABEL(exit_use): test %r8d, %r8d jz LABEL(ret_use) xchg %eax, %edx +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif LABEL(ret_use): #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx @@ -1707,6 +1936,11 @@ LABEL(less32bytes): test %r8d, %r8d jz LABEL(ret) xchg %rsi, %rdi /* recover original order according to flag(%r8d) */ +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif .p2align 4 LABEL(ret): @@ -1717,6 +1951,10 @@ LABEL(less16bytes): sub %rdx, %r11 jbe LABEL(strcmp_exitz) #endif +#ifdef __CHKP__ + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 +#endif movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S index 7710173c68..e6baee92db 100644 --- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S @@ -33,7 +33,7 @@ lea TABLE(%rip), %r11; \ movslq (%r11, INDEX, SCALE), %rcx; \ lea (%r11, %rcx), %rcx; \ - jmp *%rcx 
+ jmp *%rcx # ifndef USE_AS_STRCAT @@ -51,6 +51,16 @@ ENTRY (STRCPY) # endif +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + bndcu -1(%rdi, %rdx), %bnd0 +# endif +# endif + and $63, %rcx cmp $32, %rcx jbe L(SourceStringAlignmentLess32) @@ -79,6 +89,9 @@ ENTRY (STRCPY) test %rdx, %rdx jnz L(CopyFrom1To16BytesTail) +# ifdef __CHKP__ + bndcu 16(%rsi), %bnd1 +# endif pcmpeqb 16(%rsi), %xmm0 pmovmskb %xmm0, %rdx @@ -91,6 +104,9 @@ ENTRY (STRCPY) jnz L(CopyFrom1To32Bytes) movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */ +# ifdef __CHKP__ + bndcu 15(%rdi), %bnd0 +# endif movdqu %xmm1, (%rdi) /* If source address alignment != destination address alignment */ @@ -101,6 +117,10 @@ L(Unalign16Both): add %rcx, %r8 # endif mov $16, %rcx +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movdqa (%rsi, %rcx), %xmm1 movaps 16(%rsi, %rcx), %xmm2 movdqu %xmm1, (%rdi, %rcx) @@ -118,6 +138,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm3 movdqu %xmm2, (%rdi, %rcx) pcmpeqb %xmm3, %xmm0 @@ -134,6 +158,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm4 movdqu %xmm3, (%rdi, %rcx) pcmpeqb %xmm4, %xmm0 @@ -150,6 +178,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm1 movdqu %xmm4, (%rdi, %rcx) pcmpeqb %xmm1, %xmm0 @@ -166,6 +198,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm2 movdqu %xmm1, (%rdi, %rcx) pcmpeqb %xmm2, %xmm0 @@ -182,6 +218,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm3 movdqu %xmm2, (%rdi, %rcx) pcmpeqb %xmm3, %xmm0 @@ -198,6 +238,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movdqu %xmm3, (%rdi, %rcx) mov %rsi, %rdx lea 16(%rsi, %rcx), %rsi @@ -208,6 +252,9 @@ L(Unalign16Both): lea 128(%r8, %rdx), %r8 # endif L(Unaligned64Loop): +# ifdef __CHKP__ + bndcu 48(%rsi), %bnd1 +# endif movaps (%rsi), %xmm2 movaps %xmm2, %xmm4 movaps 16(%rsi), %xmm5 @@ -229,6 +276,10 @@ L(Unaligned64Loop): L(Unaligned64Loop_start): add $64, %rdi add $64, %rsi +# ifdef __CHKP__ + bndcu (%rsi), %bnd1 + bndcu (%rdi), %bnd0 +# endif movdqu %xmm4, -64(%rdi) movaps (%rsi), %xmm2 movdqa %xmm2, %xmm4 @@ -271,16 +322,28 @@ L(Unaligned64Leave): jnz L(CopyFrom1To16BytesUnaligned_32) bsf %rcx, %rdx +# ifdef __CHKP__ + bndcu 47(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) movdqu %xmm5, 16(%rdi) movdqu %xmm6, 32(%rdi) # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT # ifdef USE_AS_STPCPY +# ifdef __CHKP__ + bndcu 48(%rdi, %rdx), %bnd0 +# endif lea 48(%rdi, %rdx), %rax # endif +# ifdef __CHKP__ + bndcu 63(%rdi), %bnd0 +# endif movdqu %xmm7, 48(%rdi) add $15, %r8 sub %rdx, %r8 +# ifdef __CHKP__ + bndcu 49(%rdi, %rdx), %bnd0 +# endif lea 49(%rdi, %rdx), %rdi jmp L(StrncpyFillTailWithZero) # else @@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32): test %rdx, %rdx jnz 
L(CopyFrom1To16BytesTail1) +# ifdef __CHKP__ + bndcu 16(%rsi), %bnd1 + bndcu 15(%rdi), %bnd0 +# endif pcmpeqb %xmm2, %xmm0 movdqu %xmm1, (%rdi) pmovmskb %xmm0, %rdx @@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0): # ifdef USE_AS_STPCPY lea (%rdi, %rdx), %rax # endif +# ifdef __CHKP__ + bndcu 15(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) add $63, %r8 sub %rdx, %r8 @@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0): .p2align 4 L(CopyFrom1To16BytesUnaligned_16): bsf %rcx, %rdx +# ifdef __CHKP__ + bndcu 31(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT # ifdef USE_AS_STPCPY @@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16): .p2align 4 L(CopyFrom1To16BytesUnaligned_32): bsf %rdx, %rdx +# ifdef __CHKP__ + bndcu 47(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) movdqu %xmm5, 16(%rdi) # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT @@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3): .p2align 4 L(Exit1): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 +# endif mov %dh, (%rdi) # ifdef USE_AS_STPCPY lea (%rdi), %rax @@ -543,6 +622,9 @@ L(Exit1): .p2align 4 L(Exit2): mov (%rsi), %dx +# ifdef __CHKP__ + bndcu 1(%rdi), %bnd0 +# endif mov %dx, (%rdi) # ifdef USE_AS_STPCPY lea 1(%rdi), %rax @@ -557,6 +639,9 @@ L(Exit2): .p2align 4 L(Exit3): mov (%rsi), %cx +# ifdef __CHKP__ + bndcu 2(%rdi), %bnd0 +# endif mov %cx, (%rdi) mov %dh, 2(%rdi) # ifdef USE_AS_STPCPY @@ -572,6 +657,9 @@ L(Exit3): .p2align 4 L(Exit4): mov (%rsi), %edx +# ifdef __CHKP__ + bndcu 3(%rdi), %bnd0 +# endif mov %edx, (%rdi) # ifdef USE_AS_STPCPY lea 3(%rdi), %rax @@ -586,6 +674,9 @@ L(Exit4): .p2align 4 L(Exit5): mov (%rsi), %ecx +# ifdef __CHKP__ + bndcu 4(%rdi), %bnd0 +# endif mov %dh, 4(%rdi) mov %ecx, (%rdi) # ifdef USE_AS_STPCPY @@ -602,6 +693,9 @@ L(Exit5): L(Exit6): mov (%rsi), %ecx mov 4(%rsi), %dx +# ifdef __CHKP__ + bndcu 5(%rdi), %bnd0 +# endif mov %ecx, (%rdi) mov %dx, 4(%rdi) # ifdef USE_AS_STPCPY @@ -618,6 +712,9 @@ L(Exit6): L(Exit7): mov (%rsi), %ecx mov 3(%rsi), %edx +# ifdef __CHKP__ + bndcu 6(%rdi), %bnd0 +# endif mov %ecx, (%rdi) mov %edx, 3(%rdi) # ifdef USE_AS_STPCPY @@ -633,6 +730,9 @@ L(Exit7): .p2align 4 L(Exit8): mov (%rsi), %rdx +# ifdef __CHKP__ + bndcu 7(%rdi), %bnd0 +# endif mov %rdx, (%rdi) # ifdef USE_AS_STPCPY lea 7(%rdi), %rax @@ -647,6 +747,9 @@ L(Exit8): .p2align 4 L(Exit9): mov (%rsi), %rcx +# ifdef __CHKP__ + bndcu 8(%rdi), %bnd0 +# endif mov %dh, 8(%rdi) mov %rcx, (%rdi) # ifdef USE_AS_STPCPY @@ -663,6 +766,9 @@ L(Exit9): L(Exit10): mov (%rsi), %rcx mov 8(%rsi), %dx +# ifdef __CHKP__ + bndcu 9(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %dx, 8(%rdi) # ifdef USE_AS_STPCPY @@ -679,6 +785,9 @@ L(Exit10): L(Exit11): mov (%rsi), %rcx mov 7(%rsi), %edx +# ifdef __CHKP__ + bndcu 10(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %edx, 7(%rdi) # ifdef USE_AS_STPCPY @@ -695,6 +804,9 @@ L(Exit11): L(Exit12): mov (%rsi), %rcx mov 8(%rsi), %edx +# ifdef __CHKP__ + bndcu 11(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %edx, 8(%rdi) # ifdef USE_AS_STPCPY @@ -711,6 +823,9 @@ L(Exit12): L(Exit13): mov (%rsi), %rcx mov 5(%rsi), %rdx +# ifdef __CHKP__ + bndcu 12(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %rdx, 5(%rdi) # ifdef USE_AS_STPCPY @@ -727,6 +842,9 @@ L(Exit13): L(Exit14): mov (%rsi), %rcx mov 6(%rsi), %rdx +# ifdef __CHKP__ + bndcu 13(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %rdx, 6(%rdi) # ifdef USE_AS_STPCPY @@ -743,6 +861,9 @@ L(Exit14): L(Exit15): mov (%rsi), %rcx mov 7(%rsi), %rdx +# ifdef __CHKP__ + bndcu 14(%rdi), %bnd0 +# endif mov %rcx, (%rdi) 
mov %rdx, 7(%rdi) # ifdef USE_AS_STPCPY @@ -758,6 +879,9 @@ L(Exit15): .p2align 4 L(Exit16): movdqu (%rsi), %xmm0 +# ifdef __CHKP__ + bndcu 15(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) # ifdef USE_AS_STPCPY lea 15(%rdi), %rax @@ -772,6 +896,9 @@ L(Exit16): .p2align 4 L(Exit17): movdqu (%rsi), %xmm0 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %dh, 16(%rdi) # ifdef USE_AS_STPCPY @@ -788,6 +915,9 @@ L(Exit17): L(Exit18): movdqu (%rsi), %xmm0 mov 16(%rsi), %cx +# ifdef __CHKP__ + bndcu 17(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %cx, 16(%rdi) # ifdef USE_AS_STPCPY @@ -804,6 +934,9 @@ L(Exit18): L(Exit19): movdqu (%rsi), %xmm0 mov 15(%rsi), %ecx +# ifdef __CHKP__ + bndcu 18(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %ecx, 15(%rdi) # ifdef USE_AS_STPCPY @@ -820,6 +953,9 @@ L(Exit19): L(Exit20): movdqu (%rsi), %xmm0 mov 16(%rsi), %ecx +# ifdef __CHKP__ + bndcu 19(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %ecx, 16(%rdi) # ifdef USE_AS_STPCPY @@ -836,6 +972,9 @@ L(Exit20): L(Exit21): movdqu (%rsi), %xmm0 mov 16(%rsi), %ecx +# ifdef __CHKP__ + bndcu 20(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %ecx, 16(%rdi) mov %dh, 20(%rdi) @@ -853,6 +992,9 @@ L(Exit21): L(Exit22): movdqu (%rsi), %xmm0 mov 14(%rsi), %rcx +# ifdef __CHKP__ + bndcu 21(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 14(%rdi) # ifdef USE_AS_STPCPY @@ -869,6 +1011,9 @@ L(Exit22): L(Exit23): movdqu (%rsi), %xmm0 mov 15(%rsi), %rcx +# ifdef __CHKP__ + bndcu 22(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 15(%rdi) # ifdef USE_AS_STPCPY @@ -885,6 +1030,9 @@ L(Exit23): L(Exit24): movdqu (%rsi), %xmm0 mov 16(%rsi), %rcx +# ifdef __CHKP__ + bndcu 23(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 16(%rdi) # ifdef USE_AS_STPCPY @@ -901,6 +1049,9 @@ L(Exit24): L(Exit25): movdqu (%rsi), %xmm0 mov 16(%rsi), %rcx +# ifdef __CHKP__ + bndcu 24(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 16(%rdi) mov %dh, 24(%rdi) @@ -919,6 +1070,9 @@ L(Exit26): movdqu (%rsi), %xmm0 mov 16(%rsi), %rdx mov 24(%rsi), %cx +# ifdef __CHKP__ + bndcu 25(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rdx, 16(%rdi) mov %cx, 24(%rdi) @@ -937,6 +1091,9 @@ L(Exit27): movdqu (%rsi), %xmm0 mov 16(%rsi), %rdx mov 23(%rsi), %ecx +# ifdef __CHKP__ + bndcu 26(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rdx, 16(%rdi) mov %ecx, 23(%rdi) @@ -955,6 +1112,9 @@ L(Exit28): movdqu (%rsi), %xmm0 mov 16(%rsi), %rdx mov 24(%rsi), %ecx +# ifdef __CHKP__ + bndcu 27(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rdx, 16(%rdi) mov %ecx, 24(%rdi) @@ -972,6 +1132,9 @@ L(Exit28): L(Exit29): movdqu (%rsi), %xmm0 movdqu 13(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 28(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 13(%rdi) # ifdef USE_AS_STPCPY @@ -988,6 +1151,9 @@ L(Exit29): L(Exit30): movdqu (%rsi), %xmm0 movdqu 14(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 29(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 14(%rdi) # ifdef USE_AS_STPCPY @@ -1004,6 +1170,9 @@ L(Exit30): L(Exit31): movdqu (%rsi), %xmm0 movdqu 15(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 30(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 15(%rdi) # ifdef USE_AS_STPCPY @@ -1020,6 +1189,9 @@ L(Exit31): L(Exit32): movdqu (%rsi), %xmm0 movdqu 16(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 31(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 16(%rdi) # ifdef USE_AS_STPCPY diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S index 3f92a41ef9..1fed105bf0 100644 --- a/sysdeps/x86_64/multiarch/strrchr.S +++ 
b/sysdeps/x86_64/multiarch/strrchr.S @@ -97,6 +97,10 @@ __strrchr_sse42: CALL_MCOUNT testb %sil, %sil je __strend_sse4 +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif xor %eax,%eax /* RAX has the last occurrence of s. */ movd %esi, %xmm1 punpcklbw %xmm1, %xmm1 @@ -135,6 +139,9 @@ L(unaligned_no_byte): contain the NULL terminator. */ jg L(exit) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif /* Loop start on aligned string. */ .p2align 4 @@ -142,6 +149,9 @@ L(loop): pcmpistri $0x4a, (%r8), %xmm1 jbe L(match_or_eos) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif jmp L(loop) .p2align 4 L(match_or_eos): @@ -149,11 +159,17 @@ L(match_or_eos): L(match_no_eos): leaq (%r8,%rcx), %rax addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif jmp L(loop) .p2align 4 L(had_eos): jnc L(exit) leaq (%r8,%rcx), %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif .p2align 4 L(exit): ret diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S index b7de092228..77889dd555 100644 --- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S @@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3) mov %rsi, %rcx mov %rdi, %rdx +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +# endif cmpl $0, (%rcx) jz L(Exit4) +# ifdef __CHKP__ + bndcu 4(%rcx), %bnd1 +# endif cmpl $0, 4(%rcx) jz L(Exit8) +# ifdef __CHKP__ + bndcu 8(%rcx), %bnd1 +# endif cmpl $0, 8(%rcx) jz L(Exit12) +# ifdef __CHKP__ + bndcu 12(%rcx), %bnd1 +# endif cmpl $0, 12(%rcx) jz L(Exit16) @@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3) pxor %xmm0, %xmm0 mov (%rcx), %r9 +# ifdef __CHKP__ + bndcu 7(%rdx), %bnd0 +# endif mov %r9, (%rdx) +# ifdef __CHKP__ + bndcu (%rsi), %bnd1 +# endif pcmpeqd (%rsi), %xmm0 mov 8(%rcx), %r9 +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif mov %r9, 8(%rdx) pmovmskb %xmm0, %rax @@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3) jmp L(Shl12) L(Align16Both): +# ifdef __CHKP__ + bndcu 16(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps (%rcx), %xmm1 movaps 16(%rcx), %xmm2 movaps %xmm1, (%rdx) @@ -82,6 +109,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm3 movaps %xmm2, (%rdx, %rsi) pcmpeqd %xmm3, %xmm0 @@ -91,6 +122,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm4 movaps %xmm3, (%rdx, %rsi) pcmpeqd %xmm4, %xmm0 @@ -100,6 +135,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm1 movaps %xmm4, (%rdx, %rsi) pcmpeqd %xmm1, %xmm0 @@ -109,6 +148,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm2 movaps %xmm1, (%rdx, %rsi) pcmpeqd %xmm2, %xmm0 @@ -118,6 +161,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm3 movaps %xmm2, (%rdx, %rsi) pcmpeqd %xmm3, %xmm0 @@ -127,6 +174,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps %xmm3, (%rdx, %rsi) mov %rcx, %rax lea 
16(%rcx, %rsi), %rcx @@ -138,6 +189,10 @@ L(Align16Both): .p2align 4 L(Aligned64Loop): +# ifdef __CHKP__ + bndcu (%rcx), %bnd1 + bndcu 63(%rdx), %bnd0 +# endif movaps (%rcx), %xmm2 movaps %xmm2, %xmm4 movaps 16(%rcx), %xmm5 @@ -168,6 +223,9 @@ L(Aligned64Leave): pcmpeqd %xmm5, %xmm0 pmovmskb %xmm0, %rax +# ifdef __CHKP__ + bndcu -49(%rdx), %bnd0 +# endif movaps %xmm4, -64(%rdx) test %rax, %rax lea 16(%rsi), %rsi @@ -176,11 +234,17 @@ L(Aligned64Leave): pcmpeqd %xmm6, %xmm0 pmovmskb %xmm0, %rax +# ifdef __CHKP__ + bndcu -33(%rdx), %bnd0 +# endif movaps %xmm5, -48(%rdx) test %rax, %rax lea 16(%rsi), %rsi jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu -17(%rdx), %bnd0 +# endif movaps %xmm6, -32(%rdx) pcmpeqd %xmm7, %xmm0 @@ -190,11 +254,17 @@ L(Aligned64Leave): jnz L(CopyFrom1To16Bytes) mov $-0x40, %rsi +# ifdef __CHKP__ + bndcu -1(%rdx), %bnd0 +# endif movaps %xmm7, -16(%rdx) jmp L(Aligned64Loop) .p2align 4 L(Shl4): +# ifdef __CHKP__ + bndcu 12(%rcx), %bnd1 +# endif movaps -4(%rcx), %xmm1 movaps 12(%rcx), %xmm2 L(Shl4Start): @@ -206,6 +276,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 28(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 28(%rcx), %xmm2 @@ -219,6 +293,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 28(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 28(%rcx), %xmm2 @@ -232,6 +310,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 28(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 28(%rcx), %xmm2 @@ -244,6 +326,9 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) lea 28(%rcx), %rcx lea 16(%rdx), %rdx @@ -258,6 +343,9 @@ L(Shl4Start): .p2align 4 L(Shl4LoopStart): +# ifdef __CHKP__ + bndcu 12(%rcx), %bnd1 +# endif movaps 12(%rcx), %xmm2 movaps 28(%rcx), %xmm3 movaps %xmm3, %xmm6 @@ -279,6 +367,9 @@ L(Shl4LoopStart): lea 64(%rcx), %rcx palignr $4, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%rdx), %bnd0 +# endif movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) @@ -287,6 +378,10 @@ L(Shl4LoopStart): jmp L(Shl4LoopStart) L(Shl4LoopExit): +# ifdef __CHKP__ + bndcu -4(%rcx), %bnd1 + bndcu 11(%rdx), %bnd0 +# endif movdqu -4(%rcx), %xmm1 mov $12, %rsi movdqu %xmm1, -4(%rdx) @@ -294,6 +389,9 @@ L(Shl4LoopExit): .p2align 4 L(Shl8): +# ifdef __CHKP__ + bndcu 8(%rcx), %bnd1 +# endif movaps -8(%rcx), %xmm1 movaps 8(%rcx), %xmm2 L(Shl8Start): @@ -305,6 +403,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 24(%rcx), %xmm2 @@ -318,6 +420,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 24(%rcx), %xmm2 @@ -331,6 +437,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 24(%rcx), %xmm2 @@ -343,6 +453,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) lea 24(%rcx), %rcx lea 16(%rdx), %rdx @@ -357,6 +471,9 @@ L(Shl8Start): .p2align 4 L(Shl8LoopStart): +# ifdef __CHKP__ + bndcu 8(%rcx), %bnd1 +# endif movaps 8(%rcx), 
%xmm2 movaps 24(%rcx), %xmm3 movaps %xmm3, %xmm6 @@ -378,6 +495,9 @@ L(Shl8LoopStart): lea 64(%rcx), %rcx palignr $8, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%rdx), %bnd0 +# endif movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) @@ -386,6 +506,10 @@ L(Shl8LoopStart): jmp L(Shl8LoopStart) L(Shl8LoopExit): +# ifdef __CHKP__ + bndcu (%rcx), %bnd1 + bndcu 7(%rdx), %bnd0 +# endif mov (%rcx), %r9 mov $8, %rsi mov %r9, (%rdx) @@ -393,6 +517,9 @@ L(Shl8LoopExit): .p2align 4 L(Shl12): +# ifdef __CHKP__ + bndcu 4(%rcx), %bnd1 +# endif movaps -12(%rcx), %xmm1 movaps 4(%rcx), %xmm2 L(Shl12Start): @@ -404,6 +531,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 20(%rcx), %xmm2 @@ -417,6 +548,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 20(%rcx), %xmm2 @@ -430,6 +565,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 20(%rcx), %xmm2 @@ -442,6 +581,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) lea 20(%rcx), %rcx lea 16(%rdx), %rdx @@ -456,6 +599,9 @@ L(Shl12Start): .p2align 4 L(Shl12LoopStart): +# ifdef __CHKP__ + bndcu 4(%rcx), %bnd1 +# endif movaps 4(%rcx), %xmm2 movaps 20(%rcx), %xmm3 movaps %xmm3, %xmm6 @@ -476,6 +622,9 @@ L(Shl12LoopStart): lea 64(%rcx), %rcx palignr $12, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%rdx), %bnd0 +# endif movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) @@ -484,6 +633,10 @@ L(Shl12LoopStart): jmp L(Shl12LoopStart) L(Shl12LoopExit): +# ifdef __CHKP__ + bndcu (%rcx), %bnd1 + bndcu 3(%rdx), %bnd0 +# endif mov (%rcx), %r9d mov $4, %rsi mov %r9d, (%rdx) @@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes): jnz L(Exit4) mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 7(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov %rdi, %rax ret @@ -510,6 +666,9 @@ L(ExitHigh): jnz L(Exit12) mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov 8(%rcx), %rax mov %rax, 8(%rdx) @@ -519,6 +678,9 @@ L(ExitHigh): .p2align 4 L(Exit4): movl (%rcx), %eax +# ifdef __CHKP__ + bndcu 3(%rdx), %bnd0 +# endif movl %eax, (%rdx) mov %rdi, %rax ret @@ -526,6 +688,9 @@ L(Exit4): .p2align 4 L(Exit8): mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 7(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov %rdi, %rax ret @@ -533,6 +698,9 @@ L(Exit8): .p2align 4 L(Exit12): mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 11(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov 8(%rcx), %eax mov %eax, 8(%rdx) @@ -542,6 +710,9 @@ L(Exit12): .p2align 4 L(Exit16): mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov 8(%rcx), %rax mov %rax, 8(%rdx)
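
A note on the recurring idiom in the hunks above: every vector load or store is bracketed by a lower-bound check (bndcl) on the first byte touched and an upper-bound check (bndcu) on the last byte touched, so a 16-byte access at (%rsi) pairs with a bndcu at 15(%rsi). The following is a minimal sketch of that idiom, not a hunk from this patch; it assumes, as in the routines above, that %bnd1 carries the source bounds and %bnd0 the destination bounds, and the 16-byte copy itself is purely illustrative:

# ifdef __CHKP__
	bndcl	(%rsi), %bnd1	/* lowest source byte >= lower bound */
	bndcu	15(%rsi), %bnd1	/* highest byte of the 16-byte load <= upper bound */
	bndcl	(%rdi), %bnd0
	bndcu	15(%rdi), %bnd0	/* highest byte of the 16-byte store */
# endif
	movdqu	(%rsi), %xmm0	/* the accesses being guarded */
	movdqu	%xmm0, (%rdi)

The three-instruction bndmov sequences above (after the argument order is restored, as at LABEL(less32bytes)) keep the bounds registers paired with the pointers they describe: MPX provides no bounds-exchange instruction, so %bnd0 and %bnd1 are swapped through %bnd2 as scratch. Schematically:

	xchg	%rsi, %rdi	/* the pointers swap, so their bounds must too */
# ifdef __CHKP__
	bndmov	%bnd0, %bnd2	/* rotate %bnd0/%bnd1 through %bnd2 */
	bndmov	%bnd1, %bnd0
	bndmov	%bnd2, %bnd1
# endif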