author | Liubov Dmitrieva <ldmitrie@sourceware.org> | 2013-08-30 18:37:28 +0400
committer | Liubov Dmitrieva <ldmitrie@sourceware.org> | 2013-10-23 23:51:44 +0400
commit | 029183a4ca3f765f63e7b64bc260622f02b04539 (patch)
tree | c7f6912d9575e27f691f219ad9437e9af39e016e
parent | 01d5454d13d2c21b9a08b28441d37a7ddce089a6 (diff)
download | glibc-ldmitrie/intel_mpx.tar.gz
Implemented bound check support for string/memory routines for x86_64. (ldmitrie/intel_mpx)
TODO: Fix bound check support in strcmp-sse2 and implement it in strspn, strstr and strcspn.
53 files changed, 2308 insertions(+), 108 deletions(-)
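The pattern applied throughout the patch is the same in every routine: before a buffer is touched, the lower and upper bounds of the pointer arguments are checked — with `bndcl`/`bndcu` against `%bnd0`/`%bnd1` in the assembly versions, and with the GCC pointer-bounds checker intrinsics in the C fallbacks such as memcpy-c.c below. A minimal sketch of that C-level shape, assuming GCC's MPX intrinsics (`check_args` is an illustrative helper, not part of the patch):

```c
#include <stddef.h>

/* Sketch only: mirrors the entry checks that memcpy-c.c performs when
   __CHKP__ is defined.  __bnd_chk_ptr_lbounds/__bnd_chk_ptr_ubounds are
   the GCC pointer-bounds checker intrinsics used by the patch; they trap
   if the pointer lies outside the bounds associated with it.  */
static void
check_args (void *dst, const void *src, size_t n)
{
  if (n == 0)
    return;                                          /* nothing is accessed */
  __bnd_chk_ptr_lbounds (dst);                       /* first byte written */
  __bnd_chk_ptr_ubounds ((char *) dst + n - 1);      /* last byte written */
  __bnd_chk_ptr_lbounds (src);                       /* first byte read */
  __bnd_chk_ptr_ubounds ((const char *) src + n - 1);/* last byte read */
}
```

When write-only checking is selected (`__CHKWR__`), the source-side checks are dropped, which is the same split visible in the `#ifndef __CHKWR__` blocks in the C fallbacks below.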
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 08db331923..db6838dc93 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -18,6 +18,9 @@ endif ifeq ($(subdir),string) sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii gen-as-const-headers += locale-defines.sym +ifeq ($(enable-mpx), yes) +sysdep_routines += strcpy_chk-c stpcpy_chk-c +endif endif ifeq ($(subdir),elf) diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions index a437f85e6e..1de589c65b 100644 --- a/sysdeps/x86_64/Versions +++ b/sysdeps/x86_64/Versions @@ -2,6 +2,13 @@ libc { GLIBC_2.14 { memcpy; } +%ifdef __CHKP__ + GLIBC_2.17 { + chkp_memset_nobnd; + chkp_memset_nochk; + chkp_memset_nobnd_nochk; + } +%endif } libm { GLIBC_2.1 { diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index 891ee70aef..205345b43d 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -20,8 +20,17 @@ /* fast SSE2 version with using pmaxub and 64 byte loop */ +# ifdef __CHKP__ +# define RETURN \ + bndcu (%rax), %bnd0; \ + ret +# else +# define RETURN ret +# endif + .text ENTRY(memchr) + movd %rsi, %xmm1 mov %rdi, %rcx @@ -33,6 +42,10 @@ ENTRY(memchr) and $63, %rcx pshufd $0, %xmm1, %xmm1 +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif cmp $48, %rcx ja L(crosscache) @@ -72,7 +85,7 @@ L(crosscache): jbe L(return_null) add %rdi, %rax add %rcx, %rax - ret + RETURN .p2align 4 L(unaligned_no_match): @@ -85,24 +98,36 @@ L(unaligned_no_match): .p2align 4 L(loop_prolog): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif movdqa 16(%rdi), %xmm2 pcmpeqb %xmm1, %xmm2 pmovmskb %xmm2, %eax test %eax, %eax jnz L(matches16) +#ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 +#endif movdqa 32(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax test %eax, %eax jnz L(matches32) +#ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 +#endif movdqa 48(%rdi), %xmm4 pcmpeqb %xmm1, %xmm4 add $64, %rdi @@ -116,24 +141,36 @@ L(loop_prolog): sub $64, %rdx jbe L(exit_loop) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif movdqa 16(%rdi), %xmm2 pcmpeqb %xmm1, %xmm2 pmovmskb %xmm2, %eax test %eax, %eax jnz L(matches16) +#ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 +#endif movdqa 32(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax test %eax, %eax jnz L(matches32) +#ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 +#endif movdqa 48(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax @@ -151,6 +188,9 @@ L(loop_prolog): L(align64_loop): sub $64, %rdx jbe L(exit_loop) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 movdqa 16(%rdi), %xmm2 movdqa 32(%rdi), %xmm3 @@ -192,25 +232,34 @@ L(align64_loop): pmovmskb %xmm1, %eax bsf %eax, %eax lea 48(%rdi, %rax), %rax - ret + RETURN .p2align 4 L(exit_loop): add $32, %rdx jle L(exit_loop_32) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif movdqa 16(%rdi), %xmm2 pcmpeqb %xmm1, %xmm2 pmovmskb %xmm2, %eax test %eax, %eax jnz L(matches16) +#ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 +#endif movdqa 32(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax @@ -219,6 +268,9 @@ L(exit_loop): sub $16, %rdx jle L(return_null) +#ifdef 
__CHKP__ + bndcu 48(%rdi), %bnd0 +#endif pcmpeqb 48(%rdi), %xmm1 pmovmskb %xmm1, %eax test %eax, %eax @@ -229,6 +281,9 @@ L(exit_loop): .p2align 4 L(exit_loop_32): add $32, %rdx +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax @@ -237,6 +292,9 @@ L(exit_loop_32): sub $16, %rdx jbe L(return_null) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif pcmpeqb 16(%rdi), %xmm1 pmovmskb %xmm1, %eax test %eax, %eax @@ -248,25 +306,25 @@ L(exit_loop_32): L(matches0): bsf %eax, %eax lea -16(%rax, %rdi), %rax - ret + RETURN .p2align 4 L(matches): bsf %eax, %eax add %rdi, %rax - ret + RETURN .p2align 4 L(matches16): bsf %eax, %eax lea 16(%rax, %rdi), %rax - ret + RETURN .p2align 4 L(matches32): bsf %eax, %eax lea 32(%rax, %rdi), %rax - ret + RETURN .p2align 4 L(matches_1): @@ -274,7 +332,7 @@ L(matches_1): sub %rax, %rdx jbe L(return_null) add %rdi, %rax - ret + RETURN .p2align 4 L(matches16_1): @@ -282,7 +340,7 @@ L(matches16_1): sub %rax, %rdx jbe L(return_null) lea 16(%rdi, %rax), %rax - ret + RETURN .p2align 4 L(matches32_1): @@ -290,7 +348,7 @@ L(matches32_1): sub %rax, %rdx jbe L(return_null) lea 32(%rdi, %rax), %rax - ret + RETURN .p2align 4 L(matches48_1): @@ -298,7 +356,7 @@ L(matches48_1): sub %rax, %rdx jbe L(return_null) lea 48(%rdi, %rax), %rax - ret + RETURN .p2align 4 L(return_null): diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S index d5c072c7f4..77a7bcaafe 100644 --- a/sysdeps/x86_64/memcmp.S +++ b/sysdeps/x86_64/memcmp.S @@ -23,6 +23,11 @@ ENTRY (memcmp) test %rdx, %rdx jz L(finz) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 +#endif + pxor %xmm0, %xmm0 cmpq $1, %rdx jle L(finr1b) subq %rdi, %rsi @@ -86,6 +91,10 @@ L(s16b): .p2align 4,, 4 L(finr1b): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +#endif movzbl (%rdi), %eax movzbl (%rsi), %edx L(finz1): @@ -132,6 +141,10 @@ L(gt32): andq $15, %r8 jz L(16am) /* Both pointers may be misaligned. */ +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi), %xmm1 movdqu (%rdi, %rsi), %xmm0 pcmpeqb %xmm0, %xmm1 @@ -146,6 +159,10 @@ L(16am): jz L(ATR) testq $16, %rdi jz L(A32) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi, %rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -160,6 +177,10 @@ L(A32): /* Pre-unroll to be ready for unrolled 64B loop. 
*/ testq $32, %rdi jz L(A64) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -167,6 +188,10 @@ L(A32): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -181,6 +206,10 @@ L(A64): jge L(mt32) L(A64main): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -188,6 +217,10 @@ L(A64main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -195,6 +228,10 @@ L(A64main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -202,6 +239,10 @@ L(A64main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -219,6 +260,10 @@ L(mt32): jge L(mt16) L(A32main): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -226,6 +271,10 @@ L(A32main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqu (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -258,6 +307,10 @@ L(ATR): testq $16, %rdi jz L(ATR32) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -273,6 +326,10 @@ L(ATR32): testq $32, %rdi jz L(ATR64) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -280,6 +337,10 @@ L(ATR32): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -292,6 +353,10 @@ L(ATR64): je L(mt32) L(ATR64main): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -299,6 +364,10 @@ L(ATR64main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -306,6 +375,10 @@ L(ATR64main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -313,6 +386,10 @@ L(ATR64main): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -328,6 +405,10 @@ L(ATR64main): jge L(mt16) L(ATR32res): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx @@ -335,6 +416,10 @@ L(ATR32res): jnz L(neq) addq $16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rdi, %rsi), %bnd1 +#endif movdqa (%rdi,%rsi), %xmm0 pcmpeqb (%rdi), %xmm0 pmovmskb %xmm0, %edx diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S index 5a659feede..3afa97c0b7 100644 --- a/sysdeps/x86_64/memrchr.S +++ 
b/sysdeps/x86_64/memrchr.S @@ -27,6 +27,11 @@ ENTRY (memrchr) sub $16, %rdx jbe L(length_less16) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rdx), %bnd0 +#endif + punpcklbw %xmm1, %xmm1 punpcklbw %xmm1, %xmm1 @@ -284,6 +289,10 @@ L(length_less16_offset0): test %edx, %edx jz L(return_null) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rdx), %bnd0 +#endif mov %dl, %cl pcmpeqb (%rdi), %xmm1 @@ -314,6 +323,10 @@ L(length_less16): and $15, %rcx jz L(length_less16_offset0) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rdx), %bnd0 +#endif mov %rdi, %rcx and $15, %rcx mov %cl, %dh diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S index 6c69f4b442..ccb2aae480 100644 --- a/sysdeps/x86_64/memset.S +++ b/sysdeps/x86_64/memset.S @@ -26,6 +26,15 @@ .text #if !defined NOT_IN_libc ENTRY(__bzero) + testq %rsi, %rsi + jz L(only_return) + +#if defined __CHKP__ && defined __CHKWR__ + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rsi), %bnd0 +# endif + + mov %rdi, %rax movq %rdi, %rax /* Set return value. */ movq %rsi, %rdx /* Set n. */ pxor %xmm8, %xmm8 @@ -53,7 +62,20 @@ ENTRY_CHK (__memset_chk) END_CHK (__memset_chk) #endif +#if defined __CHKP__ && defined __CHKWR__ +ENTRY (chkp_memset_nochk) + jmp L(entry_from_chkp_memset_nochk) +END (chkp_memset_nochk) +#endif + ENTRY (memset) + testq %rdx, %rdx + jz L(only_return) +#if defined __CHKP__ && defined __CHKWR__ + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rdx), %bnd0 +L(entry_from_chkp_memset_nochk): +#endif movd %esi, %xmm8 movq %rdi, %rax punpcklbw %xmm8, %xmm8 @@ -71,6 +93,9 @@ L(entry_from_bzero): L(return): rep ret +L(only_return): + movq %rdi, %rax + ret ALIGN (4) L(between_32_64_bytes): movdqu %xmm8, 16(%rdi) @@ -129,6 +154,11 @@ L(between8_16bytes): END (memset) libc_hidden_builtin_def (memset) +#if defined __CHKP__ && defined __CHKWR__ +weak_alias (memset, chkp_memset_nobnd) +weak_alias (chkp_memset_nochk, chkp_memset_nobnd_nochk) +#endif + #if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH strong_alias (__memset_chk, __memset_zero_constant_len_parameter) .section .gnu.warning.__memset_zero_constant_len_parameter diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 203d16eed3..bdf7964d14 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -26,6 +26,29 @@ CFLAGS-strstr.c += -msse4 CFLAGS-strcasestr.c += -msse4 CFLAGS-strcasestr-nonascii.c += -msse4 endif + +ifeq ($(enable-mpx), yes) +sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \ + memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c +#These are C versions written with intrinsics. We need to add checks as intrinsics manually +CFLAGS-varshift.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strcspn-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strpbrk-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strspn-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strstr.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strcasestr.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-strcasestr-nonascii.c += -fno-chkp-check-read -fno-chkp-check-write +#Checks are put manually for these routines. 
+CFLAGS-memcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-mempcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write +CFLAGS-memmove-c.c += -fno-chkp-check-read -fno-chkp-check-write +endif + +ifeq ($(enable-mpx-write-only), yes) +CFLAGS-memcpy-c.c += -D__CHKWR__ +CFLAGS-memmove-c.c += -D__CHKWR__ +endif + endif ifeq ($(subdir),wcsmbs) diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions index 59b185ac8d..5325bdece6 100644 --- a/sysdeps/x86_64/multiarch/Versions +++ b/sysdeps/x86_64/multiarch/Versions @@ -2,4 +2,17 @@ libc { GLIBC_PRIVATE { __get_cpu_features; } +%ifdef __CHKP__ + GLIBC_2.17 { + chkp_memcpy_nobnd; + chkp_memmove_nobnd; + chkp_mempcpy_nobnd; + chkp_memcpy_nobnd_nochk; + chkp_memmove_nobnd_nochk; + chkp_mempcpy_nobnd_nochk; + chkp_memcpy_nochk; + chkp_memmove_nochk; + chkp_mempcpy_nochk; + } +%endif } diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S index 639f02bde3..9809d471ba 100644 --- a/sysdeps/x86_64/multiarch/bcopy.S +++ b/sysdeps/x86_64/multiarch/bcopy.S @@ -3,5 +3,10 @@ .text ENTRY(bcopy) xchg %rdi, %rsi +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif jmp __libc_memmove /* Branch to IFUNC memmove. */ END(bcopy) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index d0992e113f..e3a4163c5b 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3) IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2)) +#ifndef __CHKP__ + /* We use specific version for MPX glibc */ /* Support sysdeps/x86_64/multiarch/memmove_chk.S. */ IFUNC_IMPL (i, name, __memmove_chk, IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, @@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) +#endif /* Support sysdeps/x86_64/multiarch/stpncpy.S. */ IFUNC_IMPL (i, name, stpncpy, @@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2)) #ifdef SHARED +#ifndef __CHKP__ + /* We use specific version of memcpy, memcpy_chk, mempcpy if Intel MPX is enabled. */ /* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */ IFUNC_IMPL (i, name, __memcpy_chk, IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, @@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, __mempcpy_ssse3) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2)) +#endif /* Support sysdeps/x86_64/multiarch/strncmp.S. 
*/ IFUNC_IMPL (i, name, strncmp, diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S index 1ed4200f4c..b5c6675d31 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse4.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S @@ -48,6 +48,13 @@ ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP shl $2, %rdx # endif +# ifdef __CHKP__ + testq %rdx, %rdx + jz L(NoEntryCheck) + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 +L(NoEntryCheck): +# endif pxor %xmm0, %xmm0 cmp $79, %rdx ja L(79bytesormore) @@ -70,6 +77,10 @@ L(firstbyte): ALIGN (4) L(79bytesormore): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rsi), %xmm1 movdqu (%rdi), %xmm2 pxor %xmm1, %xmm2 @@ -90,21 +101,37 @@ L(79bytesormore): L(less128bytes): sub $64, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -112,11 +139,19 @@ L(less128bytes): cmp $32, %rdx jb L(less32bytesin64) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqu 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqu 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -139,41 +174,73 @@ L(128bytesormore): L(less256bytes): sub $128, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqu 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqu 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqu 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqu 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -188,11 +255,19 @@ L(less256bytes): cmp $32, %rdx jb L(less32bytesin128) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -207,81 +282,145 @@ L(less32bytesin128): L(less512bytes): sub $256, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu 
(%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqu 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqu 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqu 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqu 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(128bytesin256) +# ifdef __CHKP__ + bndcu 128(%rdi), %bnd0 + bndcu 128(%rsi), %bnd1 +# endif movdqu 128(%rdi), %xmm2 pxor 128(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(144bytesin256) +# ifdef __CHKP__ + bndcu 144(%rdi), %bnd0 + bndcu 144(%rsi), %bnd1 +# endif movdqu 144(%rdi), %xmm2 pxor 144(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(160bytesin256) +# ifdef __CHKP__ + bndcu 160(%rdi), %bnd0 + bndcu 160(%rsi), %bnd1 +# endif movdqu 160(%rdi), %xmm2 pxor 160(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(176bytesin256) +# ifdef __CHKP__ + bndcu 176(%rdi), %bnd0 + bndcu 176(%rsi), %bnd1 +# endif movdqu 176(%rdi), %xmm2 pxor 176(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(192bytesin256) +# ifdef __CHKP__ + bndcu 192(%rdi), %bnd0 + bndcu 192(%rsi), %bnd1 +# endif movdqu 192(%rdi), %xmm2 pxor 192(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(208bytesin256) +# ifdef __CHKP__ + bndcu 208(%rdi), %bnd0 + bndcu 208(%rsi), %bnd1 +# endif movdqu 208(%rdi), %xmm2 pxor 208(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(224bytesin256) +# ifdef __CHKP__ + bndcu 224(%rdi), %bnd0 + bndcu 224(%rsi), %bnd1 +# endif movdqu 224(%rdi), %xmm2 pxor 224(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(240bytesin256) +# ifdef __CHKP__ + bndcu 240(%rdi), %bnd0 + bndcu 240(%rsi), %bnd1 +# endif movdqu 240(%rdi), %xmm2 pxor 240(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -299,11 +438,19 @@ L(less512bytes): cmp $32, %rdx jb L(less32bytesin256) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -331,18 +478,34 @@ L(512bytesormore): sub $64, %rdx ALIGN (4) L(64bytesormore_loop): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 @@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined): 
L(L2_L3_unaligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqu 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqu 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 @@ -403,21 +582,37 @@ L(2aligned): L(less128bytesin2aligned): sub $64, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -425,11 +620,19 @@ L(less128bytesin2aligned): cmp $32, %rdx jb L(less32bytesin64in2alinged) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqa 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqa 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -453,41 +656,73 @@ L(128bytesormorein2aligned): L(less256bytesin2alinged): sub $128, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqa 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqa 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqa 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqa 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -502,11 +737,19 @@ L(less256bytesin2alinged): cmp $32, %rdx jb L(less32bytesin128in2aligned) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqu 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -523,81 +766,145 @@ L(less32bytesin128in2aligned): L(256bytesormorein2aligned): sub $256, %rdx +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + 
bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(16bytesin256) +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm2 pxor 16(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(32bytesin256) +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm2 pxor 32(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(48bytesin256) +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm2 pxor 48(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(64bytesin256) +# ifdef __CHKP__ + bndcu 64(%rdi), %bnd0 + bndcu 64(%rsi), %bnd1 +# endif movdqa 64(%rdi), %xmm2 pxor 64(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(80bytesin256) +# ifdef __CHKP__ + bndcu 80(%rdi), %bnd0 + bndcu 80(%rsi), %bnd1 +# endif movdqa 80(%rdi), %xmm2 pxor 80(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(96bytesin256) +# ifdef __CHKP__ + bndcu 96(%rdi), %bnd0 + bndcu 96(%rsi), %bnd1 +# endif movdqa 96(%rdi), %xmm2 pxor 96(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(112bytesin256) +# ifdef __CHKP__ + bndcu 112(%rdi), %bnd0 + bndcu 112(%rsi), %bnd1 +# endif movdqa 112(%rdi), %xmm2 pxor 112(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(128bytesin256) +# ifdef __CHKP__ + bndcu 128(%rdi), %bnd0 + bndcu 128(%rsi), %bnd1 +# endif movdqa 128(%rdi), %xmm2 pxor 128(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(144bytesin256) +# ifdef __CHKP__ + bndcu 144(%rdi), %bnd0 + bndcu 144(%rsi), %bnd1 +# endif movdqa 144(%rdi), %xmm2 pxor 144(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(160bytesin256) +# ifdef __CHKP__ + bndcu 160(%rdi), %bnd0 + bndcu 160(%rsi), %bnd1 +# endif movdqa 160(%rdi), %xmm2 pxor 160(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(176bytesin256) +# ifdef __CHKP__ + bndcu 176(%rdi), %bnd0 + bndcu 176(%rsi), %bnd1 +# endif movdqa 176(%rdi), %xmm2 pxor 176(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(192bytesin256) +# ifdef __CHKP__ + bndcu 192(%rdi), %bnd0 + bndcu 192(%rsi), %bnd1 +# endif movdqa 192(%rdi), %xmm2 pxor 192(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(208bytesin256) +# ifdef __CHKP__ + bndcu 208(%rdi), %bnd0 + bndcu 208(%rsi), %bnd1 +# endif movdqa 208(%rdi), %xmm2 pxor 208(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(224bytesin256) +# ifdef __CHKP__ + bndcu 224(%rdi), %bnd0 + bndcu 224(%rsi), %bnd1 +# endif movdqa 224(%rdi), %xmm2 pxor 224(%rsi), %xmm2 ptest %xmm2, %xmm0 jnc L(240bytesin256) +# ifdef __CHKP__ + bndcu 240(%rdi), %bnd0 + bndcu 240(%rsi), %bnd1 +# endif movdqa 240(%rdi), %xmm2 pxor 240(%rsi), %xmm2 ptest %xmm2, %xmm0 @@ -648,18 +955,34 @@ L(512bytesormorein2aligned): sub $64, %rdx ALIGN (4) L(64bytesormore_loopin2aligned): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 @@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined): L(L2_L3_aligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +# endif movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 movdqa %xmm2, %xmm1 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 + bndcu 16(%rsi), %bnd1 +# endif movdqa 16(%rdi), %xmm3 pxor 16(%rsi), %xmm3 por %xmm3, %xmm1 +# ifdef 
__CHKP__ + bndcu 32(%rdi), %bnd0 + bndcu 32(%rsi), %bnd1 +# endif movdqa 32(%rdi), %xmm4 pxor 32(%rsi), %xmm4 por %xmm4, %xmm1 +# ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 + bndcu 48(%rsi), %bnd1 +# endif movdqa 48(%rdi), %xmm5 pxor 48(%rsi), %xmm5 por %xmm5, %xmm1 diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c new file mode 100644 index 0000000000..6fa50eada1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy-c.c @@ -0,0 +1,80 @@ +/* C-version of memcpy for using when Intel MPX is on + in order to prosess with a buffer of pointers correctly. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +void * +__memcpy (void *dst, const void *src, size_t n) +{ + if (!n) return dst; + + __bnd_chk_ptr_lbounds(dst); + __bnd_chk_ptr_ubounds(dst+n-1); +#ifndef __CHKWR__ + __bnd_chk_ptr_lbounds(src); + __bnd_chk_ptr_ubounds(src+n-1); +#endif + + return chkp_memcpy_nochk(dst, src, n); +} + +void * +chkp_memcpy_nochk (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = dst; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + while (n--) + *d++ = *s++; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + return ret; +} + +weak_alias (__memcpy, __GI_memcpy) + +# if defined SHARED && !defined NOT_IN_libc && !defined IA32 +# include <shlib-compat.h> +versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14); +# else +weak_alias (__memcpy, memcpy) +# endif diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S new file mode 100644 index 0000000000..7fedbeef8e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S @@ -0,0 +1,5 @@ +/* optimized version of memcpy without any checks or copying bounds. 
*/ +#define MEMCPY chkp_memcpy_nobnd_nochk +#undef __CHKP__ +#undef __CHKWR__ +#include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S index fc9fcef27d..16b4e680a1 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S @@ -27,7 +27,11 @@ #include "asm-syntax.h" #ifndef MEMCPY -# define MEMCPY __memcpy_ssse3_back +# if defined __CHKP__ || defined __CHKWR__ +# define MEMCPY chkp_memcpy_nobnd +# else +# define MEMCPY __memcpy_ssse3_back +# endif # define MEMCPY_CHK __memcpy_chk_ssse3_back #endif @@ -48,7 +52,7 @@ ud2 .section .text.ssse3,"ax",@progbits -#if !defined USE_AS_BCOPY +#if !defined USE_AS_BCOPY && defined MEMCPY_CHK ENTRY (MEMCPY_CHK) cmpq %rdx, %rcx jb HIDDEN_JUMPTARGET (__chk_fail) @@ -56,6 +60,15 @@ END (MEMCPY_CHK) #endif ENTRY (MEMCPY) +#ifdef __CHKP__ + testq %rdx, %rdx + jz L(NoEntryCheck) + bndcl (%rdi), %bnd0 + bndcu -1(%rdi, %rdx), %bnd0 + bndcl (%rsi), %bnd1 + bndcu -1(%rsi, %rdx), %bnd1 +#endif + mov %rdi, %rax #ifdef USE_AS_MEMPCPY add %rdx, %rax @@ -87,6 +100,15 @@ L(bk_write): BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) #endif +#ifdef __CHKP__ +L(NoEntryCheck): + mov %rdi, %rax +# ifdef USE_AS_MEMPCPY + add %rdx, %rax +# endif + ret +#endif + ALIGN (4) L(144bytesormore): diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index a1e5031376..fc5ab2da03 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -18,14 +18,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <shlib-compat.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <shlib-compat.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. In static binaries we need memcpy before the initialization happened. */ -#if defined SHARED && !defined NOT_IN_libc +# if defined SHARED && !defined NOT_IN_libc .text ENTRY(__new_memcpy) .type __new_memcpy, @gnu_indirect_function @@ -43,37 +44,39 @@ ENTRY(__new_memcpy) 3: ret END(__new_memcpy) -# undef ENTRY -# define ENTRY(name) \ +# undef ENTRY +# define ENTRY(name) \ .type __memcpy_sse2, @function; \ .globl __memcpy_sse2; \ .hidden __memcpy_sse2; \ .p2align 4; \ __memcpy_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END -# define END(name) \ +# undef END +# define END(name) \ cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2 -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ .type __memcpy_chk_sse2, @function; \ .globl __memcpy_chk_sse2; \ .p2align 4; \ __memcpy_chk_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ +# undef END_CHK +# define END_CHK(name) \ cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2 -# undef libc_hidden_builtin_def +# undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal memcpy calls through a PLT. The speedup we get from using SSSE3 instruction is likely eaten away by the indirect call in the PLT. 
*/ -# define libc_hidden_builtin_def(name) \ +# define libc_hidden_builtin_def(name) \ .globl __GI_memcpy; __GI_memcpy = __memcpy_sse2 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); -#endif +# endif + +# include "../memcpy.S" -#include "../memcpy.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c new file mode 100644 index 0000000000..1eee86c639 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c @@ -0,0 +1 @@ +#include <debug/memcpy_chk.c> diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S index ad01d8cd9f..6f87f2686d 100644 --- a/sysdeps/x86_64/multiarch/memcpy_chk.S +++ b/sysdeps/x86_64/multiarch/memcpy_chk.S @@ -18,14 +18,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. There are no multiarch memcpy functions for static binaries. */ -#ifndef NOT_IN_libc -# ifdef SHARED +# ifndef NOT_IN_libc +# ifdef SHARED .text ENTRY(__memcpy_chk) .type __memcpy_chk, @gnu_indirect_function @@ -41,7 +42,8 @@ ENTRY(__memcpy_chk) leaq __memcpy_chk_ssse3_back(%rip), %rax 2: ret END(__memcpy_chk) -# else -# include "../memcpy_chk.S" +# else +# include "../memcpy_chk.S" +# endif # endif #endif diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c new file mode 100644 index 0000000000..7111128e75 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-c.c @@ -0,0 +1,118 @@ +/* C-version of memmove for using when Intel MPX is enabled + in order to prosess with a buffer of pointers correctly. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <stddef.h> + +void * +__memmove (void *dst, const void *src, size_t n) +{ + if (n == 0) return dst; + + __bnd_chk_ptr_lbounds(dst); + __bnd_chk_ptr_ubounds(dst+n-1); +#ifndef __CHKWR__ + __bnd_chk_ptr_lbounds(src); + __bnd_chk_ptr_ubounds(src+n-1); +#endif + return chkp_memmove_nochk(dst, src, n); +} + + +void * +chkp_memmove_nochk (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = dst; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + if (s < d) + { + /* backward copying */ + d += n; + s += n; + while (n--) + *--d = *--s; + } + else + /* forward copying */ + while (n--) + *d++ = *s++; + } + else + { + if (s < d) + { + offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1); + /* backward copying */ + d += n; + s += n; + while (n-- && offset_src--) + *--d = *--s; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *--d1 = *--s1; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *--d = *--s; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + /* forward copying */ + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + } + return ret; +} + +weak_alias (__memmove, __libc_memmove) +weak_alias (__memmove, __GI_memmove) +weak_alias (__memmove, memmove) + +# if defined SHARED && !defined NOT_IN_libc +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14) +compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5); +# endif +# endif diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S new file mode 100644 index 0000000000..2a1f3e67b7 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S @@ -0,0 +1,6 @@ +/* optimized version of memmove without any checks or copying bounds. */ +#define USE_AS_MEMMOVE +#define MEMCPY chkp_memmove_nobnd_nochk +#undef __CHKP__ +#undef __CHKWR__ +#include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S index f9a4e9aff9..478141b14a 100644 --- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S @@ -1,4 +1,10 @@ #define USE_AS_MEMMOVE -#define MEMCPY __memmove_ssse3_back +#if defined __CHKP__ || defined __CHKWR__ +/* version of memmove with no copying of bounds support + if there are pointers in the source buffer. */ +# define MEMCPY chkp_memmove_nobnd +# else +# define MEMCPY __memmove_ssse3_back +#endif #define MEMCPY_CHK __memmove_chk_ssse3_back #include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c index 8149c487d5..0d2c6f0266 100644 --- a/sysdeps/x86_64/multiarch/memmove.c +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -17,31 +17,32 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#ifndef NOT_IN_libc -# define MEMMOVE __memmove_sse2 -# ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ +#ifndef __CHKP__ +# ifndef NOT_IN_libc +# define MEMMOVE __memmove_sse2 +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ __hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2); -# endif +# endif /* Redefine memmove so that the compiler won't complain about the type mismatch with the IFUNC selector in strong_alias, below. */ -# undef memmove -# define memmove __redirect_memmove -# include <string.h> -# undef memmove +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# undef memmove extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden; -#endif +# endif -#include "string/memmove.c" +# include "string/memmove.c" -#ifndef NOT_IN_libc -# include <shlib-compat.h> -# include "init-arch.h" +# ifndef NOT_IN_libc +# include <shlib-compat.h> +# include "init-arch.h" /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ @@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove, strong_alias (__libc_memmove, memmove) -# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14) +# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14) compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5); +# endif # endif #endif diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c new file mode 100644 index 0000000000..bbf53d00d3 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c @@ -0,0 +1 @@ +#include <debug/memmove_chk.c> diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c index 17ed460324..c1b0b9304b 100644 --- a/sysdeps/x86_64/multiarch/memmove_chk.c +++ b/sysdeps/x86_64/multiarch/memmove_chk.c @@ -17,19 +17,21 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <string.h> -#include "init-arch.h" +#ifndef __CHKP__ +# include <string.h> +# include "init-arch.h" -#define MEMMOVE_CHK __memmove_chk_sse2 +# define MEMMOVE_CHK __memmove_chk_sse2 extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden; -#include "debug/memmove_chk.c" +# include "debug/memmove_chk.c" libc_ifunc (__memmove_chk, HAS_SSSE3 ? (HAS_FAST_COPY_BACKWARD ? __memmove_chk_ssse3_back : __memmove_chk_ssse3) : __memmove_chk_sse2); +#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c new file mode 100644 index 0000000000..522fb86e3e --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy-c.c @@ -0,0 +1,36 @@ +/* C-version of mempcpy for using when Intel MPX is enabled + in order to process with an array of pointers correctly. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +void * +mempcpy (void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n) + n; +} + +void * +chkp_mempcpy_nochk (void *dst, const void *src, size_t n) +{ + return chkp_memcpy_nochk(dst, src, n) + n; +} + +weak_alias (mempcpy, __GI_mempcpy) +weak_alias (mempcpy, __GI___mempcpy) +weak_alias (mempcpy, __mempcpy) diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S new file mode 100644 index 0000000000..eb929f4182 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S @@ -0,0 +1,6 @@ +/* optimized version of mempcpy without any checks or copying bounds. */ +#define USE_AS_MEMPCPY +#define MEMCPY chkp_mempcpy_nobnd_nochk +#undef __CHKP__ +#undef __CHKWR__ +#include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S index 82ffacb8fb..f32ecfc76e 100644 --- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S @@ -1,4 +1,12 @@ #define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_ssse3_back -#define MEMCPY_CHK __mempcpy_chk_ssse3_back + +#if defined __CHKP__ || defined __CHKWR__ +/* version of mempcpy with no copying of bounds support + if there are pointers in the source buffer. */ +# define MEMCPY chkp_mempcpy_nobnd +#else +# define MEMCPY __mempcpy_ssse3_back +#endif + +#define MEMCPY_CHK __mempcpy_chk_ssse3_back #include "memcpy-ssse3-back.S" diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S index b8b7fcd121..4ec5825989 100644 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ b/sysdeps/x86_64/multiarch/mempcpy.S @@ -18,13 +18,14 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. In static binaries we need mempcpy before the initialization happened. 
*/ -#if defined SHARED && !defined NOT_IN_libc +# if defined SHARED && !defined NOT_IN_libc ENTRY(__mempcpy) .type __mempcpy, @gnu_indirect_function cmpl $0, KIND_OFFSET+__cpu_features(%rip) @@ -40,38 +41,40 @@ ENTRY(__mempcpy) 2: ret END(__mempcpy) -# undef ENTRY -# define ENTRY(name) \ +# undef ENTRY +# define ENTRY(name) \ .type __mempcpy_sse2, @function; \ .p2align 4; \ .globl __mempcpy_sse2; \ .hidden __mempcpy_sse2; \ __mempcpy_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END -# define END(name) \ +# undef END +# define END(name) \ cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2 -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ .type __mempcpy_chk_sse2, @function; \ .globl __mempcpy_chk_sse2; \ .p2align 4; \ __mempcpy_chk_sse2: cfi_startproc; \ CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ +# undef END_CHK +# define END_CHK(name) \ cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2 -# undef libc_hidden_def -# undef libc_hidden_builtin_def +# undef libc_hidden_def +# undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal mempcpy calls through a PLT. The speedup we get from using SSSE3 instruction is likely eaten away by the indirect call in the PLT. */ -# define libc_hidden_def(name) \ +# define libc_hidden_def(name) \ .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2 -# define libc_hidden_builtin_def(name) \ +# define libc_hidden_builtin_def(name) \ .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2 -#endif +# endif + +# include "../mempcpy.S" -#include "../mempcpy.S" +#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c new file mode 100644 index 0000000000..ba170784c3 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c @@ -0,0 +1 @@ +#include <debug/mempcpy_chk.c> diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S index 3801db399b..98acf9691c 100644 --- a/sysdeps/x86_64/multiarch/mempcpy_chk.S +++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S @@ -18,14 +18,15 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> +#if !defined __CHKP__ && !defined __CHKWR__ +# include <sysdep.h> +# include <init-arch.h> /* Define multiple versions only for the definition in lib and for DSO. There are no multiarch mempcpy functions for static binaries. */ -#ifndef NOT_IN_libc -# ifdef SHARED +# ifndef NOT_IN_libc +# ifdef SHARED .text ENTRY(__mempcpy_chk) .type __mempcpy_chk, @gnu_indirect_function @@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk) leaq __mempcpy_chk_ssse3_back(%rip), %rax 2: ret END(__mempcpy_chk) -# else -# include "../mempcpy_chk.S" +# else +# include "../mempcpy_chk.S" +# endif # endif #endif diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S index 028c6d3d74..a3535ad500 100644 --- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S @@ -25,6 +25,14 @@ # define STRCAT __strcat_sse2_unaligned # endif +# ifdef __CHKP__ +# define RETURN \ + bndcu -1(%rdi, %rax), %bnd0; \ + ret +# else +# define RETURN ret +# endif + # define USE_AS_STRCAT .text @@ -37,6 +45,10 @@ ENTRY (STRCAT) /* Inline corresponding strlen file, temporary until new strcpy implementation gets merged. 
*/ +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif xor %rax, %rax mov %edi, %ecx and $0x3f, %ecx @@ -67,84 +79,132 @@ L(align16_start): pxor %xmm1, %xmm1 pxor %xmm2, %xmm2 pxor %xmm3, %xmm3 +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx jnz L(exit64) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $64, %rax pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx jnz L(exit64) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $64, %rax pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx jnz L(exit64) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $64, %rax pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%rax), %bnd0 +# endif pcmpeqb 32(%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%rax), %bnd0 +# endif pcmpeqb 48(%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif pcmpeqb 64(%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -153,6 +213,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 80(%rax), %bnd0 +# endif pcmpeqb 80(%rax), %xmm0 add $80, %rax pmovmskb %xmm0, %edx @@ -162,6 +225,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm1 add $16, %rax pmovmskb %xmm1, %edx @@ -171,6 +237,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm2 add $16, %rax pmovmskb %xmm2, %edx @@ -180,6 +249,9 @@ L(align16_start): test $0x3f, %rax jz L(align64_loop) +# ifdef __CHKP__ + bndcu 16(%rax), %bnd0 +# endif pcmpeqb 16(%rax), %xmm3 add $16, %rax pmovmskb %xmm3, %edx @@ -187,8 +259,12 @@ L(align16_start): jnz L(exit) add $16, %rax + .p2align 4 L(align64_loop): +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif movaps (%rax), %xmm4 pminub 16(%rax), %xmm4 movaps 32(%rax), %xmm5 diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S index f170238b55..4311e8689c 100644 --- a/sysdeps/x86_64/multiarch/strchr.S +++ b/sysdeps/x86_64/multiarch/strchr.S @@ -91,6 +91,10 @@ __strchr_sse42: CALL_MCOUNT testb %sil, %sil je __strend_sse4 +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif 
pxor %xmm2, %xmm2 movd %esi, %xmm1 movl %edi, %ecx @@ -124,6 +128,9 @@ __strchr_sse42: ja L(return_null) L(unaligned_match): addq %rdi, %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif ret .p2align 4 @@ -135,15 +142,27 @@ L(unaligned_no_match): L(loop): addq $16, %r8 L(aligned_start): +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif pcmpistri $0x2, (%r8), %xmm1 jbe L(wrap) jmp L(loop) @@ -159,6 +178,9 @@ L(return_null): .p2align 4 L(loop_exit): leaq (%r8,%rcx), %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif ret cfi_endproc .size __strchr_sse42, .-__strchr_sse42 diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S index c84f1c2b31..edfa915707 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S @@ -127,6 +127,14 @@ STRCMP_SSE42: je LABEL(Byte0) mov %rdx, %r11 #endif + +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +#endif + mov %esi, %ecx mov %edi, %eax /* Use 64bit AND here to avoid long NOP padding. */ @@ -210,6 +218,10 @@ LABEL(touppermask): #endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +#endif /* * Determine source and destination string offsets from 16-byte @@ -231,6 +243,11 @@ LABEL(crosscache): mov %edx, %r8d /* r8d is offset flag for exit tail */ xchg %ecx, %eax xchg %rsi, %rdi +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif LABEL(bigger): movdqa (%rdi), %xmm2 movdqa (%rsi), %xmm1 @@ -280,6 +297,10 @@ LABEL(ashr_0): mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ .p2align 4 LABEL(ashr_0_use): +#ifdef __CHKP__ + bndcu -1(%rdi, %rdx), %bnd0 + bndcu -1(%rsi, %rdx), %bnd1 +#endif movdqa (%rdi,%rdx), %xmm0 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 @@ -295,6 +316,10 @@ LABEL(ashr_0_use): jbe LABEL(strcmp_exitz) #endif +#ifdef __CHKP__ + bndcu -1(%rdi, %rdx), %bnd0 + bndcu -1(%rsi, %rdx), %bnd1 +#endif movdqa (%rdi,%rdx), %xmm0 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 @@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use): jbe LABEL(strcmp_exitz) #endif lea -16(%rdx, %rcx), %rcx +#ifdef __CHKP__ + bndcu -1(%rdi, %rcx), %bnd0 + bndcu -1(%rsi, %rcx), %bnd1 +#endif movzbl (%rdi, %rcx), %eax movzbl (%rsi, %rcx), %edx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -362,6 +391,15 @@ LABEL(ashr_1): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_1_use) +LABEL(ashr_1_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_1_restart_use) +#endif .p2align 4 LABEL(loop_ashr_1_use): @@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $14, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_1_check) +#else ja LABEL(nibble_ashr_1_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ 
-450,6 +492,15 @@ LABEL(ashr_2): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_2_use) +LABEL(ashr_2_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_2_restart_use) +#endif .p2align 4 LABEL(loop_ashr_2_use): @@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $13, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_2_check) +#else ja LABEL(nibble_ashr_2_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -539,6 +594,15 @@ LABEL(ashr_3): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_3_use) +LABEL(ashr_3_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_3_restart_use) +#endif LABEL(loop_ashr_3_use): add $16, %r10 @@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $12, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_3_check) +#else ja LABEL(nibble_ashr_3_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -627,6 +695,15 @@ LABEL(ashr_4): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_4_use) +LABEL(ashr_4_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_4_restart_use) +#endif .p2align 4 LABEL(loop_ashr_4_use): @@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $11, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_4_check) +#else ja LABEL(nibble_ashr_4_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -716,6 +797,15 @@ LABEL(ashr_5): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_5_use) +LABEL(ashr_5_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_5_restart_use) +#endif .p2align 4 LABEL(loop_ashr_5_use): @@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $10, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_5_check) +#else ja LABEL(nibble_ashr_5_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -806,6 +900,15 @@ LABEL(ashr_6): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_6_use) +LABEL(ashr_6_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_6_restart_use) +#endif .p2align 4 LABEL(loop_ashr_6_use): @@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $9, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_6_check) +#else ja LABEL(nibble_ashr_6_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -895,6 +1002,15 @@ LABEL(ashr_7): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of 
sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_7_use) +LABEL(ashr_7_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_7_restart_use) +#endif .p2align 4 LABEL(loop_ashr_7_use): @@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $8, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_7_check) +#else ja LABEL(nibble_ashr_7_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -984,6 +1104,15 @@ LABEL(ashr_8): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_8_use) +LABEL(ashr_8_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_8_restart_use) +#endif .p2align 4 LABEL(loop_ashr_8_use): @@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $7, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_8_check) +#else ja LABEL(nibble_ashr_8_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1073,6 +1206,15 @@ LABEL(ashr_9): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_9_use) +LABEL(ashr_9_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_9_restart_use) +#endif .p2align 4 LABEL(loop_ashr_9_use): @@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $6, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_9_check) +#else ja LABEL(nibble_ashr_9_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1163,6 +1309,15 @@ LABEL(ashr_10): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_10_use) +LABEL(ashr_10_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_10_restart_use) +#endif .p2align 4 LABEL(loop_ashr_10_use): @@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $5, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_10_check) +#else ja LABEL(nibble_ashr_10_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1252,6 +1411,15 @@ LABEL(ashr_11): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_11_use) +LABEL(ashr_11_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_11_restart_use) +#endif .p2align 4 LABEL(loop_ashr_11_use): @@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $4, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_11_check) +#else ja LABEL(nibble_ashr_11_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1341,6 +1513,15 @@ LABEL(ashr_12): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_12_use) 
+LABEL(ashr_12_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_12_restart_use) +#endif .p2align 4 LABEL(loop_ashr_12_use): @@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $3, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_12_check) +#else ja LABEL(nibble_ashr_12_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1431,6 +1616,15 @@ LABEL(ashr_13): sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_13_use) +LABEL(ashr_13_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_13_restart_use) +#endif .p2align 4 LABEL(loop_ashr_13_use): @@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $2, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_13_check) +#else ja LABEL(nibble_ashr_13_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1521,6 +1719,15 @@ LABEL(ashr_14): sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_14_use) +LABEL(ashr_14_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_14_restart_use) +#endif .p2align 4 LABEL(loop_ashr_14_use): @@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $1, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_14_check) +#else ja LABEL(nibble_ashr_14_restart_use) +#endif jmp LABEL(nibble_ashr_exit_use) @@ -1613,6 +1824,15 @@ LABEL(ashr_15): sub $0x1000, %r10 /* subtract 4K pagesize */ mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ +#ifdef __CHKP__ + bndcu -16(%rdi, %rdx), %bnd0 + bndcu -16(%rsi, %rdx), %bnd1 + jmp LABEL(loop_ashr_15_use) +LABEL(ashr_15_check): + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 + jmp LABEL(nibble_ashr_15_restart_use) +#endif .p2align 4 LABEL(loop_ashr_15_use): @@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use): jae LABEL(nibble_ashr_exit_use) #endif cmp $0, %ecx +#ifdef __CHKP__ + ja LABEL(ashr_15_check) +#else ja LABEL(nibble_ashr_15_restart_use) +#endif LABEL(nibble_ashr_exit_use): #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1691,6 +1915,11 @@ LABEL(exit_use): test %r8d, %r8d jz LABEL(ret_use) xchg %eax, %edx +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif LABEL(ret_use): #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx @@ -1707,6 +1936,11 @@ LABEL(less32bytes): test %r8d, %r8d jz LABEL(ret) xchg %rsi, %rdi /* recover original order according to flag(%r8d) */ +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif .p2align 4 LABEL(ret): @@ -1717,6 +1951,10 @@ LABEL(less16bytes): sub %rdx, %r11 jbe LABEL(strcmp_exitz) #endif +#ifdef __CHKP__ + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 +#endif movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S index 7710173c68..e6baee92db 100644 --- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S @@ -33,7 +33,7 @@ lea TABLE(%rip), %r11; \ movslq (%r11, INDEX, SCALE), %rcx; \ lea (%r11, %rcx), %rcx; \ - jmp *%rcx 
+ jmp *%rcx # ifndef USE_AS_STRCAT @@ -51,6 +51,16 @@ ENTRY (STRCPY) # endif +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + bndcu -1(%rdi, %rdx), %bnd0 +# endif +# endif + and $63, %rcx cmp $32, %rcx jbe L(SourceStringAlignmentLess32) @@ -79,6 +89,9 @@ ENTRY (STRCPY) test %rdx, %rdx jnz L(CopyFrom1To16BytesTail) +# ifdef __CHKP__ + bndcu 16(%rsi), %bnd1 +# endif pcmpeqb 16(%rsi), %xmm0 pmovmskb %xmm0, %rdx @@ -91,6 +104,9 @@ ENTRY (STRCPY) jnz L(CopyFrom1To32Bytes) movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */ +# ifdef __CHKP__ + bndcu 15(%rdi), %bnd0 +# endif movdqu %xmm1, (%rdi) /* If source address alignment != destination address alignment */ @@ -101,6 +117,10 @@ L(Unalign16Both): add %rcx, %r8 # endif mov $16, %rcx +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movdqa (%rsi, %rcx), %xmm1 movaps 16(%rsi, %rcx), %xmm2 movdqu %xmm1, (%rdi, %rcx) @@ -118,6 +138,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm3 movdqu %xmm2, (%rdi, %rcx) pcmpeqb %xmm3, %xmm0 @@ -134,6 +158,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm4 movdqu %xmm3, (%rdi, %rcx) pcmpeqb %xmm4, %xmm0 @@ -150,6 +178,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm1 movdqu %xmm4, (%rdi, %rcx) pcmpeqb %xmm1, %xmm0 @@ -166,6 +198,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm2 movdqu %xmm1, (%rdi, %rcx) pcmpeqb %xmm2, %xmm0 @@ -182,6 +218,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movaps 16(%rsi, %rcx), %xmm3 movdqu %xmm2, (%rdi, %rcx) pcmpeqb %xmm3, %xmm0 @@ -198,6 +238,10 @@ L(Unalign16Both): jnz L(CopyFrom1To16Bytes) # endif +# ifdef __CHKP__ + bndcu 16(%rsi, %rcx), %bnd1 + bndcu 15(%rdi, %rcx), %bnd0 +# endif movdqu %xmm3, (%rdi, %rcx) mov %rsi, %rdx lea 16(%rsi, %rcx), %rsi @@ -208,6 +252,9 @@ L(Unalign16Both): lea 128(%r8, %rdx), %r8 # endif L(Unaligned64Loop): +# ifdef __CHKP__ + bndcu 48(%rsi), %bnd1 +# endif movaps (%rsi), %xmm2 movaps %xmm2, %xmm4 movaps 16(%rsi), %xmm5 @@ -229,6 +276,10 @@ L(Unaligned64Loop): L(Unaligned64Loop_start): add $64, %rdi add $64, %rsi +# ifdef __CHKP__ + bndcu (%rsi), %bnd1 + bndcu (%rdi), %bnd0 +# endif movdqu %xmm4, -64(%rdi) movaps (%rsi), %xmm2 movdqa %xmm2, %xmm4 @@ -271,16 +322,28 @@ L(Unaligned64Leave): jnz L(CopyFrom1To16BytesUnaligned_32) bsf %rcx, %rdx +# ifdef __CHKP__ + bndcu 47(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) movdqu %xmm5, 16(%rdi) movdqu %xmm6, 32(%rdi) # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT # ifdef USE_AS_STPCPY +# ifdef __CHKP__ + bndcu 48(%rdi, %rdx), %bnd0 +# endif lea 48(%rdi, %rdx), %rax # endif +# ifdef __CHKP__ + bndcu 63(%rdi), %bnd0 +# endif movdqu %xmm7, 48(%rdi) add $15, %r8 sub %rdx, %r8 +# ifdef __CHKP__ + bndcu 49(%rdi, %rdx), %bnd0 +# endif lea 49(%rdi, %rdx), %rdi jmp L(StrncpyFillTailWithZero) # else @@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32): test %rdx, %rdx jnz 
L(CopyFrom1To16BytesTail1) +# ifdef __CHKP__ + bndcu 16(%rsi), %bnd1 + bndcu 15(%rdi), %bnd0 +# endif pcmpeqb %xmm2, %xmm0 movdqu %xmm1, (%rdi) pmovmskb %xmm0, %rdx @@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0): # ifdef USE_AS_STPCPY lea (%rdi, %rdx), %rax # endif +# ifdef __CHKP__ + bndcu 15(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) add $63, %r8 sub %rdx, %r8 @@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0): .p2align 4 L(CopyFrom1To16BytesUnaligned_16): bsf %rcx, %rdx +# ifdef __CHKP__ + bndcu 31(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT # ifdef USE_AS_STPCPY @@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16): .p2align 4 L(CopyFrom1To16BytesUnaligned_32): bsf %rdx, %rdx +# ifdef __CHKP__ + bndcu 47(%rdi), %bnd0 +# endif movdqu %xmm4, (%rdi) movdqu %xmm5, 16(%rdi) # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT @@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3): .p2align 4 L(Exit1): +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 +# endif mov %dh, (%rdi) # ifdef USE_AS_STPCPY lea (%rdi), %rax @@ -543,6 +622,9 @@ L(Exit1): .p2align 4 L(Exit2): mov (%rsi), %dx +# ifdef __CHKP__ + bndcu 1(%rdi), %bnd0 +# endif mov %dx, (%rdi) # ifdef USE_AS_STPCPY lea 1(%rdi), %rax @@ -557,6 +639,9 @@ L(Exit2): .p2align 4 L(Exit3): mov (%rsi), %cx +# ifdef __CHKP__ + bndcu 2(%rdi), %bnd0 +# endif mov %cx, (%rdi) mov %dh, 2(%rdi) # ifdef USE_AS_STPCPY @@ -572,6 +657,9 @@ L(Exit3): .p2align 4 L(Exit4): mov (%rsi), %edx +# ifdef __CHKP__ + bndcu 3(%rdi), %bnd0 +# endif mov %edx, (%rdi) # ifdef USE_AS_STPCPY lea 3(%rdi), %rax @@ -586,6 +674,9 @@ L(Exit4): .p2align 4 L(Exit5): mov (%rsi), %ecx +# ifdef __CHKP__ + bndcu 4(%rdi), %bnd0 +# endif mov %dh, 4(%rdi) mov %ecx, (%rdi) # ifdef USE_AS_STPCPY @@ -602,6 +693,9 @@ L(Exit5): L(Exit6): mov (%rsi), %ecx mov 4(%rsi), %dx +# ifdef __CHKP__ + bndcu 5(%rdi), %bnd0 +# endif mov %ecx, (%rdi) mov %dx, 4(%rdi) # ifdef USE_AS_STPCPY @@ -618,6 +712,9 @@ L(Exit6): L(Exit7): mov (%rsi), %ecx mov 3(%rsi), %edx +# ifdef __CHKP__ + bndcu 6(%rdi), %bnd0 +# endif mov %ecx, (%rdi) mov %edx, 3(%rdi) # ifdef USE_AS_STPCPY @@ -633,6 +730,9 @@ L(Exit7): .p2align 4 L(Exit8): mov (%rsi), %rdx +# ifdef __CHKP__ + bndcu 7(%rdi), %bnd0 +# endif mov %rdx, (%rdi) # ifdef USE_AS_STPCPY lea 7(%rdi), %rax @@ -647,6 +747,9 @@ L(Exit8): .p2align 4 L(Exit9): mov (%rsi), %rcx +# ifdef __CHKP__ + bndcu 8(%rdi), %bnd0 +# endif mov %dh, 8(%rdi) mov %rcx, (%rdi) # ifdef USE_AS_STPCPY @@ -663,6 +766,9 @@ L(Exit9): L(Exit10): mov (%rsi), %rcx mov 8(%rsi), %dx +# ifdef __CHKP__ + bndcu 9(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %dx, 8(%rdi) # ifdef USE_AS_STPCPY @@ -679,6 +785,9 @@ L(Exit10): L(Exit11): mov (%rsi), %rcx mov 7(%rsi), %edx +# ifdef __CHKP__ + bndcu 10(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %edx, 7(%rdi) # ifdef USE_AS_STPCPY @@ -695,6 +804,9 @@ L(Exit11): L(Exit12): mov (%rsi), %rcx mov 8(%rsi), %edx +# ifdef __CHKP__ + bndcu 11(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %edx, 8(%rdi) # ifdef USE_AS_STPCPY @@ -711,6 +823,9 @@ L(Exit12): L(Exit13): mov (%rsi), %rcx mov 5(%rsi), %rdx +# ifdef __CHKP__ + bndcu 12(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %rdx, 5(%rdi) # ifdef USE_AS_STPCPY @@ -727,6 +842,9 @@ L(Exit13): L(Exit14): mov (%rsi), %rcx mov 6(%rsi), %rdx +# ifdef __CHKP__ + bndcu 13(%rdi), %bnd0 +# endif mov %rcx, (%rdi) mov %rdx, 6(%rdi) # ifdef USE_AS_STPCPY @@ -743,6 +861,9 @@ L(Exit14): L(Exit15): mov (%rsi), %rcx mov 7(%rsi), %rdx +# ifdef __CHKP__ + bndcu 14(%rdi), %bnd0 +# endif mov %rcx, (%rdi) 
mov %rdx, 7(%rdi) # ifdef USE_AS_STPCPY @@ -758,6 +879,9 @@ L(Exit15): .p2align 4 L(Exit16): movdqu (%rsi), %xmm0 +# ifdef __CHKP__ + bndcu 15(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) # ifdef USE_AS_STPCPY lea 15(%rdi), %rax @@ -772,6 +896,9 @@ L(Exit16): .p2align 4 L(Exit17): movdqu (%rsi), %xmm0 +# ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %dh, 16(%rdi) # ifdef USE_AS_STPCPY @@ -788,6 +915,9 @@ L(Exit17): L(Exit18): movdqu (%rsi), %xmm0 mov 16(%rsi), %cx +# ifdef __CHKP__ + bndcu 17(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %cx, 16(%rdi) # ifdef USE_AS_STPCPY @@ -804,6 +934,9 @@ L(Exit18): L(Exit19): movdqu (%rsi), %xmm0 mov 15(%rsi), %ecx +# ifdef __CHKP__ + bndcu 18(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %ecx, 15(%rdi) # ifdef USE_AS_STPCPY @@ -820,6 +953,9 @@ L(Exit19): L(Exit20): movdqu (%rsi), %xmm0 mov 16(%rsi), %ecx +# ifdef __CHKP__ + bndcu 19(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %ecx, 16(%rdi) # ifdef USE_AS_STPCPY @@ -836,6 +972,9 @@ L(Exit20): L(Exit21): movdqu (%rsi), %xmm0 mov 16(%rsi), %ecx +# ifdef __CHKP__ + bndcu 20(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %ecx, 16(%rdi) mov %dh, 20(%rdi) @@ -853,6 +992,9 @@ L(Exit21): L(Exit22): movdqu (%rsi), %xmm0 mov 14(%rsi), %rcx +# ifdef __CHKP__ + bndcu 21(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 14(%rdi) # ifdef USE_AS_STPCPY @@ -869,6 +1011,9 @@ L(Exit22): L(Exit23): movdqu (%rsi), %xmm0 mov 15(%rsi), %rcx +# ifdef __CHKP__ + bndcu 22(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 15(%rdi) # ifdef USE_AS_STPCPY @@ -885,6 +1030,9 @@ L(Exit23): L(Exit24): movdqu (%rsi), %xmm0 mov 16(%rsi), %rcx +# ifdef __CHKP__ + bndcu 23(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 16(%rdi) # ifdef USE_AS_STPCPY @@ -901,6 +1049,9 @@ L(Exit24): L(Exit25): movdqu (%rsi), %xmm0 mov 16(%rsi), %rcx +# ifdef __CHKP__ + bndcu 24(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rcx, 16(%rdi) mov %dh, 24(%rdi) @@ -919,6 +1070,9 @@ L(Exit26): movdqu (%rsi), %xmm0 mov 16(%rsi), %rdx mov 24(%rsi), %cx +# ifdef __CHKP__ + bndcu 25(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rdx, 16(%rdi) mov %cx, 24(%rdi) @@ -937,6 +1091,9 @@ L(Exit27): movdqu (%rsi), %xmm0 mov 16(%rsi), %rdx mov 23(%rsi), %ecx +# ifdef __CHKP__ + bndcu 26(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rdx, 16(%rdi) mov %ecx, 23(%rdi) @@ -955,6 +1112,9 @@ L(Exit28): movdqu (%rsi), %xmm0 mov 16(%rsi), %rdx mov 24(%rsi), %ecx +# ifdef __CHKP__ + bndcu 27(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) mov %rdx, 16(%rdi) mov %ecx, 24(%rdi) @@ -972,6 +1132,9 @@ L(Exit28): L(Exit29): movdqu (%rsi), %xmm0 movdqu 13(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 28(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 13(%rdi) # ifdef USE_AS_STPCPY @@ -988,6 +1151,9 @@ L(Exit29): L(Exit30): movdqu (%rsi), %xmm0 movdqu 14(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 29(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 14(%rdi) # ifdef USE_AS_STPCPY @@ -1004,6 +1170,9 @@ L(Exit30): L(Exit31): movdqu (%rsi), %xmm0 movdqu 15(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 30(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 15(%rdi) # ifdef USE_AS_STPCPY @@ -1020,6 +1189,9 @@ L(Exit31): L(Exit32): movdqu (%rsi), %xmm0 movdqu 16(%rsi), %xmm2 +# ifdef __CHKP__ + bndcu 31(%rdi), %bnd0 +# endif movdqu %xmm0, (%rdi) movdqu %xmm2, 16(%rdi) # ifdef USE_AS_STPCPY diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S index 3f92a41ef9..1fed105bf0 100644 --- a/sysdeps/x86_64/multiarch/strrchr.S +++ 
b/sysdeps/x86_64/multiarch/strrchr.S @@ -97,6 +97,10 @@ __strrchr_sse42: CALL_MCOUNT testb %sil, %sil je __strend_sse4 +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif xor %eax,%eax /* RAX has the last occurrence of s. */ movd %esi, %xmm1 punpcklbw %xmm1, %xmm1 @@ -135,6 +139,9 @@ L(unaligned_no_byte): contain the NULL terminator. */ jg L(exit) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif /* Loop start on aligned string. */ .p2align 4 @@ -142,6 +149,9 @@ L(loop): pcmpistri $0x4a, (%r8), %xmm1 jbe L(match_or_eos) addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif jmp L(loop) .p2align 4 L(match_or_eos): @@ -149,11 +159,17 @@ L(match_or_eos): L(match_no_eos): leaq (%r8,%rcx), %rax addq $16, %r8 +# ifdef __CHKP__ + bndcu (%r8), %bnd0 +# endif jmp L(loop) .p2align 4 L(had_eos): jnc L(exit) leaq (%r8,%rcx), %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif .p2align 4 L(exit): ret diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S index b7de092228..77889dd555 100644 --- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S @@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3) mov %rsi, %rcx mov %rdi, %rdx +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +# endif cmpl $0, (%rcx) jz L(Exit4) +# ifdef __CHKP__ + bndcu 4(%rcx), %bnd1 +# endif cmpl $0, 4(%rcx) jz L(Exit8) +# ifdef __CHKP__ + bndcu 8(%rcx), %bnd1 +# endif cmpl $0, 8(%rcx) jz L(Exit12) +# ifdef __CHKP__ + bndcu 12(%rcx), %bnd1 +# endif cmpl $0, 12(%rcx) jz L(Exit16) @@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3) pxor %xmm0, %xmm0 mov (%rcx), %r9 +# ifdef __CHKP__ + bndcu 7(%rdx), %bnd0 +# endif mov %r9, (%rdx) +# ifdef __CHKP__ + bndcu (%rsi), %bnd1 +# endif pcmpeqd (%rsi), %xmm0 mov 8(%rcx), %r9 +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif mov %r9, 8(%rdx) pmovmskb %xmm0, %rax @@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3) jmp L(Shl12) L(Align16Both): +# ifdef __CHKP__ + bndcu 16(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps (%rcx), %xmm1 movaps 16(%rcx), %xmm2 movaps %xmm1, (%rdx) @@ -82,6 +109,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm3 movaps %xmm2, (%rdx, %rsi) pcmpeqd %xmm3, %xmm0 @@ -91,6 +122,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm4 movaps %xmm3, (%rdx, %rsi) pcmpeqd %xmm4, %xmm0 @@ -100,6 +135,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm1 movaps %xmm4, (%rdx, %rsi) pcmpeqd %xmm1, %xmm0 @@ -109,6 +148,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm2 movaps %xmm1, (%rdx, %rsi) pcmpeqd %xmm2, %xmm0 @@ -118,6 +161,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps 16(%rcx, %rsi), %xmm3 movaps %xmm2, (%rdx, %rsi) pcmpeqd %xmm3, %xmm0 @@ -127,6 +174,10 @@ L(Align16Both): test %rax, %rax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%rcx, %rsi), %bnd1 + bndcu 15(%rdx, %rsi), %bnd0 +# endif movaps %xmm3, (%rdx, %rsi) mov %rcx, %rax lea 
16(%rcx, %rsi), %rcx @@ -138,6 +189,10 @@ L(Align16Both): .p2align 4 L(Aligned64Loop): +# ifdef __CHKP__ + bndcu (%rcx), %bnd1 + bndcu 63(%rdx), %bnd0 +# endif movaps (%rcx), %xmm2 movaps %xmm2, %xmm4 movaps 16(%rcx), %xmm5 @@ -168,6 +223,9 @@ L(Aligned64Leave): pcmpeqd %xmm5, %xmm0 pmovmskb %xmm0, %rax +# ifdef __CHKP__ + bndcu -49(%rdx), %bnd0 +# endif movaps %xmm4, -64(%rdx) test %rax, %rax lea 16(%rsi), %rsi @@ -176,11 +234,17 @@ L(Aligned64Leave): pcmpeqd %xmm6, %xmm0 pmovmskb %xmm0, %rax +# ifdef __CHKP__ + bndcu -33(%rdx), %bnd0 +# endif movaps %xmm5, -48(%rdx) test %rax, %rax lea 16(%rsi), %rsi jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu -17(%rdx), %bnd0 +# endif movaps %xmm6, -32(%rdx) pcmpeqd %xmm7, %xmm0 @@ -190,11 +254,17 @@ L(Aligned64Leave): jnz L(CopyFrom1To16Bytes) mov $-0x40, %rsi +# ifdef __CHKP__ + bndcu -1(%rdx), %bnd0 +# endif movaps %xmm7, -16(%rdx) jmp L(Aligned64Loop) .p2align 4 L(Shl4): +# ifdef __CHKP__ + bndcu 12(%rcx), %bnd1 +# endif movaps -4(%rcx), %xmm1 movaps 12(%rcx), %xmm2 L(Shl4Start): @@ -206,6 +276,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 28(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 28(%rcx), %xmm2 @@ -219,6 +293,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 28(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 28(%rcx), %xmm2 @@ -232,6 +310,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 28(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 28(%rcx), %xmm2 @@ -244,6 +326,9 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) lea 28(%rcx), %rcx lea 16(%rdx), %rdx @@ -258,6 +343,9 @@ L(Shl4Start): .p2align 4 L(Shl4LoopStart): +# ifdef __CHKP__ + bndcu 12(%rcx), %bnd1 +# endif movaps 12(%rcx), %xmm2 movaps 28(%rcx), %xmm3 movaps %xmm3, %xmm6 @@ -279,6 +367,9 @@ L(Shl4LoopStart): lea 64(%rcx), %rcx palignr $4, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%rdx), %bnd0 +# endif movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) @@ -287,6 +378,10 @@ L(Shl4LoopStart): jmp L(Shl4LoopStart) L(Shl4LoopExit): +# ifdef __CHKP__ + bndcu -4(%rcx), %bnd1 + bndcu 11(%rdx), %bnd0 +# endif movdqu -4(%rcx), %xmm1 mov $12, %rsi movdqu %xmm1, -4(%rdx) @@ -294,6 +389,9 @@ L(Shl4LoopExit): .p2align 4 L(Shl8): +# ifdef __CHKP__ + bndcu 8(%rcx), %bnd1 +# endif movaps -8(%rcx), %xmm1 movaps 8(%rcx), %xmm2 L(Shl8Start): @@ -305,6 +403,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 24(%rcx), %xmm2 @@ -318,6 +420,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 24(%rcx), %xmm2 @@ -331,6 +437,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 24(%rcx), %xmm2 @@ -343,6 +453,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 24(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) lea 24(%rcx), %rcx lea 16(%rdx), %rdx @@ -357,6 +471,9 @@ L(Shl8Start): .p2align 4 L(Shl8LoopStart): +# ifdef __CHKP__ + bndcu 8(%rcx), %bnd1 +# endif movaps 8(%rcx), 
%xmm2 movaps 24(%rcx), %xmm3 movaps %xmm3, %xmm6 @@ -378,6 +495,9 @@ L(Shl8LoopStart): lea 64(%rcx), %rcx palignr $8, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%rdx), %bnd0 +# endif movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) @@ -386,6 +506,10 @@ L(Shl8LoopStart): jmp L(Shl8LoopStart) L(Shl8LoopExit): +# ifdef __CHKP__ + bndcu (%rcx), %bnd1 + bndcu 7(%rdx), %bnd0 +# endif mov (%rcx), %r9 mov $8, %rsi mov %r9, (%rdx) @@ -393,6 +517,9 @@ L(Shl8LoopExit): .p2align 4 L(Shl12): +# ifdef __CHKP__ + bndcu 4(%rcx), %bnd1 +# endif movaps -12(%rcx), %xmm1 movaps 4(%rcx), %xmm2 L(Shl12Start): @@ -404,6 +531,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 20(%rcx), %xmm2 @@ -417,6 +548,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 20(%rcx), %xmm2 @@ -430,6 +565,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) movaps 20(%rcx), %xmm2 @@ -442,6 +581,10 @@ L(Shl12Start): jnz L(Shl12LoopExit) palignr $12, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 20(%rcx), %bnd1 + bndcu 15(%rdx), %bnd0 +# endif movaps %xmm2, (%rdx) lea 20(%rcx), %rcx lea 16(%rdx), %rdx @@ -456,6 +599,9 @@ L(Shl12Start): .p2align 4 L(Shl12LoopStart): +# ifdef __CHKP__ + bndcu 4(%rcx), %bnd1 +# endif movaps 4(%rcx), %xmm2 movaps 20(%rcx), %xmm3 movaps %xmm3, %xmm6 @@ -476,6 +622,9 @@ L(Shl12LoopStart): lea 64(%rcx), %rcx palignr $12, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%rdx), %bnd0 +# endif movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) @@ -484,6 +633,10 @@ L(Shl12LoopStart): jmp L(Shl12LoopStart) L(Shl12LoopExit): +# ifdef __CHKP__ + bndcu (%rcx), %bnd1 + bndcu 3(%rdx), %bnd0 +# endif mov (%rcx), %r9d mov $4, %rsi mov %r9d, (%rdx) @@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes): jnz L(Exit4) mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 7(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov %rdi, %rax ret @@ -510,6 +666,9 @@ L(ExitHigh): jnz L(Exit12) mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov 8(%rcx), %rax mov %rax, 8(%rdx) @@ -519,6 +678,9 @@ L(ExitHigh): .p2align 4 L(Exit4): movl (%rcx), %eax +# ifdef __CHKP__ + bndcu 3(%rdx), %bnd0 +# endif movl %eax, (%rdx) mov %rdi, %rax ret @@ -526,6 +688,9 @@ L(Exit4): .p2align 4 L(Exit8): mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 7(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov %rdi, %rax ret @@ -533,6 +698,9 @@ L(Exit8): .p2align 4 L(Exit12): mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 11(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov 8(%rcx), %eax mov %eax, 8(%rdx) @@ -542,6 +710,9 @@ L(Exit12): .p2align 4 L(Exit16): mov (%rcx), %rax +# ifdef __CHKP__ + bndcu 15(%rdx), %bnd0 +# endif mov %rax, (%rdx) mov 8(%rcx), %rax mov %rax, 8(%rdx) diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S index f4d559155c..2f4cb25e00 100644 --- a/sysdeps/x86_64/rawmemchr.S +++ b/sysdeps/x86_64/rawmemchr.S @@ -20,11 +20,23 @@ #include <sysdep.h> +#ifdef __CHKP__ +# define RETURN \ + bndcu (%rax), %bnd0; \ + ret +#else +# define RETURN ret +#endif + .text ENTRY (rawmemchr) movd %rsi, %xmm1 mov %rdi, %rcx +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 +#endif + punpcklbw %xmm1, %xmm1 punpcklbw %xmm1, %xmm1 @@ -63,7 +75,7 @@ L(crosscache): add 
%rdi, %rax add %rcx, %rax - ret + RETURN .p2align 4 L(unaligned_no_match): @@ -71,24 +83,36 @@ L(unaligned_no_match): .p2align 4 L(loop_prolog): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif movdqa 16(%rdi), %xmm2 pcmpeqb %xmm1, %xmm2 pmovmskb %xmm2, %eax test %eax, %eax jnz L(matches16) +#ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 +#endif movdqa 32(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax test %eax, %eax jnz L(matches32) +#ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 +#endif movdqa 48(%rdi), %xmm4 pcmpeqb %xmm1, %xmm4 add $64, %rdi @@ -99,24 +123,36 @@ L(loop_prolog): test $0x3f, %rdi jz L(align64_loop) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif movdqa 16(%rdi), %xmm2 pcmpeqb %xmm1, %xmm2 pmovmskb %xmm2, %eax test %eax, %eax jnz L(matches16) +#ifdef __CHKP__ + bndcu 32(%rdi), %bnd0 +#endif movdqa 32(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax test %eax, %eax jnz L(matches32) +#ifdef __CHKP__ + bndcu 48(%rdi), %bnd0 +#endif movdqa 48(%rdi), %xmm3 pcmpeqb %xmm1, %xmm3 pmovmskb %xmm3, %eax @@ -129,6 +165,9 @@ L(loop_prolog): .p2align 4 L(align64_loop): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 movdqa 16(%rdi), %xmm2 movdqa 32(%rdi), %xmm3 @@ -170,36 +209,36 @@ L(align64_loop): pmovmskb %xmm1, %eax bsf %eax, %eax lea 48(%rdi, %rax), %rax - ret + RETURN .p2align 4 L(matches0): bsf %eax, %eax lea -16(%rax, %rdi), %rax - ret + RETURN .p2align 4 L(matches): bsf %eax, %eax add %rdi, %rax - ret + RETURN .p2align 4 L(matches16): bsf %eax, %eax lea 16(%rax, %rdi), %rax - ret + RETURN .p2align 4 L(matches32): bsf %eax, %eax lea 32(%rax, %rdi), %rax - ret + RETURN .p2align 4 L(return_null): xor %rax, %rax - ret + RETURN END (rawmemchr) diff --git a/sysdeps/x86_64/stpcpy_chk-c.c b/sysdeps/x86_64/stpcpy_chk-c.c new file mode 100644 index 0000000000..900fa761cc --- /dev/null +++ b/sysdeps/x86_64/stpcpy_chk-c.c @@ -0,0 +1 @@ +#include <debug/stpcpy_chk.c> diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S index 8bea6fb5db..783237937d 100644 --- a/sysdeps/x86_64/strcat.S +++ b/sysdeps/x86_64/strcat.S @@ -25,6 +25,11 @@ .text ENTRY (strcat) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 +#endif + movq %rdi, %rcx /* Dest. register. */ andl $7, %ecx /* mask alignment bits */ movq %rdi, %rax /* Duplicate destination pointer. */ @@ -36,7 +41,11 @@ ENTRY (strcat) neg %ecx /* We need to align to 8 bytes. */ addl $8,%ecx /* Search the first bytes directly. */ -0: cmpb $0x0,(%rax) /* is byte NUL? */ +0: +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif + cmpb $0x0,(%rax) /* is byte NUL? */ je 2f /* yes => start copy */ incq %rax /* increment pointer */ decl %ecx @@ -48,6 +57,9 @@ ENTRY (strcat) .p2align 4 4: /* First unroll. */ +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif movq (%rax), %rcx /* get double word (= 8 bytes) in question */ addq $8,%rax /* adjust pointer for next word */ movq %r8, %rdx /* magic value */ @@ -62,6 +74,9 @@ ENTRY (strcat) jnz 3f /* found NUL => return pointer */ /* Second unroll. 
*/ +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif movq (%rax), %rcx /* get double word (= 8 bytes) in question */ addq $8,%rax /* adjust pointer for next word */ movq %r8, %rdx /* magic value */ @@ -76,6 +91,9 @@ ENTRY (strcat) jnz 3f /* found NUL => return pointer */ /* Third unroll. */ +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif movq (%rax), %rcx /* get double word (= 8 bytes) in question */ addq $8,%rax /* adjust pointer for next word */ movq %r8, %rdx /* magic value */ @@ -90,6 +108,9 @@ ENTRY (strcat) jnz 3f /* found NUL => return pointer */ /* Fourth unroll. */ +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif movq (%rax), %rcx /* get double word (= 8 bytes) in question */ addq $8,%rax /* adjust pointer for next word */ movq %r8, %rdx /* magic value */ @@ -163,6 +184,9 @@ ENTRY (strcat) .p2align 4 22: /* 1st unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -177,10 +201,16 @@ ENTRY (strcat) jnz 23f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ /* 2nd unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -195,10 +225,16 @@ ENTRY (strcat) jnz 23f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ /* 3rd unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -213,10 +249,16 @@ ENTRY (strcat) jnz 23f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ /* 4th unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -231,6 +273,9 @@ ENTRY (strcat) jnz 23f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ jmp 22b /* Next iteration. */ @@ -239,10 +284,16 @@ ENTRY (strcat) The loop is unrolled twice. */ .p2align 4 23: +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movb %al, (%rdx) /* 1st byte. */ testb %al, %al /* Is it NUL. */ jz 24f /* yes, finish. */ incq %rdx /* Increment destination. */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movb %ah, (%rdx) /* 2nd byte. */ testb %ah, %ah /* Is it NUL?. */ jz 24f /* yes, finish. 
*/ diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S index d89f1eba8f..8519a81117 100644 --- a/sysdeps/x86_64/strchr.S +++ b/sysdeps/x86_64/strchr.S @@ -22,6 +22,10 @@ .text ENTRY (strchr) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif movd %esi, %xmm1 movq %rdi, %rcx punpcklbw %xmm1, %xmm1 @@ -29,6 +33,9 @@ ENTRY (strchr) pxor %xmm2, %xmm2 punpcklbw %xmm1, %xmm1 orl $0xffffffff, %esi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pshufd $0, %xmm1, %xmm1 subq %rdi, %rcx @@ -44,7 +51,11 @@ ENTRY (strchr) orl %edx, %ecx jnz 1f -2: movdqa (%rdi), %xmm0 +2: +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif + movdqa (%rdi), %xmm0 leaq 16(%rdi), %rdi movdqa %xmm0, %xmm3 pcmpeqb %xmm1, %xmm0 diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S index d8c345ba7d..3e4abfa217 100644 --- a/sysdeps/x86_64/strchrnul.S +++ b/sysdeps/x86_64/strchrnul.S @@ -23,6 +23,10 @@ .text ENTRY (__strchrnul) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif movd %esi, %xmm1 movq %rdi, %rcx punpcklbw %xmm1, %xmm1 @@ -44,7 +48,11 @@ ENTRY (__strchrnul) andl %esi, %ecx jnz 1f -2: movdqa (%rdi), %xmm0 +2: +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif + movdqa (%rdi), %xmm0 leaq 16(%rdi), %rdi movdqa %xmm0, %xmm3 pcmpeqb %xmm1, %xmm0 @@ -56,6 +64,9 @@ ENTRY (__strchrnul) 1: bsfl %ecx, %edx leaq -16(%rdi,%rdx), %rax +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif ret END (__strchrnul) diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index 76809373e8..89597841ad 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -128,7 +128,16 @@ libc_hidden_def (__strncasecmp) ENTRY (STRCMP) #ifdef NOT_IN_libc /* Simple version since we can't use SSE registers in ld.so. */ -L(oop): movb (%rdi), %al +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 +#endif +L(oop): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +#endif + movb (%rdi), %al cmpb (%rsi), %al jne L(neq) incq %rdi @@ -177,6 +186,12 @@ END (STRCMP) je LABEL(Byte0) mov %rdx, %r11 # endif +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +#endif mov %esi, %ecx mov %edi, %eax /* Use 64bit AND here to avoid long NOP padding. */ @@ -243,6 +258,10 @@ END (STRCMP) # endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 + bndcu (%rsi), %bnd1 +#endif /* * Determine source and destination string offsets from 16-byte alignment. 
@@ -263,6 +282,11 @@ LABEL(crosscache): mov %edx, %r8d /* r8d is offset flag for exit tail */ xchg %ecx, %eax xchg %rsi, %rdi +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif LABEL(bigger): lea 15(%rax), %r9 sub %rcx, %r9 @@ -310,6 +334,10 @@ LABEL(ashr_0): */ .p2align 4 LABEL(loop_ashr_0): +#ifdef __CHKP__ + bndcu -1(%rdi, %rcx), %bnd0 + bndcu -1(%rsi, %rcx), %bnd1 +#endif movdqa (%rsi, %rcx), %xmm1 movdqa (%rdi, %rcx), %xmm2 TOLOWER (%xmm1, %xmm2) @@ -326,6 +354,10 @@ LABEL(loop_ashr_0): jbe LABEL(strcmp_exitz) # endif add $16, %rcx +#ifdef __CHKP__ + bndcu -1(%rdi, %rcx), %bnd0 + bndcu -1(%rsi, %rcx), %bnd1 +#endif movdqa (%rsi, %rcx), %xmm1 movdqa (%rdi, %rcx), %xmm2 TOLOWER (%xmm1, %xmm2) @@ -377,6 +409,15 @@ LABEL(ashr_1): lea 1(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_1) +LABEL(ashr_1_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_1) +# endif .p2align 4 LABEL(loop_ashr_1): @@ -460,7 +501,11 @@ LABEL(nibble_ashr_1): pxor %xmm0, %xmm0 sub $0x1000, %r10 /* substract 4K from %r10 */ +# ifdef __CHKP__ + jmp LABEL(ashr_1_check) +# else jmp LABEL(gobble_ashr_1) +# endif /* * Once find null char, determine if there is a string mismatch @@ -507,6 +552,15 @@ LABEL(ashr_2): lea 2(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_2) +LABEL(ashr_2_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_2) +# endif .p2align 4 LABEL(loop_ashr_2): @@ -588,7 +642,11 @@ LABEL(nibble_ashr_2): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_2_check) +# else jmp LABEL(gobble_ashr_2) +# endif .p2align 4 LABEL(ashr_2_exittail): @@ -632,6 +690,15 @@ LABEL(ashr_3): lea 3(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_3) +LABEL(ashr_3_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_3) +# endif .p2align 4 LABEL(loop_ashr_3): @@ -713,7 +780,11 @@ LABEL(nibble_ashr_3): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_3_check) +# else jmp LABEL(gobble_ashr_3) +# endif .p2align 4 LABEL(ashr_3_exittail): @@ -757,6 +828,15 @@ LABEL(ashr_4): lea 4(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_4) +LABEL(ashr_4_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_4) +# endif .p2align 4 LABEL(loop_ashr_4): @@ -838,7 +918,11 @@ LABEL(nibble_ashr_4): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_4_check) +# else jmp LABEL(gobble_ashr_4) +# endif .p2align 4 LABEL(ashr_4_exittail): @@ -882,6 +966,15 @@ LABEL(ashr_5): lea 5(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_5) +LABEL(ashr_5_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_5) +# endif .p2align 4 
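Two details recur through these strcmp hunks. When the original code exchanges %rsi and %rdi so the better-aligned string is the directly indexed one, the three added bndmov instructions rotate %bnd0 and %bnd1 through %bnd2, keeping each bound register attached to the pointer it describes (the same rotation is repeated at the exit paths that undo the swap). And because each ashr_N case checks the bounds only for the current window on entry (bndcu -16(%rdi,%rdx) and -16(%rsi,%rdx)), the page-cross path cannot jump straight back into the main loop; under __CHKP__ it detours through the new LABEL(ashr_N_check), which re-checks (%rdi,%rdx) and (%rsi,%rdx) first. The C sketch below illustrates both ideas under stated assumptions — the names are invented, it re-checks on every 16-byte step where the assembly re-checks only on the page-cross path, and it is not the glibc implementation.

#include <stddef.h>
#include <stdlib.h>

struct bnd { const char *lo; const char *hi; };

static void chk_upper (const char *p, struct bnd b) { if (p > b.hi) abort (); }

static int
checked_compare (const char *a, struct bnd ba, const char *b, struct bnd bb)
{
  int sign = 1;

  /* Mirror of "xchg %rsi, %rdi" plus the bndmov rotation: if the pointers
     are swapped, their bounds are swapped with them, otherwise later checks
     would test each pointer against the other string's object.  The sign
     flag plays the role of %r8d and undoes the swap's effect on the result.  */
  if (((size_t) a & 0xf) < ((size_t) b & 0xf))
    {
      const char *tp = a;  a = b;  b = tp;
      struct bnd tb = ba;  ba = bb;  bb = tb;
      sign = -1;
    }

  for (size_t i = 0; ; i += 16)
    {
      /* Stand-in for LABEL(ashr_N_check): validate both source addresses
         before the next 16-byte step is read.  */
      chk_upper (a + i, ba);
      chk_upper (b + i, bb);
      for (size_t j = i; j < i + 16; j++)
        if (a[j] != b[j] || a[j] == '\0')
          return sign * ((unsigned char) a[j] - (unsigned char) b[j]);
    }
}

int
main (void)
{
  char s1[] = "bounds travel with their pointers";
  char s2[] = "bounds travel with their pointers!";
  struct bnd b1 = { s1, s1 + sizeof s1 - 1 };
  struct bnd b2 = { s2, s2 + sizeof s2 - 1 };
  return checked_compare (s1, b1, s2, b2) < 0 ? 0 : 1;
}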
LABEL(loop_ashr_5): @@ -963,7 +1056,11 @@ LABEL(nibble_ashr_5): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_5_check) +# else jmp LABEL(gobble_ashr_5) +# endif .p2align 4 LABEL(ashr_5_exittail): @@ -1007,6 +1104,15 @@ LABEL(ashr_6): lea 6(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_6) +LABEL(ashr_6_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_6) +# endif .p2align 4 LABEL(loop_ashr_6): @@ -1088,7 +1194,11 @@ LABEL(nibble_ashr_6): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_6_check) +# else jmp LABEL(gobble_ashr_6) +# endif .p2align 4 LABEL(ashr_6_exittail): @@ -1132,6 +1242,15 @@ LABEL(ashr_7): lea 7(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_7) +LABEL(ashr_7_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_7) +# endif .p2align 4 LABEL(loop_ashr_7): @@ -1213,7 +1332,11 @@ LABEL(nibble_ashr_7): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_7_check) +# else jmp LABEL(gobble_ashr_7) +# endif .p2align 4 LABEL(ashr_7_exittail): @@ -1257,6 +1380,15 @@ LABEL(ashr_8): lea 8(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_8) +LABEL(ashr_8_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_8) +# endif .p2align 4 LABEL(loop_ashr_8): @@ -1338,7 +1470,11 @@ LABEL(nibble_ashr_8): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_8_check) +# else jmp LABEL(gobble_ashr_8) +# endif .p2align 4 LABEL(ashr_8_exittail): @@ -1382,6 +1518,15 @@ LABEL(ashr_9): lea 9(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_9) +LABEL(ashr_9_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_9) +# endif .p2align 4 LABEL(loop_ashr_9): @@ -1463,7 +1608,11 @@ LABEL(nibble_ashr_9): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_9_check) +# else jmp LABEL(gobble_ashr_9) +# endif .p2align 4 LABEL(ashr_9_exittail): @@ -1507,6 +1656,15 @@ LABEL(ashr_10): lea 10(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_10) +LABEL(ashr_10_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_10) +# endif .p2align 4 LABEL(loop_ashr_10): @@ -1588,7 +1746,11 @@ LABEL(nibble_ashr_10): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_10_check) +# else jmp LABEL(gobble_ashr_10) +# endif .p2align 4 LABEL(ashr_10_exittail): @@ -1632,6 +1794,15 @@ LABEL(ashr_11): lea 11(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_11) +LABEL(ashr_11_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp 
LABEL(gobble_ashr_11) +# endif .p2align 4 LABEL(loop_ashr_11): @@ -1713,7 +1884,11 @@ LABEL(nibble_ashr_11): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_11_check) +# else jmp LABEL(gobble_ashr_11) +# endif .p2align 4 LABEL(ashr_11_exittail): @@ -1757,6 +1932,15 @@ LABEL(ashr_12): lea 12(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_12) +LABEL(ashr_12_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_12) +# endif .p2align 4 LABEL(loop_ashr_12): @@ -1838,7 +2022,11 @@ LABEL(nibble_ashr_12): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_12_check) +# else jmp LABEL(gobble_ashr_12) +# endif .p2align 4 LABEL(ashr_12_exittail): @@ -1882,6 +2070,15 @@ LABEL(ashr_13): lea 13(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_13) +LABEL(ashr_13_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_13) +# endif .p2align 4 LABEL(loop_ashr_13): @@ -1963,7 +2160,11 @@ LABEL(nibble_ashr_13): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_13_check) +# else jmp LABEL(gobble_ashr_13) +# endif .p2align 4 LABEL(ashr_13_exittail): @@ -2007,6 +2208,15 @@ LABEL(ashr_14): lea 14(%rdi), %r10 and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_14) +LABEL(ashr_14_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_14) +# endif .p2align 4 LABEL(loop_ashr_14): @@ -2088,7 +2298,11 @@ LABEL(nibble_ashr_14): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_14_check) +# else jmp LABEL(gobble_ashr_14) +# endif .p2align 4 LABEL(ashr_14_exittail): @@ -2134,6 +2348,15 @@ LABEL(ashr_15): and $0xfff, %r10 /* offset into 4K page */ sub $0x1000, %r10 /* subtract 4K pagesize */ +# ifdef __CHKP__ + bndcu -16(%rdi, %rcx), %bnd0 + bndcu -16(%rsi, %rcx), %bnd1 + jmp LABEL(loop_ashr_15) +LABEL(ashr_15_check): + bndcu (%rdi, %rcx), %bnd0 + bndcu (%rsi, %rcx), %bnd1 + jmp LABEL(gobble_ashr_15) +# endif .p2align 4 LABEL(loop_ashr_15): @@ -2215,7 +2438,11 @@ LABEL(nibble_ashr_15): pxor %xmm0, %xmm0 sub $0x1000, %r10 +# ifdef __CHKP__ + jmp LABEL(ashr_15_check) +# else jmp LABEL(gobble_ashr_15) +# endif .p2align 4 LABEL(ashr_15_exittail): @@ -2240,6 +2467,11 @@ LABEL(less32bytes): test %r8d, %r8d jz LABEL(ret) xchg %rsi, %rdi /* recover original order according to flag(%r8d) */ +#ifdef __CHKP__ + bndmov %bnd0, %bnd2 + bndmov %bnd1, %bnd0 + bndmov %bnd2, %bnd1 +#endif .p2align 4 LABEL(ret): @@ -2250,6 +2482,10 @@ LABEL(less16bytes): sub %rdx, %r11 jbe LABEL(strcmp_exitz) # endif +/*#ifdef __CHKP__ + bndcu (%rdi, %rdx), %bnd0 + bndcu (%rsi, %rdx), %bnd1 +#endif*/ movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S index 612824700a..2b78e95427 100644 --- a/sysdeps/x86_64/strcpy.S +++ b/sysdeps/x86_64/strcpy.S @@ -26,6 +26,10 @@ .text ENTRY (STRCPY) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcl (%rsi), %bnd1 +#endif movq %rsi, %rcx /* Source register. */ andl $7, %ecx /* mask alignment bits */ movq %rdi, %rdx /* Duplicate destination pointer. 
*/ @@ -36,8 +40,14 @@ ENTRY (STRCPY) addl $8,%ecx /* Search the first bytes directly. */ 0: +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movb (%rsi), %al /* Fetch a byte */ testb %al, %al /* Is it NUL? */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movb %al, (%rdx) /* Store it */ jz 4f /* If it was NUL, done! */ incq %rsi @@ -54,6 +64,9 @@ ENTRY (STRCPY) .p2align 4 1: /* 1st unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -68,10 +81,16 @@ ENTRY (STRCPY) jnz 3f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ /* 2nd unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -86,10 +105,16 @@ ENTRY (STRCPY) jnz 3f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ /* 3rd unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -104,10 +129,16 @@ ENTRY (STRCPY) jnz 3f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ /* 4th unroll. */ +#ifdef __CHKP__ + bndcu (%rsi), %bnd1 +#endif movq (%rsi), %rax /* Read double word (8 bytes). */ addq $8, %rsi /* Adjust pointer for next word. */ movq %rax, %r9 /* Save a copy for NUL finding. */ @@ -122,6 +153,9 @@ ENTRY (STRCPY) jnz 3f /* found NUL => return pointer */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movq %rax, (%rdx) /* Write value to destination. */ addq $8, %rdx /* Adjust pointer. */ jmp 1b /* Next iteration. */ @@ -132,10 +166,16 @@ ENTRY (STRCPY) 3: /* Note that stpcpy needs to return with the value of the NUL byte. */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movb %al, (%rdx) /* 1st byte. */ testb %al, %al /* Is it NUL. */ jz 4f /* yes, finish. */ incq %rdx /* Increment destination. */ +#ifdef __CHKP__ + bndcu (%rdx), %bnd0 +#endif movb %ah, (%rdx) /* 2nd byte. */ testb %ah, %ah /* Is it NUL?. */ jz 4f /* yes, finish. */ diff --git a/sysdeps/x86_64/strcpy_chk-c.c b/sysdeps/x86_64/strcpy_chk-c.c new file mode 100644 index 0000000000..b48704cec4 --- /dev/null +++ b/sysdeps/x86_64/strcpy_chk-c.c @@ -0,0 +1 @@ +#include <debug/strcpy_chk.c> diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S index 7e171deefc..a1028204c5 100644 --- a/sysdeps/x86_64/strcpy_chk.S +++ b/sysdeps/x86_64/strcpy_chk.S @@ -18,6 +18,7 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ +#if !defined __CHKP__ && !defined __CHKWR__ #include <sysdep.h> #include "asm-syntax.h" @@ -206,3 +207,4 @@ ENTRY (STRCPY_CHK) jmp HIDDEN_JUMPTARGET (__chk_fail) END (STRCPY_CHK) +#endif diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S index 65f8a9e96e..0acca21399 100644 --- a/sysdeps/x86_64/strcspn.S +++ b/sysdeps/x86_64/strcspn.S @@ -29,6 +29,12 @@ .text ENTRY (strcspn) +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +# endif movq %rdi, %rdx /* Save SRC. */ @@ -54,21 +60,34 @@ ENTRY (strcspn) have a correct zero-extended 64-bit value in %rcx. */ .p2align 4 -L(2): movb (%rax), %cl /* get byte from skipset */ +L(2): +# ifdef __CHKP__ + bndcu (%rax), %bnd1 +# endif + movb (%rax), %cl /* get byte from skipset */ testb %cl, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +# ifdef __CHKP__ + bndcu 1(%rax), %bnd1 +# endif movb 1(%rax), %cl /* get byte from skipset */ testb $0xff, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +# ifdef __CHKP__ + bndcu 2(%rax), %bnd1 +# endif movb 2(%rax), %cl /* get byte from skipset */ testb $0xff, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +# ifdef __CHKP__ + bndcu 3(%rax), %bnd1 +# endif movb 3(%rax), %cl /* get byte from skipset */ addq $4, %rax /* increment skipset pointer */ movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ @@ -89,18 +108,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */ .p2align 4 L(3): addq $4, %rax /* adjust pointer for full loop round */ +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif movb (%rax), %cl /* get byte from string */ cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ je L(4) /* yes => return */ +# ifdef __CHKP__ + bndcu 1(%rax), %bnd0 +# endif movb 1(%rax), %cl /* get byte from string */ cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ je L(5) /* yes => return */ +# ifdef __CHKP__ + bndcu 2(%rax), %bnd0 +# endif movb 2(%rax), %cl /* get byte from string */ cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ jz L(6) /* yes => return */ +# ifdef __CHKP__ + bndcu 3(%rax), %bnd0 +# endif movb 3(%rax), %cl /* get byte from string */ cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ jne L(3) /* no => start loop again */ diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S index eeb1092218..065f0e6d56 100644 --- a/sysdeps/x86_64/strlen.S +++ b/sysdeps/x86_64/strlen.S @@ -63,6 +63,10 @@ L(n_nonzero): mov %rsi, %r11 #endif +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif pxor %xmm8, %xmm8 pxor %xmm9, %xmm9 pxor %xmm10, %xmm10 @@ -157,6 +161,9 @@ L(loop_init): L(loop): addq $64, %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif cmpq %rax, %r10 je L(exit_end) @@ -182,6 +189,9 @@ L(first): bsfq %rdx, %rdx addq %rdx, %rax subq %rdi, %rax +# ifdef __CHKP__ + bndcu -1(%rdi, %rax), %bnd0 +# endif ret .p2align 4 @@ -192,6 +202,9 @@ L(exit): bsfq %rdx, %rdx addq %rdx, %rax subq %rdi, %rax +# ifdef __CHKP__ + bndcu -1(%rdi, %rax), %bnd0 +# endif ret #else @@ -199,6 +212,9 @@ L(exit): /* Main loop. Unrolled twice to improve L2 cache performance on core2. 
*/ .p2align 4 L(loop): +# ifdef __CHKP__ + bndcu 64(%rax), %bnd0 +# endif movdqa 64(%rax), %xmm8 pminub 80(%rax), %xmm8 @@ -231,6 +247,9 @@ L(exit0): bsfq %rdx, %rdx addq %rdx, %rax subq %rdi, %rax +# ifdef __CHKP__ + bndcu -1(%rdi, %rax), %bnd0 +# endif ret #endif diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S index e413b07438..0bd3405b6b 100644 --- a/sysdeps/x86_64/strrchr.S +++ b/sysdeps/x86_64/strrchr.S @@ -22,6 +22,10 @@ .text ENTRY (strrchr) +# ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +# endif movd %esi, %xmm1 movq %rdi, %rcx punpcklbw %xmm1, %xmm1 @@ -46,7 +50,11 @@ ENTRY (strrchr) orl %ecx, %esi jnz 1f -2: movdqa (%rdi), %xmm0 +2: +# ifdef __CHKP__ + bndcu (%rdi), %bnd0 +# endif + movdqa (%rdi), %xmm0 leaq 16(%rdi), %rdi movdqa %xmm0, %xmm3 pcmpeqb %xmm1, %xmm0 @@ -73,6 +81,9 @@ ENTRY (strrchr) bsrl %edx, %edx jz 4f leaq -16(%rdi,%rdx), %rax +# ifdef __CHKP__ + bndcu (%rax), %bnd0 +# endif 4: ret END (strrchr) diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S index 2911da2aa5..bd3be8ad6a 100644 --- a/sysdeps/x86_64/strspn.S +++ b/sysdeps/x86_64/strspn.S @@ -25,6 +25,12 @@ .text ENTRY (strspn) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +#endif movq %rdi, %rdx /* Save SRC. */ @@ -50,21 +56,34 @@ ENTRY (strspn) have a correct zero-extended 64-bit value in %rcx. */ .p2align 4 -L(2): movb (%rax), %cl /* get byte from stopset */ +L(2): +#ifdef __CHKP__ + bndcu (%rax), %bnd1 +#endif + movb (%rax), %cl /* get byte from stopset */ testb %cl, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +#ifdef __CHKP__ + bndcu 1(%rax), %bnd1 +#endif movb 1(%rax), %cl /* get byte from stopset */ testb $0xff, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +#ifdef __CHKP__ + bndcu 2(%rax), %bnd1 +#endif movb 2(%rax), %cl /* get byte from stopset */ testb $0xff, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +#ifdef __CHKP__ + bndcu 3(%rax), %bnd1 +#endif movb 3(%rax), %cl /* get byte from stopset */ addq $4, %rax /* increment stopset pointer */ movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ @@ -85,18 +104,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */ .p2align 4 L(3): addq $4, %rax /* adjust pointer for full loop round */ +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif movb (%rax), %cl /* get byte from string */ testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ jz L(4) /* no => return */ +#ifdef __CHKP__ + bndcu 1(%rax), %bnd0 +#endif movb 1(%rax), %cl /* get byte from string */ testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ jz L(5) /* no => return */ +#ifdef __CHKP__ + bndcu 2(%rax), %bnd0 +#endif movb 2(%rax), %cl /* get byte from string */ testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ jz L(6) /* no => return */ +#ifdef __CHKP__ + bndcu 3(%rax), %bnd0 +#endif movb 3(%rax), %cl /* get byte from string */ testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ jnz L(3) /* yes => start loop again */ diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S index 5636d9a625..17e2521608 100644 --- a/sysdeps/x86_64/strtok.S +++ b/sysdeps/x86_64/strtok.S @@ -90,6 +90,9 @@ ENTRY (FUNCTION) the last run. 
*/ cmpq $0, %rdx cmove %rax, %rdx +#ifdef __CHKP__ + bndldx (,%rax,1),%bnd0 +#endif testq %rdx, %rdx jz L(returnNULL) movq %rsi, %rax /* Get start of delimiter set. */ diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S index 3f098dc51c..3ab1e47f82 100644 --- a/sysdeps/x86_64/wcschr.S +++ b/sysdeps/x86_64/wcschr.S @@ -22,6 +22,11 @@ .text ENTRY (wcschr) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif + movd %rsi, %xmm1 pxor %xmm2, %xmm2 mov %rdi, %rcx @@ -43,6 +48,9 @@ ENTRY (wcschr) and $-16, %rdi +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqd %xmm0, %xmm2 add $16, %rdi @@ -78,6 +86,9 @@ L(cross_cache): L(unaligned_match): add %rdi, %rax add %rcx, %rax +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif ret .p2align 4 @@ -91,6 +102,9 @@ L(unaligned_no_match): .p2align 4 /* Loop start on aligned string. */ L(loop): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqd %xmm0, %xmm2 add $16, %rdi @@ -100,6 +114,9 @@ L(loop): or %rax, %rdx jnz L(matches) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqd %xmm0, %xmm2 add $16, %rdi @@ -109,6 +126,9 @@ L(loop): or %rax, %rdx jnz L(matches) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqd %xmm0, %xmm2 add $16, %rdi @@ -118,6 +138,9 @@ L(loop): or %rax, %rdx jnz L(matches) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqd %xmm0, %xmm2 add $16, %rdi @@ -142,6 +165,9 @@ L(matches): L(match): sub $16, %rdi add %rdi, %rax +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif ret .p2align 4 diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S index d6b516bce1..38e2849d5c 100644 --- a/sysdeps/x86_64/wcscmp.S +++ b/sysdeps/x86_64/wcscmp.S @@ -28,6 +28,14 @@ ENTRY (wcscmp) */ mov %esi, %eax mov %edi, %edx + +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 + bndcl (%rsi), %bnd1 + bndcu (%rsi), %bnd1 +#endif + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ mov %al, %ch mov %dl, %cl diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S index 5927352437..a7d944ff75 100644 --- a/sysdeps/x86_64/wcslen.S +++ b/sysdeps/x86_64/wcslen.S @@ -21,20 +21,45 @@ .text ENTRY (__wcslen) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif cmpl $0, (%rdi) jz L(exit_tail0) +#ifdef __CHKP__ + bndcu 4(%rdi), %bnd0 +#endif cmpl $0, 4(%rdi) jz L(exit_tail1) +#ifdef __CHKP__ + bndcu 8(%rdi), %bnd0 +#endif cmpl $0, 8(%rdi) jz L(exit_tail2) +#ifdef __CHKP__ + bndcu 12(%rdi), %bnd0 +#endif cmpl $0, 12(%rdi) jz L(exit_tail3) +#ifdef __CHKP__ + bndcu 16(%rdi), %bnd0 +#endif cmpl $0, 16(%rdi) jz L(exit_tail4) +#ifdef __CHKP__ + bndcu 20(%rdi), %bnd0 +#endif cmpl $0, 20(%rdi) jz L(exit_tail5) +#ifdef __CHKP__ + bndcu 24(%rdi), %bnd0 +#endif cmpl $0, 24(%rdi) jz L(exit_tail6) +#ifdef __CHKP__ + bndcu 28(%rdi), %bnd0 +#endif cmpl $0, 28(%rdi) jz L(exit_tail7) @@ -44,6 +69,9 @@ ENTRY (__wcslen) lea 16(%rdi), %rcx and $-16, %rax +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm0 pmovmskb %xmm0, %edx pxor %xmm1, %xmm1 @@ -51,6 +79,9 @@ ENTRY (__wcslen) lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm1 pmovmskb %xmm1, %edx pxor %xmm2, %xmm2 @@ -58,6 +89,9 @@ ENTRY (__wcslen) lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm2 pmovmskb %xmm2, %edx pxor %xmm3, %xmm3 @@ -65,54 +99,81 @@ ENTRY (__wcslen) lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 
+#endif pcmpeqd (%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx lea 16(%rax), %rax jnz L(exit) +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif pcmpeqd (%rax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -123,6 +184,9 @@ ENTRY (__wcslen) .p2align 4 L(aligned_64_loop): +#ifdef __CHKP__ + bndcu (%rax), %bnd0 +#endif movaps (%rax), %xmm0 movaps 16(%rax), %xmm1 movaps 32(%rax), %xmm2 @@ -173,6 +237,9 @@ L(exit): mov %dl, %cl and $15, %cl jz L(exit_1) +#ifdef __CHKP__ + bndcu -1(%rdi, %rax, 4), %bnd0 +#endif ret .p2align 4 @@ -181,11 +248,17 @@ L(exit_high): and $15, %ch jz L(exit_3) add $2, %rax +#ifdef __CHKP__ + bndcu -1(%rdi, %rax, 4), %bnd0 +#endif ret .p2align 4 L(exit_1): add $1, %rax +#ifdef __CHKP__ + bndcu -1(%rdi, %rax, 4), %bnd0 +#endif ret .p2align 4 diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S index ea1e2e55da..8edfc468b8 100644 --- a/sysdeps/x86_64/wcsrchr.S +++ b/sysdeps/x86_64/wcsrchr.S @@ -19,9 +19,22 @@ #include <sysdep.h> +#ifdef __CHKP__ +# define RETURN \ + bndcu (%rax), %bnd0; \ + ret +#else +# define RETURN ret +#endif + + .text ENTRY (wcsrchr) +#ifdef __CHKP__ + bndcl (%rdi), %bnd0 + bndcu (%rdi), %bnd0 +#endif movd %rsi, %xmm1 mov %rdi, %rcx punpckldq %xmm1, %xmm1 @@ -92,6 +105,9 @@ L(unaligned_match): /* Loop start on aligned string. 
*/ .p2align 4 L(loop): +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm0 pcmpeqd %xmm0, %xmm2 add $16, %rdi @@ -101,6 +117,9 @@ L(loop): or %rax, %rcx jnz L(matches) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm3 pcmpeqd %xmm3, %xmm2 add $16, %rdi @@ -110,6 +129,9 @@ L(loop): or %rax, %rcx jnz L(matches) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm4 pcmpeqd %xmm4, %xmm2 add $16, %rdi @@ -119,6 +141,9 @@ L(loop): or %rax, %rcx jnz L(matches) +#ifdef __CHKP__ + bndcu (%rdi), %bnd0 +#endif movdqa (%rdi), %xmm5 pcmpeqd %xmm5, %xmm2 add $16, %rdi @@ -145,7 +170,7 @@ L(return_value): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(match): @@ -175,14 +200,14 @@ L(find_zero): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(find_zero_in_first_wchar): test $1, %rax jz L(return_value) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(find_zero_in_second_wchar): @@ -192,7 +217,7 @@ L(find_zero_in_second_wchar): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(find_zero_in_third_wchar): @@ -204,12 +229,12 @@ L(find_zero_in_third_wchar): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(prolog_find_zero): add %rcx, %rdi - mov %rdx, %rcx + mov %rdx, %rcx L(prolog_find_zero_1): test $15, %cl jnz L(prolog_find_zero_in_first_wchar) @@ -228,14 +253,14 @@ L(prolog_find_zero_1): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(prolog_find_zero_in_first_wchar): test $1, %rax jz L(return_null) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(prolog_find_zero_in_second_wchar): @@ -245,7 +270,7 @@ L(prolog_find_zero_in_second_wchar): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(prolog_find_zero_in_third_wchar): @@ -257,22 +282,22 @@ L(prolog_find_zero_in_third_wchar): test $15 << 4, %al jnz L(match_second_wchar) lea -16(%rdi), %rax - ret + RETURN .p2align 4 L(match_second_wchar): lea -12(%rdi), %rax - ret + RETURN .p2align 4 L(match_third_wchar): lea -8(%rdi), %rax - ret + RETURN .p2align 4 L(match_fourth_wchar): lea -4(%rdi), %rax - ret + RETURN .p2align 4 L(return_null): |
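The instrumentation in this patch follows a single convention: on entry, bndcl and bndcu validate the lower and upper bound of each pointer argument (%bnd0 for the first argument, %bnd1 for the second), and inside the unrolled loops only bndcu is repeated before each load or store, since the pointers only move forward. The sketch below illustrates that convention on a hypothetical byte-copy routine (chkp_example_copy is not part of this patch); it assumes the same __CHKP__ macro and bnd-register assignments the patch relies on.

	.text
	.globl	chkp_example_copy
	.type	chkp_example_copy, @function
/* Copy bytes from %rsi to %rdi up to and including the NUL,
   returning the destination pointer in %rax.  */
chkp_example_copy:
#ifdef __CHKP__
	bndcl	(%rdi), %bnd0	/* destination: lower bound, checked once */
	bndcu	(%rdi), %bnd0	/* destination: upper bound of first byte */
	bndcl	(%rsi), %bnd1	/* source: lower bound, checked once */
	bndcu	(%rsi), %bnd1	/* source: upper bound of first byte */
#endif
	movq	%rdi, %rax	/* return value, as strcpy does */
1:
#ifdef __CHKP__
	bndcu	(%rsi), %bnd1	/* only the upper bound needs re-checking */
#endif
	movb	(%rsi), %cl	/* fetch a byte */
#ifdef __CHKP__
	bndcu	(%rdi), %bnd0
#endif
	movb	%cl, (%rdi)	/* store it */
	incq	%rsi
	incq	%rdi
	testb	%cl, %cl	/* was it the NUL terminator? */
	jnz	1b
	ret
	.size	chkp_example_copy, .-chkp_example_copy

Built without an MPX-aware compiler, the #ifdef __CHKP__ blocks drop out and the routine assembles to a plain byte copy, which is how the patch leaves the existing SSE2 code paths unchanged when bound checking is disabled.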