summaryrefslogtreecommitdiff
path: root/libc/sysdeps
diff options
context:
space:
mode:
authorjoseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d>2010-12-06 20:43:02 +0000
committerjoseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d>2010-12-06 20:43:02 +0000
commite7de0d4fc0526318aa0be1fcb6ec60cf27ce18c7 (patch)
tree8bccaab3a0d93c25f8c901c391970e12dea9e48c /libc/sysdeps
parent486caa1f257e98014ac78f7f7715876f705c17dd (diff)
downloadeglibc2-e7de0d4fc0526318aa0be1fcb6ec60cf27ce18c7.tar.gz
Merge changes between r11980 and r12228 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@12229 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps')
-rw-r--r--libc/sysdeps/i386/bits/byteswap.h15
-rw-r--r--libc/sysdeps/i386/fpu/libm-test-ulps4
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S9
-rw-r--r--libc/sysdeps/ieee754/ldbl-128/s_fmal.c2
-rw-r--r--libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c4
-rw-r--r--libc/sysdeps/unix/sysv/linux/i386/bits/mman.h4
-rw-r--r--libc/sysdeps/unix/sysv/linux/sys/swap.h3
-rw-r--r--libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h3
-rw-r--r--libc/sysdeps/x86_64/bits/byteswap.h29
-rw-r--r--libc/sysdeps/x86_64/memset.S311
-rw-r--r--libc/sysdeps/x86_64/multiarch/Makefile3
-rw-r--r--libc/sysdeps/x86_64/multiarch/bzero.S56
-rw-r--r--libc/sysdeps/x86_64/multiarch/cacheinfo.c2
-rw-r--r--libc/sysdeps/x86_64/multiarch/init-arch.c6
-rw-r--r--libc/sysdeps/x86_64/multiarch/init-arch.h12
-rw-r--r--libc/sysdeps/x86_64/multiarch/memset-x86-64.S18
-rw-r--r--libc/sysdeps/x86_64/multiarch/memset.S74
-rw-r--r--libc/sysdeps/x86_64/multiarch/memset_chk.S44
-rw-r--r--libc/sysdeps/x86_64/multiarch/strcmp.S6
19 files changed, 412 insertions, 193 deletions
diff --git a/libc/sysdeps/i386/bits/byteswap.h b/libc/sysdeps/i386/bits/byteswap.h
index 1f3fc5e52..c246ae86c 100644
--- a/libc/sysdeps/i386/bits/byteswap.h
+++ b/libc/sysdeps/i386/bits/byteswap.h
@@ -1,5 +1,5 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008
+ Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -27,26 +27,27 @@
/* Swap bytes in 16 bit value. */
#define __bswap_constant_16(x) \
- ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
+ ((unsigned short int) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8)))
#ifdef __GNUC__
# if __GNUC__ >= 2
# define __bswap_16(x) \
(__extension__ \
- ({ register unsigned short int __v, __x = (x); \
+ ({ register unsigned short int __v, __x = (unsigned short int) (x); \
if (__builtin_constant_p (__x)) \
__v = __bswap_constant_16 (__x); \
else \
__asm__ ("rorw $8, %w0" \
: "=r" (__v) \
- : "0" (__x) \
- : "cc"); \
+ : "0" (__x) \
+ : "cc"); \
__v; }))
# else
/* This is better than nothing. */
# define __bswap_16(x) \
(__extension__ \
- ({ register unsigned short int __x = (x); __bswap_constant_16 (__x); }))
+ ({ register unsigned short int __x = (unsigned short int) (x); \
+ __bswap_constant_16 (__x); }))
# endif
#else
static __inline unsigned short int
@@ -122,7 +123,7 @@ __bswap_32 (unsigned int __bsx)
(__extension__ \
({ union { __extension__ unsigned long long int __ll; \
unsigned long int __l[2]; } __w, __r; \
- if (__builtin_constant_p (x)) \
+ if (__builtin_constant_p (x)) \
__r.__ll = __bswap_constant_64 (x); \
else \
{ \
diff --git a/libc/sysdeps/i386/fpu/libm-test-ulps b/libc/sysdeps/i386/fpu/libm-test-ulps
index 5231789be..4b1a9e734 100644
--- a/libc/sysdeps/i386/fpu/libm-test-ulps
+++ b/libc/sysdeps/i386/fpu/libm-test-ulps
@@ -857,8 +857,8 @@ ifloat: 1
ildouble: 2
ldouble: 2
Test "yn (10, 0.75) == -2133501638.90573424452445412893839236":
-float: 1
-ifloat: 1
+float: 2
+ifloat: 2
ildouble: 4
ldouble: 4
Test "yn (10, 1.0) == -121618014.278689189288130426667971145":
diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
index a4de2259d..607b69b6f 100644
--- a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
+++ b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
@@ -70,9 +70,7 @@ ENTRY (STRCMP)
movl CNT(%esp), %ebp
cmp $16, %ebp
jb L(less16bytes_sncmp)
- jmp L(more16bytes)
-#endif
-
+#else
movzbl (%eax), %ecx
cmpb %cl, (%edx)
jne L(neq)
@@ -123,11 +121,6 @@ ENTRY (STRCMP)
add $8, %edx
add $8, %eax
-#ifdef USE_AS_STRNCMP
- cmp $8, %ebp
- lea -8(%ebp), %ebp
- je L(eq)
-L(more16bytes):
#endif
movl %edx, %ecx
and $0xfff, %ecx
diff --git a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c
index b750538b8..2dec70ee1 100644
--- a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -176,7 +176,7 @@ __fmal (long double x, long double y, long double z)
u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
v.d = a1 + u.d;
/* Ensure the addition is not scheduled after fetestexcept call. */
- asm volatile ("" : "m" (v));
+ asm volatile ("" : : "m" (v));
int j = fetestexcept (FE_INEXACT) != 0;
feupdateenv (&env);
/* Ensure the following computations are performed in default rounding
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
index 1f533cae4..fe6bb55b0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
@@ -73,9 +73,9 @@ long double __ieee754_sqrtl(long double x)
m = ((a.i[2] >> 20) & 0x7ff) - 54;
}
m += n;
- if (m > 0)
+ if ((int) m > 0)
a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
- else if (m <= -54) {
+ else if ((int) m <= -54) {
a.i[2] &= 0x80000000;
a.i[3] = 0;
} else {
diff --git a/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h b/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h
index 22aa401e0..7ac1aff08 100644
--- a/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h
@@ -1,5 +1,6 @@
/* Definitions for POSIX memory map interface. Linux/i386 version.
- Copyright (C) 1997,2000,2003,2005,2006,2009 Free Software Foundation, Inc.
+ Copyright (C) 1997,2000,2003,2005,2006,2009,2010
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -64,6 +65,7 @@
# define MAP_POPULATE 0x08000 /* Populate (prefault) pagetables. */
# define MAP_NONBLOCK 0x10000 /* Do not block on IO. */
# define MAP_STACK 0x20000 /* Allocation is for a stack. */
+# define MAP_HUGETLB 0x40000 /* Create huge page mapping. */
#endif
/* Flags to `msync'. */
diff --git a/libc/sysdeps/unix/sysv/linux/sys/swap.h b/libc/sysdeps/unix/sysv/linux/sys/swap.h
index b6e7bef5d..961006cbb 100644
--- a/libc/sysdeps/unix/sysv/linux/sys/swap.h
+++ b/libc/sysdeps/unix/sysv/linux/sys/swap.h
@@ -1,5 +1,5 @@
/* Calls to enable and disable swapping on specified locations. Linux version.
- Copyright (C) 1996, 1998, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1998, 1999, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -28,6 +28,7 @@
#define SWAP_FLAG_PREFER 0x8000 /* Set if swap priority is specified. */
#define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0
+#define SWAP_FLAG_DISCARD 0x10000 /* Discard swap cluster after use. */
__BEGIN_DECLS
diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h b/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h
index 3dcbf9c31..6f3ad0de4 100644
--- a/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h
@@ -1,5 +1,5 @@
/* Definitions for POSIX memory map interface. Linux/x86_64 version.
- Copyright (C) 2001, 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2001,2003,2005,2006,2009,2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -65,6 +65,7 @@
# define MAP_POPULATE 0x08000 /* Populate (prefault) pagetables. */
# define MAP_NONBLOCK 0x10000 /* Do not block on IO. */
# define MAP_STACK 0x20000 /* Allocation is for a stack. */
+# define MAP_HUGETLB 0x40000 /* Create huge page mapping. */
#endif
/* Flags to `msync'. */
diff --git a/libc/sysdeps/x86_64/bits/byteswap.h b/libc/sysdeps/x86_64/bits/byteswap.h
index 08b38e852..e350fb806 100644
--- a/libc/sysdeps/x86_64/bits/byteswap.h
+++ b/libc/sysdeps/x86_64/bits/byteswap.h
@@ -1,5 +1,5 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008
+ Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -29,12 +29,12 @@
/* Swap bytes in 16 bit value. */
#define __bswap_constant_16(x) \
- ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
+ ((unsigned short int) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8)))
#if defined __GNUC__ && __GNUC__ >= 2
# define __bswap_16(x) \
(__extension__ \
- ({ register unsigned short int __v, __x = (x); \
+ ({ register unsigned short int __v, __x = (unsigned short int) (x); \
if (__builtin_constant_p (__x)) \
__v = __bswap_constant_16 (__x); \
else \
@@ -47,7 +47,8 @@
/* This is better than nothing. */
# define __bswap_16(x) \
(__extension__ \
- ({ register unsigned short int __x = (x); __bswap_constant_16 (__x); }))
+ ({ register unsigned short int __x = (unsigned short int) (x); \
+ __bswap_constant_16 (__x); }))
#endif
@@ -120,16 +121,16 @@
# define __bswap_64(x) \
(__extension__ \
({ union { __extension__ unsigned long long int __ll; \
- unsigned int __l[2]; } __w, __r; \
- if (__builtin_constant_p (x)) \
- __r.__ll = __bswap_constant_64 (x); \
- else \
- { \
- __w.__ll = (x); \
- __r.__l[0] = __bswap_32 (__w.__l[1]); \
- __r.__l[1] = __bswap_32 (__w.__l[0]); \
- } \
- __r.__ll; }))
+ unsigned int __l[2]; } __w, __r; \
+ if (__builtin_constant_p (x)) \
+ __r.__ll = __bswap_constant_64 (x); \
+ else \
+ { \
+ __w.__ll = (x); \
+ __r.__l[0] = __bswap_32 (__w.__l[1]); \
+ __r.__l[1] = __bswap_32 (__w.__l[0]); \
+ } \
+ __r.__ll; }))
# endif
#endif
diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S
index 681ab870e..f6eb71fc7 100644
--- a/libc/sysdeps/x86_64/memset.S
+++ b/libc/sysdeps/x86_64/memset.S
@@ -24,7 +24,7 @@
#define __STOS_UPPER_BOUNDARY $65536
.text
-#ifndef NOT_IN_libc
+#if !defined NOT_IN_libc && !defined USE_MULTIARCH
ENTRY(__bzero)
mov %rsi,%rdx /* Adjust parameter. */
xorl %esi,%esi /* Fill with 0s. */
@@ -34,10 +34,10 @@ weak_alias (__bzero, bzero)
#endif
#if defined PIC && !defined NOT_IN_libc
-ENTRY (__memset_chk)
+ENTRY_CHK (__memset_chk)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memset_chk)
+END_CHK (__memset_chk)
#endif
ENTRY (memset)
L(memset_entry):
@@ -591,157 +591,13 @@ L(A6Q1): mov %dx,-0xe(%rdi)
L(A7Q0): mov %dl,-0x7(%rdi)
L(A6Q0): mov %dx,-0x6(%rdi)
mov %edx,-0x4(%rdi)
- jmp L(aligned_now)
-
- .balign 16
-L(aligned_now):
-
- cmpl $0x1,__x86_64_preferred_memory_instruction(%rip)
- jg L(SSE_pre)
-
-L(8byte_move_try):
- cmpq __STOS_LOWER_BOUNDARY,%r8
- jae L(8byte_stos_try)
-
- .balign 16
-L(8byte_move):
- movq %r8,%rcx
- shrq $7,%rcx
- jz L(8byte_move_skip)
-
- .p2align 4
-
-L(8byte_move_loop):
- decq %rcx
-
- movq %rdx, (%rdi)
- movq %rdx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %rdx, 24 (%rdi)
- movq %rdx, 32 (%rdi)
- movq %rdx, 40 (%rdi)
- movq %rdx, 48 (%rdi)
- movq %rdx, 56 (%rdi)
- movq %rdx, 64 (%rdi)
- movq %rdx, 72 (%rdi)
- movq %rdx, 80 (%rdi)
- movq %rdx, 88 (%rdi)
- movq %rdx, 96 (%rdi)
- movq %rdx, 104 (%rdi)
- movq %rdx, 112 (%rdi)
- movq %rdx, 120 (%rdi)
-
- leaq 128 (%rdi),%rdi
-
- jnz L(8byte_move_loop)
-
-L(8byte_move_skip):
- andl $127,%r8d
- lea (%rdi,%r8,1),%rdi
-
-#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
-#else
- lea L(Got0)(%rip),%r11
- lea L(setPxQx)(%rip),%rcx
- movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
-#endif
-
- .balign 16
-L(8byte_stos_try):
- mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
- cmpq %r8,%r9 // calculate the lesser of remaining
- cmovaq %r8,%r9 // bytes and largest cache size
- jbe L(8byte_stos)
-
-L(8byte_move_reuse_try):
- cmp __STOS_UPPER_BOUNDARY,%r8
- jae L(8byte_move)
-
- .balign 16
-L(8byte_stos):
- movq %r9,%rcx
- andq $-8,%r9
-
- shrq $3,%rcx
- jz L(8byte_stos_skip)
-
- xchgq %rax,%rdx
-
- rep
- stosq
-
- xchgq %rax,%rdx
-
-L(8byte_stos_skip):
- subq %r9,%r8
- ja L(8byte_nt_move)
-
- andl $7,%r8d
- lea (%rdi,%r8,1),%rdi
-#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
-#else
- lea L(Got0)(%rip),%r11
- lea L(setPxQx)(%rip),%rcx
- movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
-#endif
- .balign 16
-L(8byte_nt_move):
- movq %r8,%rcx
- shrq $7,%rcx
- jz L(8byte_nt_move_skip)
-
- .balign 16
-L(8byte_nt_move_loop):
- decq %rcx
-
- movntiq %rdx, (%rdi)
- movntiq %rdx, 8 (%rdi)
- movntiq %rdx, 16 (%rdi)
- movntiq %rdx, 24 (%rdi)
- movntiq %rdx, 32 (%rdi)
- movntiq %rdx, 40 (%rdi)
- movntiq %rdx, 48 (%rdi)
- movntiq %rdx, 56 (%rdi)
- movntiq %rdx, 64 (%rdi)
- movntiq %rdx, 72 (%rdi)
- movntiq %rdx, 80 (%rdi)
- movntiq %rdx, 88 (%rdi)
- movntiq %rdx, 96 (%rdi)
- movntiq %rdx, 104 (%rdi)
- movntiq %rdx, 112 (%rdi)
- movntiq %rdx, 120 (%rdi)
-
- leaq 128 (%rdi),%rdi
-
- jnz L(8byte_nt_move_loop)
-
- sfence
-
-L(8byte_nt_move_skip):
- andl $127,%r8d
-
- lea (%rdi,%r8,1),%rdi
-#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
-#else
- lea L(Got0)(%rip),%r11
- lea L(setPxQx)(%rip),%rcx
- movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
-#endif
+#ifndef USE_MULTIARCH
+ jmp L(aligned_now)
L(SSE_pre):
+#endif
+#if !defined USE_MULTIARCH || defined USE_SSE2
# fill RegXMM0 with the pattern
movd %rdx,%xmm0
punpcklqdq %xmm0,%xmm0
@@ -1342,11 +1198,162 @@ L(SSExDx):
.short L(SSE15QB)-L(SSE0Q0)
#endif
.popsection
+#endif /* !defined USE_MULTIARCH || defined USE_SSE2 */
+
+ .balign 16
+L(aligned_now):
+
+#ifndef USE_MULTIARCH
+ cmpl $0x1,__x86_64_preferred_memory_instruction(%rip)
+ jg L(SSE_pre)
+#endif /* USE_MULTIARCH */
+
+L(8byte_move_try):
+ cmpq __STOS_LOWER_BOUNDARY,%r8
+ jae L(8byte_stos_try)
+
+ .balign 16
+L(8byte_move):
+ movq %r8,%rcx
+ shrq $7,%rcx
+ jz L(8byte_move_skip)
+
+ .p2align 4
+
+L(8byte_move_loop):
+ decq %rcx
+
+ movq %rdx, (%rdi)
+ movq %rdx, 8 (%rdi)
+ movq %rdx, 16 (%rdi)
+ movq %rdx, 24 (%rdi)
+ movq %rdx, 32 (%rdi)
+ movq %rdx, 40 (%rdi)
+ movq %rdx, 48 (%rdi)
+ movq %rdx, 56 (%rdi)
+ movq %rdx, 64 (%rdi)
+ movq %rdx, 72 (%rdi)
+ movq %rdx, 80 (%rdi)
+ movq %rdx, 88 (%rdi)
+ movq %rdx, 96 (%rdi)
+ movq %rdx, 104 (%rdi)
+ movq %rdx, 112 (%rdi)
+ movq %rdx, 120 (%rdi)
+
+ leaq 128 (%rdi),%rdi
+
+ jnz L(8byte_move_loop)
+
+L(8byte_move_skip):
+ andl $127,%r8d
+ lea (%rdi,%r8,1),%rdi
+
+#ifndef PIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+#else
+ lea L(Got0)(%rip),%r11
+ lea L(setPxQx)(%rip),%rcx
+ movswq (%rcx,%r8,2),%rcx
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
+#endif
+
+ .balign 16
+L(8byte_stos_try):
+ mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
+ cmpq %r8,%r9 // calculate the lesser of remaining
+ cmovaq %r8,%r9 // bytes and largest cache size
+ jbe L(8byte_stos)
+
+L(8byte_move_reuse_try):
+ cmp __STOS_UPPER_BOUNDARY,%r8
+ jae L(8byte_move)
+
+ .balign 16
+L(8byte_stos):
+ movq %r9,%rcx
+ andq $-8,%r9
+
+ shrq $3,%rcx
+ jz L(8byte_stos_skip)
+
+ xchgq %rax,%rdx
+
+ rep
+ stosq
+
+ xchgq %rax,%rdx
+
+L(8byte_stos_skip):
+ subq %r9,%r8
+ ja L(8byte_nt_move)
+
+ andl $7,%r8d
+ lea (%rdi,%r8,1),%rdi
+#ifndef PIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+#else
+ lea L(Got0)(%rip),%r11
+ lea L(setPxQx)(%rip),%rcx
+ movswq (%rcx,%r8,2),%rcx
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
+#endif
+
+ .balign 16
+L(8byte_nt_move):
+ movq %r8,%rcx
+ shrq $7,%rcx
+ jz L(8byte_nt_move_skip)
+
+ .balign 16
+L(8byte_nt_move_loop):
+ decq %rcx
+
+ movntiq %rdx, (%rdi)
+ movntiq %rdx, 8 (%rdi)
+ movntiq %rdx, 16 (%rdi)
+ movntiq %rdx, 24 (%rdi)
+ movntiq %rdx, 32 (%rdi)
+ movntiq %rdx, 40 (%rdi)
+ movntiq %rdx, 48 (%rdi)
+ movntiq %rdx, 56 (%rdi)
+ movntiq %rdx, 64 (%rdi)
+ movntiq %rdx, 72 (%rdi)
+ movntiq %rdx, 80 (%rdi)
+ movntiq %rdx, 88 (%rdi)
+ movntiq %rdx, 96 (%rdi)
+ movntiq %rdx, 104 (%rdi)
+ movntiq %rdx, 112 (%rdi)
+ movntiq %rdx, 120 (%rdi)
+
+ leaq 128 (%rdi),%rdi
+
+ jnz L(8byte_nt_move_loop)
+
+ sfence
+
+L(8byte_nt_move_skip):
+ andl $127,%r8d
+
+ lea (%rdi,%r8,1),%rdi
+#ifndef PIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+#else
+ lea L(Got0)(%rip),%r11
+ lea L(setPxQx)(%rip),%rcx
+ movswq (%rcx,%r8,2),%rcx
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
+#endif
END (memset)
libc_hidden_builtin_def (memset)
-#if defined PIC && !defined NOT_IN_libc
+#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
.string "memset used with constant zero length parameter; this could be due to transposed parameters"
diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile
index 5d2e34ebc..19aa4be4c 100644
--- a/libc/sysdeps/x86_64/multiarch/Makefile
+++ b/libc/sysdeps/x86_64/multiarch/Makefile
@@ -8,7 +8,8 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
- strncase_l-ssse3 strlen-sse4 strlen-no-bsf
+ strncase_l-ssse3 strlen-sse4 strlen-no-bsf \
+ memset-x86-64
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
CFLAGS-varshift.c += -msse4
diff --git a/libc/sysdeps/x86_64/multiarch/bzero.S b/libc/sysdeps/x86_64/multiarch/bzero.S
new file mode 100644
index 000000000..9c9eebd5e
--- /dev/null
+++ b/libc/sysdeps/x86_64/multiarch/bzero.S
@@ -0,0 +1,56 @@
+/* Multiple versions of bzero
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+ .text
+ENTRY(__bzero)
+ .type __bzero, @gnu_indirect_function
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ jne 1f
+ call __init_cpu_features
+1: leaq __bzero_x86_64(%rip), %rax
+ testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+ jz 2f
+ leaq __bzero_sse2(%rip), %rax
+2: ret
+END(__bzero)
+
+ .type __bzero_sse2, @function
+__bzero_sse2:
+ cfi_startproc
+ CALL_MCOUNT
+ mov %rsi,%rdx /* Adjust parameter. */
+ xorl %esi,%esi /* Fill with 0s. */
+ jmp __memset_sse2
+ cfi_endproc
+ .size __bzero_sse2, .-__bzero_sse2
+
+ .type __bzero_x86_64, @function
+__bzero_x86_64:
+ cfi_startproc
+ CALL_MCOUNT
+ mov %rsi,%rdx /* Adjust parameter. */
+ xorl %esi,%esi /* Fill with 0s. */
+ jmp __memset_x86_64
+ cfi_endproc
+ .size __bzero_x86_64, .-__bzero_x86_64
+
+weak_alias (__bzero, bzero)
diff --git a/libc/sysdeps/x86_64/multiarch/cacheinfo.c b/libc/sysdeps/x86_64/multiarch/cacheinfo.c
new file mode 100644
index 000000000..f87b8dce6
--- /dev/null
+++ b/libc/sysdeps/x86_64/multiarch/cacheinfo.c
@@ -0,0 +1,2 @@
+#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
+#include "../cacheinfo.c"
diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.c b/libc/sysdeps/x86_64/multiarch/init-arch.c
index 786466d5f..f0d2bb7d1 100644
--- a/libc/sysdeps/x86_64/multiarch/init-arch.c
+++ b/libc/sysdeps/x86_64/multiarch/init-arch.c
@@ -59,6 +59,11 @@ __init_cpu_features (void)
get_common_indeces (&family, &model);
+ /* Intel processors prefer SSE instruction for memory/string
+ routines if they are avaiable. */
+ __cpu_features.feature[index_Prefer_SSE_for_memop]
+ |= bit_Prefer_SSE_for_memop;
+
unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
unsigned int extended_family = (eax >> 20) & 0xff;
unsigned int extended_model = (eax >> 12) & 0xf0;
@@ -82,6 +87,7 @@ __init_cpu_features (void)
case 0x1e:
case 0x1f:
case 0x25:
+ case 0x2c:
case 0x2e:
case 0x2f:
/* Rep string instructions and copy backward are fast on
diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.h b/libc/sysdeps/x86_64/multiarch/init-arch.h
index 783b02015..6e409b8f1 100644
--- a/libc/sysdeps/x86_64/multiarch/init-arch.h
+++ b/libc/sysdeps/x86_64/multiarch/init-arch.h
@@ -16,9 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#define bit_Fast_Rep_String (1 << 0)
-#define bit_Fast_Copy_Backward (1 << 1)
-#define bit_Slow_BSF (1 << 2)
+#define bit_Fast_Rep_String (1 << 0)
+#define bit_Fast_Copy_Backward (1 << 1)
+#define bit_Slow_BSF (1 << 2)
+#define bit_Prefer_SSE_for_memop (1 << 3)
#ifdef __ASSEMBLER__
@@ -37,6 +38,7 @@
# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -109,6 +111,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1
# define index_Slow_BSF FEATURE_INDEX_1
+# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
#define HAS_ARCH_FEATURE(idx, bit) \
((__get_cpu_features ()->feature[idx] & (bit)) != 0)
@@ -122,4 +125,7 @@ extern const struct cpu_features *__get_cpu_features (void)
#define HAS_SLOW_BSF \
HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
+#define HAS_PREFER_SSE_FOR_MEMOP \
+ HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+
#endif /* __ASSEMBLER__ */
diff --git a/libc/sysdeps/x86_64/multiarch/memset-x86-64.S b/libc/sysdeps/x86_64/multiarch/memset-x86-64.S
new file mode 100644
index 000000000..5e8cfb3e9
--- /dev/null
+++ b/libc/sysdeps/x86_64/multiarch/memset-x86-64.S
@@ -0,0 +1,18 @@
+#include <sysdep.h>
+
+#ifndef NOT_IN_libc
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+ .type __memset_chk_x86_64, @function; \
+ .globl __memset_chk_x86_64; \
+ .p2align 4; \
+ __memset_chk_x86_64: cfi_startproc; \
+ CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+ cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64
+
+# define libc_hidden_builtin_def(name)
+# define memset __memset_x86_64
+# include "../memset.S"
+#endif
diff --git a/libc/sysdeps/x86_64/multiarch/memset.S b/libc/sysdeps/x86_64/multiarch/memset.S
new file mode 100644
index 000000000..a8d0e9ea2
--- /dev/null
+++ b/libc/sysdeps/x86_64/multiarch/memset.S
@@ -0,0 +1,74 @@
+/* Multiple versions of memset
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib. */
+#ifndef NOT_IN_libc
+ENTRY(memset)
+ .type memset, @gnu_indirect_function
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ jne 1f
+ call __init_cpu_features
+1: leaq __memset_x86_64(%rip), %rax
+ testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+ jz 2f
+ leaq __memset_sse2(%rip), %rax
+2: ret
+END(memset)
+
+# define USE_SSE2 1
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memset_sse2, @function; \
+ .globl __memset_sse2; \
+ .p2align 4; \
+ __memset_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memset_sse2, .-__memset_sse2
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+ .type __memset_chk_sse2, @function; \
+ .globl __memset_chk_sse2; \
+ .p2align 4; \
+ __memset_chk_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+ cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal memset calls through a PLT.
+ The speedup we get from using GPR instruction is likely eaten away
+ by the indirect call in the PLT. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memset; __GI_memset = __memset_sse2
+# endif
+
+# undef strong_alias
+# define strong_alias(original, alias)
+#endif
+
+#include "../memset.S"
diff --git a/libc/sysdeps/x86_64/multiarch/memset_chk.S b/libc/sysdeps/x86_64/multiarch/memset_chk.S
new file mode 100644
index 000000000..16afe60c6
--- /dev/null
+++ b/libc/sysdeps/x86_64/multiarch/memset_chk.S
@@ -0,0 +1,44 @@
+/* Multiple versions of __memset_chk
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ENTRY(__memset_chk)
+ .type __memset_chk, @gnu_indirect_function
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ jne 1f
+ call __init_cpu_features
+1: leaq __memset_chk_x86_64(%rip), %rax
+ testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+ jz 2f
+ leaq __memset_chk_sse2(%rip), %rax
+2: ret
+END(__memset_chk)
+
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+ .section .gnu.warning.__memset_zero_constant_len_parameter
+ .string "memset used with constant zero length parameter; this could be due to transposed parameters"
+# else
+# include "../memset_chk.S"
+# endif
+#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strcmp.S b/libc/sysdeps/x86_64/multiarch/strcmp.S
index 54e5bbc31..185928957 100644
--- a/libc/sysdeps/x86_64/multiarch/strcmp.S
+++ b/libc/sysdeps/x86_64/multiarch/strcmp.S
@@ -1809,7 +1809,13 @@ LABEL(nibble_ashr_15_use_sse4_2):
ja LABEL(loop_ashr_15_use_sse4_2)
LABEL(nibble_ashr_use_sse4_2_exit):
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
+# else
+ movdqa (%rsi,%rdx), %xmm1
+ TOLOWER (%xmm0, %xmm1)
+ pcmpistri $0x1a, %xmm1, %xmm0
+# endif
.p2align 4
LABEL(use_sse4_2_exit):
jnc LABEL(strcmp_exitz_sse4_2)