diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2010-12-06 20:43:02 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2010-12-06 20:43:02 +0000 |
commit | e7de0d4fc0526318aa0be1fcb6ec60cf27ce18c7 (patch) | |
tree | 8bccaab3a0d93c25f8c901c391970e12dea9e48c /libc/sysdeps | |
parent | 486caa1f257e98014ac78f7f7715876f705c17dd (diff) | |
download | eglibc2-e7de0d4fc0526318aa0be1fcb6ec60cf27ce18c7.tar.gz |
Merge changes between r11980 and r12228 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@12229 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps')
-rw-r--r-- | libc/sysdeps/i386/bits/byteswap.h | 15 | ||||
-rw-r--r-- | libc/sysdeps/i386/fpu/libm-test-ulps | 4 | ||||
-rw-r--r-- | libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S | 9 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/s_fmal.c | 2 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c | 4 | ||||
-rw-r--r-- | libc/sysdeps/unix/sysv/linux/i386/bits/mman.h | 4 | ||||
-rw-r--r-- | libc/sysdeps/unix/sysv/linux/sys/swap.h | 3 | ||||
-rw-r--r-- | libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h | 3 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/bits/byteswap.h | 29 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/memset.S | 311 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/Makefile | 3 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/bzero.S | 56 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/cacheinfo.c | 2 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/init-arch.c | 6 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/init-arch.h | 12 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/memset-x86-64.S | 18 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/memset.S | 74 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/memset_chk.S | 44 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/strcmp.S | 6 |
19 files changed, 412 insertions, 193 deletions
diff --git a/libc/sysdeps/i386/bits/byteswap.h b/libc/sysdeps/i386/bits/byteswap.h index 1f3fc5e52..c246ae86c 100644 --- a/libc/sysdeps/i386/bits/byteswap.h +++ b/libc/sysdeps/i386/bits/byteswap.h @@ -1,5 +1,5 @@ /* Macros to swap the order of bytes in integer values. - Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008 + Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -27,26 +27,27 @@ /* Swap bytes in 16 bit value. */ #define __bswap_constant_16(x) \ - ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8)) + ((unsigned short int) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))) #ifdef __GNUC__ # if __GNUC__ >= 2 # define __bswap_16(x) \ (__extension__ \ - ({ register unsigned short int __v, __x = (x); \ + ({ register unsigned short int __v, __x = (unsigned short int) (x); \ if (__builtin_constant_p (__x)) \ __v = __bswap_constant_16 (__x); \ else \ __asm__ ("rorw $8, %w0" \ : "=r" (__v) \ - : "0" (__x) \ - : "cc"); \ + : "0" (__x) \ + : "cc"); \ __v; })) # else /* This is better than nothing. 
*/ # define __bswap_16(x) \ (__extension__ \ - ({ register unsigned short int __x = (x); __bswap_constant_16 (__x); })) + ({ register unsigned short int __x = (unsigned short int) (x); \ + __bswap_constant_16 (__x); })) # endif #else static __inline unsigned short int @@ -122,7 +123,7 @@ __bswap_32 (unsigned int __bsx) (__extension__ \ ({ union { __extension__ unsigned long long int __ll; \ unsigned long int __l[2]; } __w, __r; \ - if (__builtin_constant_p (x)) \ + if (__builtin_constant_p (x)) \ __r.__ll = __bswap_constant_64 (x); \ else \ { \ diff --git a/libc/sysdeps/i386/fpu/libm-test-ulps b/libc/sysdeps/i386/fpu/libm-test-ulps index 5231789be..4b1a9e734 100644 --- a/libc/sysdeps/i386/fpu/libm-test-ulps +++ b/libc/sysdeps/i386/fpu/libm-test-ulps @@ -857,8 +857,8 @@ ifloat: 1 ildouble: 2 ldouble: 2 Test "yn (10, 0.75) == -2133501638.90573424452445412893839236": -float: 1 -ifloat: 1 +float: 2 +ifloat: 2 ildouble: 4 ldouble: 4 Test "yn (10, 1.0) == -121618014.278689189288130426667971145": diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S index a4de2259d..607b69b6f 100644 --- a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S @@ -70,9 +70,7 @@ ENTRY (STRCMP) movl CNT(%esp), %ebp cmp $16, %ebp jb L(less16bytes_sncmp) - jmp L(more16bytes) -#endif - +#else movzbl (%eax), %ecx cmpb %cl, (%edx) jne L(neq) @@ -123,11 +121,6 @@ ENTRY (STRCMP) add $8, %edx add $8, %eax -#ifdef USE_AS_STRNCMP - cmp $8, %ebp - lea -8(%ebp), %ebp - je L(eq) -L(more16bytes): #endif movl %edx, %ecx and $0xfff, %ecx diff --git a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c index b750538b8..2dec70ee1 100644 --- a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -176,7 +176,7 @@ __fmal (long double x, long double y, long double z) u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; v.d = a1 + u.d; /* Ensure the 
addition is not scheduled after fetestexcept call. */ - asm volatile ("" : "m" (v)); + asm volatile ("" : : "m" (v)); int j = fetestexcept (FE_INEXACT) != 0; feupdateenv (&env); /* Ensure the following computations are performed in default rounding diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c index 1f533cae4..fe6bb55b0 100644 --- a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c +++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c @@ -73,9 +73,9 @@ long double __ieee754_sqrtl(long double x) m = ((a.i[2] >> 20) & 0x7ff) - 54; } m += n; - if (m > 0) + if ((int) m > 0) a.i[2] = (a.i[2] & 0x800fffff) | (m << 20); - else if (m <= -54) { + else if ((int) m <= -54) { a.i[2] &= 0x80000000; a.i[3] = 0; } else { diff --git a/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h b/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h index 22aa401e0..7ac1aff08 100644 --- a/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/i386/bits/mman.h @@ -1,5 +1,6 @@ /* Definitions for POSIX memory map interface. Linux/i386 version. - Copyright (C) 1997,2000,2003,2005,2006,2009 Free Software Foundation, Inc. + Copyright (C) 1997,2000,2003,2005,2006,2009,2010 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -64,6 +65,7 @@ # define MAP_POPULATE 0x08000 /* Populate (prefault) pagetables. */ # define MAP_NONBLOCK 0x10000 /* Do not block on IO. */ # define MAP_STACK 0x20000 /* Allocation is for a stack. */ +# define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif /* Flags to `msync'. */ diff --git a/libc/sysdeps/unix/sysv/linux/sys/swap.h b/libc/sysdeps/unix/sysv/linux/sys/swap.h index b6e7bef5d..961006cbb 100644 --- a/libc/sysdeps/unix/sysv/linux/sys/swap.h +++ b/libc/sysdeps/unix/sysv/linux/sys/swap.h @@ -1,5 +1,5 @@ /* Calls to enable and disable swapping on specified locations. Linux version. 
- Copyright (C) 1996, 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1996, 1998, 1999, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -28,6 +28,7 @@ #define SWAP_FLAG_PREFER 0x8000 /* Set if swap priority is specified. */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* Discard swap cluster after use. */ __BEGIN_DECLS diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h b/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h index 3dcbf9c31..6f3ad0de4 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h @@ -1,5 +1,5 @@ /* Definitions for POSIX memory map interface. Linux/x86_64 version. - Copyright (C) 2001, 2003, 2005, 2006, 2009 Free Software Foundation, Inc. + Copyright (C) 2001,2003,2005,2006,2009,2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -65,6 +65,7 @@ # define MAP_POPULATE 0x08000 /* Populate (prefault) pagetables. */ # define MAP_NONBLOCK 0x10000 /* Do not block on IO. */ # define MAP_STACK 0x20000 /* Allocation is for a stack. */ +# define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif /* Flags to `msync'. */ diff --git a/libc/sysdeps/x86_64/bits/byteswap.h b/libc/sysdeps/x86_64/bits/byteswap.h index 08b38e852..e350fb806 100644 --- a/libc/sysdeps/x86_64/bits/byteswap.h +++ b/libc/sysdeps/x86_64/bits/byteswap.h @@ -1,5 +1,5 @@ /* Macros to swap the order of bytes in integer values. - Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008 + Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -29,12 +29,12 @@ /* Swap bytes in 16 bit value. 
*/ #define __bswap_constant_16(x) \ - ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8)) + ((unsigned short int) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))) #if defined __GNUC__ && __GNUC__ >= 2 # define __bswap_16(x) \ (__extension__ \ - ({ register unsigned short int __v, __x = (x); \ + ({ register unsigned short int __v, __x = (unsigned short int) (x); \ if (__builtin_constant_p (__x)) \ __v = __bswap_constant_16 (__x); \ else \ @@ -47,7 +47,8 @@ /* This is better than nothing. */ # define __bswap_16(x) \ (__extension__ \ - ({ register unsigned short int __x = (x); __bswap_constant_16 (__x); })) + ({ register unsigned short int __x = (unsigned short int) (x); \ + __bswap_constant_16 (__x); })) #endif @@ -120,16 +121,16 @@ # define __bswap_64(x) \ (__extension__ \ ({ union { __extension__ unsigned long long int __ll; \ - unsigned int __l[2]; } __w, __r; \ - if (__builtin_constant_p (x)) \ - __r.__ll = __bswap_constant_64 (x); \ - else \ - { \ - __w.__ll = (x); \ - __r.__l[0] = __bswap_32 (__w.__l[1]); \ - __r.__l[1] = __bswap_32 (__w.__l[0]); \ - } \ - __r.__ll; })) + unsigned int __l[2]; } __w, __r; \ + if (__builtin_constant_p (x)) \ + __r.__ll = __bswap_constant_64 (x); \ + else \ + { \ + __w.__ll = (x); \ + __r.__l[0] = __bswap_32 (__w.__l[1]); \ + __r.__l[1] = __bswap_32 (__w.__l[0]); \ + } \ + __r.__ll; })) # endif #endif diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S index 681ab870e..f6eb71fc7 100644 --- a/libc/sysdeps/x86_64/memset.S +++ b/libc/sysdeps/x86_64/memset.S @@ -24,7 +24,7 @@ #define __STOS_UPPER_BOUNDARY $65536 .text -#ifndef NOT_IN_libc +#if !defined NOT_IN_libc && !defined USE_MULTIARCH ENTRY(__bzero) mov %rsi,%rdx /* Adjust parameter. */ xorl %esi,%esi /* Fill with 0s. 
*/ @@ -34,10 +34,10 @@ weak_alias (__bzero, bzero) #endif #if defined PIC && !defined NOT_IN_libc -ENTRY (__memset_chk) +ENTRY_CHK (__memset_chk) cmpq %rdx, %rcx jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk) +END_CHK (__memset_chk) #endif ENTRY (memset) L(memset_entry): @@ -591,157 +591,13 @@ L(A6Q1): mov %dx,-0xe(%rdi) L(A7Q0): mov %dl,-0x7(%rdi) L(A6Q0): mov %dx,-0x6(%rdi) mov %edx,-0x4(%rdi) - jmp L(aligned_now) - - .balign 16 -L(aligned_now): - - cmpl $0x1,__x86_64_preferred_memory_instruction(%rip) - jg L(SSE_pre) - -L(8byte_move_try): - cmpq __STOS_LOWER_BOUNDARY,%r8 - jae L(8byte_stos_try) - - .balign 16 -L(8byte_move): - movq %r8,%rcx - shrq $7,%rcx - jz L(8byte_move_skip) - - .p2align 4 - -L(8byte_move_loop): - decq %rcx - - movq %rdx, (%rdi) - movq %rdx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %rdx, 24 (%rdi) - movq %rdx, 32 (%rdi) - movq %rdx, 40 (%rdi) - movq %rdx, 48 (%rdi) - movq %rdx, 56 (%rdi) - movq %rdx, 64 (%rdi) - movq %rdx, 72 (%rdi) - movq %rdx, 80 (%rdi) - movq %rdx, 88 (%rdi) - movq %rdx, 96 (%rdi) - movq %rdx, 104 (%rdi) - movq %rdx, 112 (%rdi) - movq %rdx, 120 (%rdi) - - leaq 128 (%rdi),%rdi - - jnz L(8byte_move_loop) - -L(8byte_move_skip): - andl $127,%r8d - lea (%rdi,%r8,1),%rdi - -#ifndef PIC - lea L(setPxQx)(%rip),%r11 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC -#else - lea L(Got0)(%rip),%r11 - lea L(setPxQx)(%rip),%rcx - movswq (%rcx,%r8,2),%rcx - lea (%rcx,%r11,1),%r11 - jmpq *%r11 -#endif - - .balign 16 -L(8byte_stos_try): - mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size - cmpq %r8,%r9 // calculate the lesser of remaining - cmovaq %r8,%r9 // bytes and largest cache size - jbe L(8byte_stos) - -L(8byte_move_reuse_try): - cmp __STOS_UPPER_BOUNDARY,%r8 - jae L(8byte_move) - - .balign 16 -L(8byte_stos): - movq %r9,%rcx - andq $-8,%r9 - - shrq $3,%rcx - jz L(8byte_stos_skip) - - xchgq %rax,%rdx - - rep - stosq - - xchgq %rax,%rdx - -L(8byte_stos_skip): - subq %r9,%r8 - ja L(8byte_nt_move) - - andl 
$7,%r8d - lea (%rdi,%r8,1),%rdi -#ifndef PIC - lea L(setPxQx)(%rip),%r11 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC -#else - lea L(Got0)(%rip),%r11 - lea L(setPxQx)(%rip),%rcx - movswq (%rcx,%r8,2),%rcx - lea (%rcx,%r11,1),%r11 - jmpq *%r11 -#endif - .balign 16 -L(8byte_nt_move): - movq %r8,%rcx - shrq $7,%rcx - jz L(8byte_nt_move_skip) - - .balign 16 -L(8byte_nt_move_loop): - decq %rcx - - movntiq %rdx, (%rdi) - movntiq %rdx, 8 (%rdi) - movntiq %rdx, 16 (%rdi) - movntiq %rdx, 24 (%rdi) - movntiq %rdx, 32 (%rdi) - movntiq %rdx, 40 (%rdi) - movntiq %rdx, 48 (%rdi) - movntiq %rdx, 56 (%rdi) - movntiq %rdx, 64 (%rdi) - movntiq %rdx, 72 (%rdi) - movntiq %rdx, 80 (%rdi) - movntiq %rdx, 88 (%rdi) - movntiq %rdx, 96 (%rdi) - movntiq %rdx, 104 (%rdi) - movntiq %rdx, 112 (%rdi) - movntiq %rdx, 120 (%rdi) - - leaq 128 (%rdi),%rdi - - jnz L(8byte_nt_move_loop) - - sfence - -L(8byte_nt_move_skip): - andl $127,%r8d - - lea (%rdi,%r8,1),%rdi -#ifndef PIC - lea L(setPxQx)(%rip),%r11 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC -#else - lea L(Got0)(%rip),%r11 - lea L(setPxQx)(%rip),%rcx - movswq (%rcx,%r8,2),%rcx - lea (%rcx,%r11,1),%r11 - jmpq *%r11 -#endif +#ifndef USE_MULTIARCH + jmp L(aligned_now) L(SSE_pre): +#endif +#if !defined USE_MULTIARCH || defined USE_SSE2 # fill RegXMM0 with the pattern movd %rdx,%xmm0 punpcklqdq %xmm0,%xmm0 @@ -1342,11 +1198,162 @@ L(SSExDx): .short L(SSE15QB)-L(SSE0Q0) #endif .popsection +#endif /* !defined USE_MULTIARCH || defined USE_SSE2 */ + + .balign 16 +L(aligned_now): + +#ifndef USE_MULTIARCH + cmpl $0x1,__x86_64_preferred_memory_instruction(%rip) + jg L(SSE_pre) +#endif /* USE_MULTIARCH */ + +L(8byte_move_try): + cmpq __STOS_LOWER_BOUNDARY,%r8 + jae L(8byte_stos_try) + + .balign 16 +L(8byte_move): + movq %r8,%rcx + shrq $7,%rcx + jz L(8byte_move_skip) + + .p2align 4 + +L(8byte_move_loop): + decq %rcx + + movq %rdx, (%rdi) + movq %rdx, 8 (%rdi) + movq %rdx, 16 (%rdi) + movq %rdx, 24 (%rdi) + movq %rdx, 32 (%rdi) + movq 
%rdx, 40 (%rdi) + movq %rdx, 48 (%rdi) + movq %rdx, 56 (%rdi) + movq %rdx, 64 (%rdi) + movq %rdx, 72 (%rdi) + movq %rdx, 80 (%rdi) + movq %rdx, 88 (%rdi) + movq %rdx, 96 (%rdi) + movq %rdx, 104 (%rdi) + movq %rdx, 112 (%rdi) + movq %rdx, 120 (%rdi) + + leaq 128 (%rdi),%rdi + + jnz L(8byte_move_loop) + +L(8byte_move_skip): + andl $127,%r8d + lea (%rdi,%r8,1),%rdi + +#ifndef PIC + lea L(setPxQx)(%rip),%r11 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC +#else + lea L(Got0)(%rip),%r11 + lea L(setPxQx)(%rip),%rcx + movswq (%rcx,%r8,2),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .balign 16 +L(8byte_stos_try): + mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size + cmpq %r8,%r9 // calculate the lesser of remaining + cmovaq %r8,%r9 // bytes and largest cache size + jbe L(8byte_stos) + +L(8byte_move_reuse_try): + cmp __STOS_UPPER_BOUNDARY,%r8 + jae L(8byte_move) + + .balign 16 +L(8byte_stos): + movq %r9,%rcx + andq $-8,%r9 + + shrq $3,%rcx + jz L(8byte_stos_skip) + + xchgq %rax,%rdx + + rep + stosq + + xchgq %rax,%rdx + +L(8byte_stos_skip): + subq %r9,%r8 + ja L(8byte_nt_move) + + andl $7,%r8d + lea (%rdi,%r8,1),%rdi +#ifndef PIC + lea L(setPxQx)(%rip),%r11 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC +#else + lea L(Got0)(%rip),%r11 + lea L(setPxQx)(%rip),%rcx + movswq (%rcx,%r8,2),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .balign 16 +L(8byte_nt_move): + movq %r8,%rcx + shrq $7,%rcx + jz L(8byte_nt_move_skip) + + .balign 16 +L(8byte_nt_move_loop): + decq %rcx + + movntiq %rdx, (%rdi) + movntiq %rdx, 8 (%rdi) + movntiq %rdx, 16 (%rdi) + movntiq %rdx, 24 (%rdi) + movntiq %rdx, 32 (%rdi) + movntiq %rdx, 40 (%rdi) + movntiq %rdx, 48 (%rdi) + movntiq %rdx, 56 (%rdi) + movntiq %rdx, 64 (%rdi) + movntiq %rdx, 72 (%rdi) + movntiq %rdx, 80 (%rdi) + movntiq %rdx, 88 (%rdi) + movntiq %rdx, 96 (%rdi) + movntiq %rdx, 104 (%rdi) + movntiq %rdx, 112 (%rdi) + movntiq %rdx, 120 (%rdi) + + leaq 128 (%rdi),%rdi + + jnz 
L(8byte_nt_move_loop) + + sfence + +L(8byte_nt_move_skip): + andl $127,%r8d + + lea (%rdi,%r8,1),%rdi +#ifndef PIC + lea L(setPxQx)(%rip),%r11 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC +#else + lea L(Got0)(%rip),%r11 + lea L(setPxQx)(%rip),%rcx + movswq (%rcx,%r8,2),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif END (memset) libc_hidden_builtin_def (memset) -#if defined PIC && !defined NOT_IN_libc +#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH strong_alias (__memset_chk, __memset_zero_constant_len_parameter) .section .gnu.warning.__memset_zero_constant_len_parameter .string "memset used with constant zero length parameter; this could be due to transposed parameters" diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile index 5d2e34ebc..19aa4be4c 100644 --- a/libc/sysdeps/x86_64/multiarch/Makefile +++ b/libc/sysdeps/x86_64/multiarch/Makefile @@ -8,7 +8,8 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ - strncase_l-ssse3 strlen-sse4 strlen-no-bsf + strncase_l-ssse3 strlen-sse4 strlen-no-bsf \ + memset-x86-64 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift CFLAGS-varshift.c += -msse4 diff --git a/libc/sysdeps/x86_64/multiarch/bzero.S b/libc/sysdeps/x86_64/multiarch/bzero.S new file mode 100644 index 000000000..9c9eebd5e --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/bzero.S @@ -0,0 +1,56 @@ +/* Multiple versions of bzero + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + + .text +ENTRY(__bzero) + .type __bzero, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: leaq __bzero_x86_64(%rip), %rax + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + jz 2f + leaq __bzero_sse2(%rip), %rax +2: ret +END(__bzero) + + .type __bzero_sse2, @function +__bzero_sse2: + cfi_startproc + CALL_MCOUNT + mov %rsi,%rdx /* Adjust parameter. */ + xorl %esi,%esi /* Fill with 0s. */ + jmp __memset_sse2 + cfi_endproc + .size __bzero_sse2, .-__bzero_sse2 + + .type __bzero_x86_64, @function +__bzero_x86_64: + cfi_startproc + CALL_MCOUNT + mov %rsi,%rdx /* Adjust parameter. */ + xorl %esi,%esi /* Fill with 0s. 
*/ + jmp __memset_x86_64 + cfi_endproc + .size __bzero_x86_64, .-__bzero_x86_64 + +weak_alias (__bzero, bzero) diff --git a/libc/sysdeps/x86_64/multiarch/cacheinfo.c b/libc/sysdeps/x86_64/multiarch/cacheinfo.c new file mode 100644 index 000000000..f87b8dce6 --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/cacheinfo.c @@ -0,0 +1,2 @@ +#define DISABLE_PREFERRED_MEMORY_INSTRUCTION +#include "../cacheinfo.c" diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.c b/libc/sysdeps/x86_64/multiarch/init-arch.c index 786466d5f..f0d2bb7d1 100644 --- a/libc/sysdeps/x86_64/multiarch/init-arch.c +++ b/libc/sysdeps/x86_64/multiarch/init-arch.c @@ -59,6 +59,11 @@ __init_cpu_features (void) get_common_indeces (&family, &model); + /* Intel processors prefer SSE instruction for memory/string + routines if they are available. */ + __cpu_features.feature[index_Prefer_SSE_for_memop] + |= bit_Prefer_SSE_for_memop; + unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; unsigned int extended_family = (eax >> 20) & 0xff; unsigned int extended_model = (eax >> 12) & 0xf0; @@ -82,6 +87,7 @@ __init_cpu_features (void) case 0x1e: case 0x1f: case 0x25: + case 0x2c: case 0x2e: case 0x2f: /* Rep string instructions and copy backward are fast on diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.h b/libc/sysdeps/x86_64/multiarch/init-arch.h index 783b02015..6e409b8f1 100644 --- a/libc/sysdeps/x86_64/multiarch/init-arch.h +++ b/libc/sysdeps/x86_64/multiarch/init-arch.h @@ -16,9 +16,10 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
*/ -#define bit_Fast_Rep_String (1 << 0) -#define bit_Fast_Copy_Backward (1 << 1) -#define bit_Slow_BSF (1 << 2) +#define bit_Fast_Rep_String (1 << 0) +#define bit_Fast_Copy_Backward (1 << 1) +#define bit_Slow_BSF (1 << 2) +#define bit_Prefer_SSE_for_memop (1 << 3) #ifdef __ASSEMBLER__ @@ -37,6 +38,7 @@ # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -109,6 +111,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 # define index_Slow_BSF FEATURE_INDEX_1 +# define index_Prefer_SSE_for_memop FEATURE_INDEX_1 #define HAS_ARCH_FEATURE(idx, bit) \ ((__get_cpu_features ()->feature[idx] & (bit)) != 0) @@ -122,4 +125,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define HAS_SLOW_BSF \ HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) +#define HAS_PREFER_SSE_FOR_MEMOP \ + HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) + #endif /* __ASSEMBLER__ */ diff --git a/libc/sysdeps/x86_64/multiarch/memset-x86-64.S b/libc/sysdeps/x86_64/multiarch/memset-x86-64.S new file mode 100644 index 000000000..5e8cfb3e9 --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/memset-x86-64.S @@ -0,0 +1,18 @@ +#include <sysdep.h> + +#ifndef NOT_IN_libc +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ + .type __memset_chk_x86_64, @function; \ + .globl __memset_chk_x86_64; \ + .p2align 4; \ + __memset_chk_x86_64: cfi_startproc; \ + CALL_MCOUNT +# undef END_CHK +# define END_CHK(name) \ + cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64 + +# define libc_hidden_builtin_def(name) +# define memset __memset_x86_64 +# include "../memset.S" +#endif diff --git a/libc/sysdeps/x86_64/multiarch/memset.S 
b/libc/sysdeps/x86_64/multiarch/memset.S new file mode 100644 index 000000000..a8d0e9ea2 --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/memset.S @@ -0,0 +1,74 @@ +/* Multiple versions of memset + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +/* Define multiple versions only for the definition in lib. 
*/ +#ifndef NOT_IN_libc +ENTRY(memset) + .type memset, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: leaq __memset_x86_64(%rip), %rax + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + jz 2f + leaq __memset_sse2(%rip), %rax +2: ret +END(memset) + +# define USE_SSE2 1 + +# undef ENTRY +# define ENTRY(name) \ + .type __memset_sse2, @function; \ + .globl __memset_sse2; \ + .p2align 4; \ + __memset_sse2: cfi_startproc; \ + CALL_MCOUNT +# undef END +# define END(name) \ + cfi_endproc; .size __memset_sse2, .-__memset_sse2 + +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ + .type __memset_chk_sse2, @function; \ + .globl __memset_chk_sse2; \ + .p2align 4; \ + __memset_chk_sse2: cfi_startproc; \ + CALL_MCOUNT +# undef END_CHK +# define END_CHK(name) \ + cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2 + +# ifdef SHARED +# undef libc_hidden_builtin_def +/* It doesn't make sense to send libc-internal memset calls through a PLT. + The speedup we get from using GPR instruction is likely eaten away + by the indirect call in the PLT. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_sse2 +# endif + +# undef strong_alias +# define strong_alias(original, alias) +#endif + +#include "../memset.S" diff --git a/libc/sysdeps/x86_64/multiarch/memset_chk.S b/libc/sysdeps/x86_64/multiarch/memset_chk.S new file mode 100644 index 000000000..16afe60c6 --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/memset_chk.S @@ -0,0 +1,44 @@ +/* Multiple versions of __memset_chk + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +/* Define multiple versions only for the definition in lib. */ +#ifndef NOT_IN_libc +# ifdef SHARED +ENTRY(__memset_chk) + .type __memset_chk, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: leaq __memset_chk_x86_64(%rip), %rax + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + jz 2f + leaq __memset_chk_sse2(%rip), %rax +2: ret +END(__memset_chk) + +strong_alias (__memset_chk, __memset_zero_constant_len_parameter) + .section .gnu.warning.__memset_zero_constant_len_parameter + .string "memset used with constant zero length parameter; this could be due to transposed parameters" +# else +# include "../memset_chk.S" +# endif +#endif diff --git a/libc/sysdeps/x86_64/multiarch/strcmp.S b/libc/sysdeps/x86_64/multiarch/strcmp.S index 54e5bbc31..185928957 100644 --- a/libc/sysdeps/x86_64/multiarch/strcmp.S +++ b/libc/sysdeps/x86_64/multiarch/strcmp.S @@ -1809,7 +1809,13 @@ LABEL(nibble_ashr_15_use_sse4_2): ja LABEL(loop_ashr_15_use_sse4_2) LABEL(nibble_ashr_use_sse4_2_exit): +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 +# else + movdqa (%rsi,%rdx), %xmm1 + TOLOWER (%xmm0, %xmm1) + pcmpistri $0x1a, %xmm1, %xmm0 +# endif .p2align 4 LABEL(use_sse4_2_exit): jnc LABEL(strcmp_exitz_sse4_2) |