summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> 2012-01-03 21:42:51 +0000
committer: joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> 2012-01-03 21:42:51 +0000
commit: a2f36c82bede91326449c9144d17f384663c5ab8 (patch)
tree: 743c2759816fd3eef8f9709d72e538001852db31
parent: acd1e4884e92bebf4ca870d510936604f5188e96 (diff)
download: eglibc2-a2f36c82bede91326449c9144d17f384663c5ab8.tar.gz
Merge changes between r16332 and r16348 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@16493 7b3dc134-2b1b-0410-93df-9e9f96275f8d
-rw-r--r-- libc/ChangeLog 70
-rw-r--r-- libc/NEWS 17
-rw-r--r-- libc/bits/byteswap.h 21
-rw-r--r-- libc/elf/dl-addr.c 4
-rw-r--r-- libc/elf/tst-auditmod4b.c 2
-rw-r--r-- libc/elf/tst-auditmod6b.c 2
-rw-r--r-- libc/elf/tst-auditmod6c.c 2
-rw-r--r-- libc/elf/tst-auditmod7b.c 2
-rw-r--r-- libc/iconvdata/cp1258.c 3
-rw-r--r-- libc/iconvdata/tcvn5712-1.c 9
-rw-r--r-- libc/include/features.h 2
-rw-r--r-- libc/inet/getnameinfo.c 11
-rw-r--r-- libc/localedata/ChangeLog 26
-rw-r--r-- libc/localedata/SUPPORTED 3
-rw-r--r-- libc/localedata/locales/fi_FI 100
-rw-r--r-- libc/localedata/locales/si_LK 62
-rw-r--r-- libc/localedata/locales/sv_SE 4
-rw-r--r-- libc/localedata/locales/ta_LK 85
-rw-r--r-- libc/localedata/locales/wal_ET 59
-rw-r--r-- libc/nptl/ChangeLog 15
-rw-r--r-- libc/nptl/Versions 2
-rw-r--r-- libc/nptl/nptl-init.c 12
-rw-r--r-- libc/nptl/pthreadP.h 1
-rw-r--r-- libc/nptl/sysdeps/pthread/gai_misc.h 6
-rw-r--r-- libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c 4
-rw-r--r-- libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c 4
-rw-r--r-- libc/sysdeps/i386/bits/byteswap.h 20
-rw-r--r-- libc/sysdeps/i386/fpu/bits/fenv.h 50
-rw-r--r-- libc/sysdeps/i386/fpu/fgetexcptflg.c 22
-rw-r--r-- libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S 1261
-rw-r--r-- libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S 62
-rw-r--r-- libc/sysdeps/ia64/bits/byteswap.h 35
-rw-r--r-- libc/sysdeps/s390/bits/byteswap.h 112
-rw-r--r-- libc/sysdeps/x86_64/bits/byteswap.h 18
-rw-r--r-- libc/sysdeps/x86_64/dl-machine.h 8
-rw-r--r-- libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S 767
-rw-r--r-- libc/sysdeps/x86_64/multiarch/wcscpy-ssse3.S 64
-rw-r--r-- libc/version.h 4
-rw-r--r-- ports/ChangeLog.m68k 5
-rw-r--r-- ports/sysdeps/m68k/bits/byteswap.h 23
40 files changed, 1434 insertions, 1545 deletions
diff --git a/libc/ChangeLog b/libc/ChangeLog
index f74e0a5d8..e81ffc170 100644
--- a/libc/ChangeLog
+++ b/libc/ChangeLog
@@ -1,3 +1,72 @@
+2011-12-23 Ulrich Drepper <drepper@gmail.com>
+
+ * version.h (RELEASE): Bump for 2.15 release.
+ * include/features.h (__GLIBC_MINOR__): Bump to 15.
+
+ * sysdeps/x86_64/dl-machine.h: Fix typos in comments.
+ Patch by Marek Polacek <mpolacek@redhat.com>.
+
+ * bits/byteswap.h: Protect long long constants with __extension__.
+ * sysdeps/i386/bits/byteswap.h: Likewise.
+ * sysdeps/ia64/bits/byteswap.h: Likewise.
+ * sysdeps/s390/bits/byteswap.h: Likewise.
+ * sysdeps/x86_64/bits/byteswap.h: Likewise.
+
+2011-12-23 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ [BZ #13540]
+ * sysdeps/x86_64/multiarch/strcpy-ssse3.S: Fix overrun in
+ destination buffer.
+ * sysdeps/x86_64/multiarch/wcscpy-ssse3.S: Likewise.
+
+2011-12-23 Marek Polacek <polacek@redhat.com>
+
+ * elf/dl-addr.c (determine_info): Add inline keyword.
+ * elf/tst-auditmod4b.c (check_avx): Likewise.
+ * elf/tst-auditmod6b.c (check_avx): Likewise.
+ * elf/tst-auditmod6c.c (check_avx): Likewise.
+ * elf/tst-auditmod7b.c (check_avx): Likewise.
+
+2011-12-23 Ulrich Drepper <drepper@gmail.com>
+
+ * sysdeps/i386/fpu/bits/fenv.h (feraiseexcept): Also enable for
+ !__SSE_MATH__.
+
+2011-12-23 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ [BZ #13540]
+ * sysdeps/i386/i686/multiarch/wcscpy-ssse3.S: Fix wrong copying
+ processing for last bytes.
+
+2011-08-06 Bruno Haible <bruno@clisp.org>
+
+ [BZ #13061]
+ * iconvdata/cp1258.c (comp_table_data): Combine U+00A8 U+0301 to
+ U+0385, not to U+1FEE.
+
+ [BZ #13062]
+ * iconvdata/tcvn5712-1.c (comp_table_data): Remove useless and wrong
+ entry for U+00A5 U+0301.
+
+2011-12-22 Ulrich Drepper <drepper@gmail.com>
+
+ [BZ #13166]
+ * inet/getnameinfo.c (getnameinfo): Return EAI_OVERFLOW if the
+ buffer for the output is too small.
+
+ * sysdeps/i386/fpu/bits/fenv.h [__SSE_MATH__]: Add feraiseexcept
+ optimization.
+
+ [BZ #13185]
+ * sysdeps/i386/fpu/fgetexcptflg.c (__fegetexceptflag): Also return
+ SSE flags if possible.
+
+2011-12-22 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ [BZ #13540]
+ * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Fix wrong copying
+ processing for last bytes.
+
2011-12-22 Joseph Myers <joseph@codesourcery.com>
* sysdeps/unix/sysv/linux/Makefile (syscall-list-variants)
@@ -53,6 +122,7 @@
2011-11-18 Richard B. Kreckel <kreckel@ginac.de>
[BZ #13305]
+ [BZ #12786]
* math/s_cacosh.c: Fix rare miscomputation in cacosh().
* math/s_cacoshf.c: Likewise.
* math/s_cacoshl.c: Likewise.
diff --git a/libc/NEWS b/libc/NEWS
index 8d2fbbe41..55dba1bb7 100644
--- a/libc/NEWS
+++ b/libc/NEWS
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes. 2011-12-22
+GNU C Library NEWS -- history of user-visible changes. 2011-12-23
Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc.
See the end for copying conditions.
@@ -9,12 +9,13 @@ Version 2.15
* The following bugs are resolved with this release:
- 6779, 6783, 9696, 10103, 10709, 11589, 12403, 12847, 12868, 12852, 12874,
- 12885, 12892, 12907, 12922, 12935, 13007, 13021, 13067, 13068, 13090,
- 13092, 13114, 13118, 13123, 13134, 13138, 13147, 13150, 13179, 13192,
- 13268, 13276, 13282, 13291, 13305, 13328, 13335, 13337, 13344, 13358,
- 13367, 13413, 13416, 13423, 13439, 13446, 13472, 13484, 13506, 13515,
- 13523, 13524, 13538
+ 6779, 6783, 9696, 10103, 10709, 11589, 12403, 12786, 12840, 12847, 12868,
+ 12852, 12874, 12885, 12892, 12906, 12907, 12922, 12935, 12962, 13007,
+ 13021, 13061, 13062, 13067, 13068, 13085, 13088, 13090, 13092, 13096,
+ 13114, 13118, 13123, 13134, 13138, 13147, 13150, 13166, 13179, 13185,
+ 13189, 13192, 13268, 13276, 13282, 13291, 13305, 13328, 13335, 13337,
+ 13344, 13358, 13367, 13413, 13416, 13423, 13439, 13446, 13472, 13484,
+ 13506, 13515, 13523, 13524, 13538, 13540
* New program pldd to list loaded object of a process
Implemented by Ulrich Drepper.
@@ -66,7 +67,7 @@ Version 2.15
* Optimized nearbyint and strcasecmp for PPC.
Implemented by Adhemerval Zanella.
-* New locales: bho_IN, unm_US, es_CU
+* New locales: bho_IN, unm_US, es_CU, ta_LK
Version 2.14
diff --git a/libc/bits/byteswap.h b/libc/bits/byteswap.h
index 45cb9471e..6df2f28c0 100644
--- a/libc/bits/byteswap.h
+++ b/libc/bits/byteswap.h
@@ -1,5 +1,6 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997,1998,2000-2002,2005,2008 Free Software Foundation, Inc.
+ Copyright (C) 1997,1998,2000-2002,2005,2008,2011
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -60,20 +61,20 @@ __bswap_32 (unsigned int __bsx)
#if defined __GNUC__ && __GNUC__ >= 2
/* Swap bytes in 64 bit value. */
# define __bswap_constant_64(x) \
- ((((x) & 0xff00000000000000ull) >> 56) \
- | (((x) & 0x00ff000000000000ull) >> 40) \
- | (((x) & 0x0000ff0000000000ull) >> 24) \
- | (((x) & 0x000000ff00000000ull) >> 8) \
- | (((x) & 0x00000000ff000000ull) << 8) \
- | (((x) & 0x0000000000ff0000ull) << 24) \
- | (((x) & 0x000000000000ff00ull) << 40) \
- | (((x) & 0x00000000000000ffull) << 56))
+ (__extension__ ((((x) & 0xff00000000000000ull) >> 56) \
+ | (((x) & 0x00ff000000000000ull) >> 40) \
+ | (((x) & 0x0000ff0000000000ull) >> 24) \
+ | (((x) & 0x000000ff00000000ull) >> 8) \
+ | (((x) & 0x00000000ff000000ull) << 8) \
+ | (((x) & 0x0000000000ff0000ull) << 24) \
+ | (((x) & 0x000000000000ff00ull) << 40) \
+ | (((x) & 0x00000000000000ffull) << 56)))
# define __bswap_64(x) \
(__extension__ \
({ union { __extension__ unsigned long long int __ll; \
unsigned int __l[2]; } __w, __r; \
- if (__builtin_constant_p (x)) \
+ if (__builtin_constant_p (x)) \
__r.__ll = __bswap_constant_64 (x); \
else \
{ \
diff --git a/libc/elf/dl-addr.c b/libc/elf/dl-addr.c
index 2b53a5ed0..788225635 100644
--- a/libc/elf/dl-addr.c
+++ b/libc/elf/dl-addr.c
@@ -1,5 +1,5 @@
/* Locate the shared object symbol nearest a given address.
- Copyright (C) 1996-2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1996-2007, 2009, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
#include <ldsodefs.h>
-static void
+static inline void
__attribute ((always_inline))
determine_info (const ElfW(Addr) addr, struct link_map *match, Dl_info *info,
struct link_map **mapp, const ElfW(Sym) **symbolp)
diff --git a/libc/elf/tst-auditmod4b.c b/libc/elf/tst-auditmod4b.c
index a6d3c6a6c..761d97ce9 100644
--- a/libc/elf/tst-auditmod4b.c
+++ b/libc/elf/tst-auditmod4b.c
@@ -108,7 +108,7 @@ la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
static int avx = -1;
-static int
+static inline int
__attribute ((always_inline))
check_avx (void)
{
diff --git a/libc/elf/tst-auditmod6b.c b/libc/elf/tst-auditmod6b.c
index f756b5022..a7a60b992 100644
--- a/libc/elf/tst-auditmod6b.c
+++ b/libc/elf/tst-auditmod6b.c
@@ -108,7 +108,7 @@ la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
static int avx = -1;
-static int
+static inline int
__attribute ((always_inline))
check_avx (void)
{
diff --git a/libc/elf/tst-auditmod6c.c b/libc/elf/tst-auditmod6c.c
index 49cbf0549..e0b5ac231 100644
--- a/libc/elf/tst-auditmod6c.c
+++ b/libc/elf/tst-auditmod6c.c
@@ -108,7 +108,7 @@ la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
static int avx = -1;
-static int
+static inline int
__attribute ((always_inline))
check_avx (void)
{
diff --git a/libc/elf/tst-auditmod7b.c b/libc/elf/tst-auditmod7b.c
index eb237586f..a27d38540 100644
--- a/libc/elf/tst-auditmod7b.c
+++ b/libc/elf/tst-auditmod7b.c
@@ -108,7 +108,7 @@ la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
static int avx = -1;
-static int
+static inline int
__attribute ((always_inline))
check_avx (void)
{
diff --git a/libc/iconvdata/cp1258.c b/libc/iconvdata/cp1258.c
index 2b741ba96..b7d23182e 100644
--- a/libc/iconvdata/cp1258.c
+++ b/libc/iconvdata/cp1258.c
@@ -197,8 +197,7 @@ static const struct
{ 0x0077, 0x1E83 },
{ 0x0079, 0x00FD },
{ 0x007A, 0x017A },
- /* { 0x00A5, 0x0385 }, Wrong, A5 is Yen sign */
- { 0x00A8, 0x1FEE },
+ { 0x00A8, 0x0385 }, /* prefer U+0385 over U+1FEE */
{ 0x00C2, 0x1EA4 },
{ 0x00C5, 0x01FA },
{ 0x00C6, 0x01FC },
diff --git a/libc/iconvdata/tcvn5712-1.c b/libc/iconvdata/tcvn5712-1.c
index c94dadb2e..3cfdf468d 100644
--- a/libc/iconvdata/tcvn5712-1.c
+++ b/libc/iconvdata/tcvn5712-1.c
@@ -1,5 +1,5 @@
/* Conversion to and from TCVN5712-1.
- Copyright (C) 2001, 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2004, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
@@ -158,7 +158,7 @@ static const struct
{ 0x01AF, 0x1EEA },
{ 0x01B0, 0x1EEB },
#define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
-#define COMP_TABLE_LEN_0301 51
+#define COMP_TABLE_LEN_0301 50
{ 0x0041, 0x00C1 },
{ 0x0043, 0x0106 },
{ 0x0045, 0x00C9 },
@@ -193,8 +193,7 @@ static const struct
{ 0x0077, 0x1E83 },
{ 0x0079, 0x00FD },
{ 0x007A, 0x017A },
- { 0x00A5, 0x0385 },
- /*{ 0x00A8, 0x1FEE },*/
+ /*{ 0x00A8, 0x0385 },*//* prefer U+0385 over U+1FEE */
{ 0x00C2, 0x1EA4 },
/*{ 0x00C5, 0x01FA },*/
/*{ 0x00C6, 0x01FC },*/
@@ -492,7 +491,7 @@ static const struct
#include <iconv/loop.c>
-/* Next, define the conversion function from UCS4 to CP1258. */
+/* Next, define the conversion function from UCS4 to TCVN5712-1. */
static const unsigned char from_ucs4[] =
{
diff --git a/libc/include/features.h b/libc/include/features.h
index b16129562..f34dc3159 100644
--- a/libc/include/features.h
+++ b/libc/include/features.h
@@ -338,7 +338,7 @@
/* Major and minor version number of the GNU C library package. Use
these macros to test for features in specific releases. */
#define __GLIBC__ 2
-#define __GLIBC_MINOR__ 14
+#define __GLIBC_MINOR__ 15
#define __GLIBC_PREREQ(maj, min) \
((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
diff --git a/libc/inet/getnameinfo.c b/libc/inet/getnameinfo.c
index 6fb6ad6e1..436604b75 100644
--- a/libc/inet/getnameinfo.c
+++ b/libc/inet/getnameinfo.c
@@ -346,10 +346,11 @@ getnameinfo (const struct sockaddr *sa, socklen_t addrlen, char *host,
"%u", scopeid);
if (real_hostlen + scopelen + 1 > hostlen)
- /* XXX We should not fail here. Simply enlarge
- the buffer or return with out of memory. */
- return EAI_SYSTEM;
- memcpy (host + real_hostlen, scopebuf, scopelen + 1);
+ /* Signal the buffer is too small. This is
+ what inet_ntop does. */
+ c = NULL;
+ else
+ memcpy (host + real_hostlen, scopebuf, scopelen + 1);
}
}
else
@@ -357,7 +358,7 @@ getnameinfo (const struct sockaddr *sa, socklen_t addrlen, char *host,
(const void *) &(((const struct sockaddr_in *) sa)->sin_addr),
host, hostlen);
if (c == NULL)
- return EAI_SYSTEM;
+ return EAI_OVERFLOW;
}
ok = 1;
}
diff --git a/libc/localedata/ChangeLog b/libc/localedata/ChangeLog
index 17c135dfe..bb9c3886e 100644
--- a/libc/localedata/ChangeLog
+++ b/libc/localedata/ChangeLog
@@ -1,5 +1,31 @@
+2011-12-23 Ulrich Drepper <drepper@gmail.com>
+
+ [BZ #12840]
+ * locales/sv_SE: Modernize date format.
+
+ [BZ #12906]
+ * SUPPORTED (SUPPORTED-LOCALES): Add wal_ET entry.
+
+ * locales/wal_ET: Remove lang_ab entry.
+
+ [BZ #12962]
+ * locales/fi_FI: Various fixups.
+ Patch by Marko Myllynen <myllynen@redhat.com>.
+
+ [BZ #13085]
+ * locales/ta_LK: New file.
+ * SUPPORTED (SUPPORTED-LOCALES): Add appropriate entry
+
+ * locales/si_LK: Add country_ab2, country_ab3, country_num.
+
2011-12-22 Ulrich Drepper <drepper@gmail.com>
+ [BZ #13096]
+ * locales/fi_FI: Fix collation reordering rules.
+
+ [BZ #13189]
+ * SUPPORTED (SUPPORTED-LOCALES): Add ur_IN entry.
+
[BZ #13282]
* locales/brx_IN: New file.
* SUPPORTED (SUPPORTED-LOCALES): Add appropriate entry
diff --git a/libc/localedata/SUPPORTED b/libc/localedata/SUPPORTED
index a4e639794..1fd78472c 100644
--- a/libc/localedata/SUPPORTED
+++ b/libc/localedata/SUPPORTED
@@ -380,6 +380,7 @@ sv_SE/ISO-8859-1 \
sw_KE/UTF-8 \
sw_TZ/UTF-8 \
ta_IN/UTF-8 \
+ta_LK/UTF-8 \
te_IN/UTF-8 \
tg_TJ.UTF-8/UTF-8 \
tg_TJ/KOI8-T \
@@ -403,6 +404,7 @@ ug_CN/UTF-8 \
uk_UA.UTF-8/UTF-8 \
uk_UA/KOI8-U \
unm_US/UTF-8 \
+ur_IN/UTF-8 \
ur_PK/UTF-8 \
uz_UZ/ISO-8859-1 \
uz_UZ@cyrillic/UTF-8 \
@@ -413,6 +415,7 @@ wa_BE/ISO-8859-1 \
wa_BE@euro/ISO-8859-15 \
wa_BE.UTF-8/UTF-8 \
wae_CH/UTF-8 \
+wal_ET/UTF-8 \
wo_SN/UTF-8 \
xh_ZA.UTF-8/UTF-8 \
xh_ZA/ISO-8859-1 \
diff --git a/libc/localedata/locales/fi_FI b/libc/localedata/locales/fi_FI
index c3604871f..f51700c28 100644
--- a/libc/localedata/locales/fi_FI
+++ b/libc/localedata/locales/fi_FI
@@ -1,4 +1,4 @@
-escape_char /
+escape_char /
comment_char %
% Finnish language locale for Finland
@@ -45,10 +45,10 @@ category "fi_FI:2000";LC_NUMERIC
category "fi_FI:2000";LC_MONETARY
category "fi_FI:2000";LC_MESSAGES
category "fi_FI:2000";LC_PAPER
+category "fi_FI:2000";LC_MEASUREMENT
category "fi_FI:2000";LC_NAME
category "fi_FI:2000";LC_ADDRESS
category "fi_FI:2000";LC_TELEPHONE
-
END LC_IDENTIFICATION
LC_COLLATE
@@ -63,64 +63,68 @@ reorder-after <z>
<a-diaerisis>
<o-diaerisis>
-reorder-after <U005A>
+reorder-after <U007A>
<U00E5> <a-ring>;<BAS>;<MIN>;IGNORE
-<U00C5> <a-ring>;<BAS>;<CAP>;IGNORE
<U01FB> <a-ring>;<ACA>;<MIN>;IGNORE
-<U01FA> <a-ring>;<ACA>;<CAP>;IGNORE
<U00E4> <a-diaerisis>;<BAS>;<MIN>;IGNORE
-<U00C4> <a-diaerisis>;<BAS>;<CAP>;IGNORE
<U00E6> <a-diaerisis>;<REU>;<MIN>;IGNORE
-<U00C6> <a-diaerisis>;<REU>;<CAP>;IGNORE
<U01FD> <a-diaerisis>;<U01FD>;<MIN>;IGNORE
-<U01FC> <a-diaerisis>;<U01FD>;<CAP>;IGNORE
<U01E3> <a-diaerisis>;<MAC>;<MIN>;IGNORE
-<U01E2> <a-diaerisis>;<MAC>;<CAP>;IGNORE
<U00F6> <o-diaerisis>;<BAS>;<MIN>;IGNORE
-<U00D6> <o-diaerisis>;<BAS>;<CAP>;IGNORE
<U00F8> <o-diaerisis>;<U00D8>;<MIN>;IGNORE
-<U00D8> <o-diaerisis>;<U00D8>;<CAP>;IGNORE
<U01FF> <o-diaerisis>;<U01FF>;<MIN>;IGNORE
-<U01FE> <o-diaerisis>;<U01FF>;<CAP>;IGNORE
<U00F5> <o-diaerisis>;<TIL>;<MIN>;IGNORE
+reorder-after <U005A>
+<U00C5> <a-ring>;<BAS>;<CAP>;IGNORE
+<U01FA> <a-ring>;<ACA>;<CAP>;IGNORE
+<U00C4> <a-diaerisis>;<BAS>;<CAP>;IGNORE
+<U00C6> <a-diaerisis>;<REU>;<CAP>;IGNORE
+<U01FC> <a-diaerisis>;<U01FD>;<CAP>;IGNORE
+<U01E2> <a-diaerisis>;<MAC>;<CAP>;IGNORE
+<U00D6> <o-diaerisis>;<BAS>;<CAP>;IGNORE
+<U00D8> <o-diaerisis>;<U00D8>;<CAP>;IGNORE
+<U01FE> <o-diaerisis>;<U01FF>;<CAP>;IGNORE
<U00D5> <o-diaerisis>;<TIL>;<CAP>;IGNORE
-reorder-after <U016A>
+reorder-after <U016B>
<U0076> <v>;<U0056>;<BAS>;<MIN>
-<U0056> <v>;<U0056>;<BAS>;<CAP>
<U1E7D> <v>;<U0056>;<TIL>;<MIN>
-<U1E7C> <v>;<U0056>;<TIL>;<CAP>
<U0077> <w>;<U0057>;<BAS>;<MIN>
-<U0057> <w>;<U0057>;<BAS>;<CAP>
<U1E83> <w>;<U0057>;<ACA>;<MIN>
-<U1E82> <w>;<U0057>;<ACA>;<CAP>
<U1E81> <w>;<U0057>;<GRA>;<MIN>
-<U1E80> <w>;<U0057>;<GRA>;<CAP>
<U0175> <w>;<U0057>;<CIR>;<MIN>
-<U0174> <w>;<U0057>;<CIR>;<CAP>
<U1E85> <w>;<U0057>;<REU>;<MIN>
-<U1E84> <w>;<U0057>;<REU>;<CAP>
<U1E87> <w>;<U0057>;<PCT>;<MIN>
+reorder-after <U016A>
+<U0056> <v>;<U0056>;<BAS>;<CAP>
+<U1E7C> <v>;<U0056>;<TIL>;<CAP>
+<U0057> <w>;<U0057>;<BAS>;<CAP>
+<U1E82> <w>;<U0057>;<ACA>;<CAP>
+<U1E80> <w>;<U0057>;<GRA>;<CAP>
+<U0174> <w>;<U0057>;<CIR>;<CAP>
+<U1E84> <w>;<U0057>;<REU>;<CAP>
<U1E86> <w>;<U0057>;<PCT>;<CAP>
reorder-after <U00FF>
<U00FC> <y>;<DTT>;<MIN>;IGNORE
+reorder-after <U0178>
<U00DC> <y>;<DTT>;<CAP>;IGNORE
% Present in iso14651_t1, but these definitions seem to have been
% removed from latest iso14651 tables.
-reorder-after <U0162>
+reorder-after <U0163>
<U00FE> "<t><h>";"<LIG><LIG>";"<MIN><MIN>";IGNORE
+reorder-after <U0162>
<U00DE> "<t><h>";"<LIG><LIG>";"<CAP><CAP>";IGNORE
reorder-after <U0064>
<U00F0> <d>;<PCL>;<MIN>;IGNORE
-<U00D0> <d>;<PCL>;<CAP>;IGNORE
<U0111> <d>;<OBL>;<MIN>;IGNORE
+reorder-after <U0044>
+<U00D0> <d>;<PCL>;<CAP>;IGNORE
<U0110> <d>;<OBL>;<CAP>;IGNORE
reorder-end
-
END LC_COLLATE
LC_CTYPE
@@ -141,12 +145,10 @@ positive_sign ""
negative_sign "<U002D>"
int_frac_digits 2
frac_digits 2
-% int_curr_symbol precedes
-% curr_symbol succeeds
p_cs_precedes 0
-p_sep_by_space 2
+p_sep_by_space 1
n_cs_precedes 0
-n_sep_by_space 2
+n_sep_by_space 1
p_sign_posn 1
n_sign_posn 1
END LC_MONETARY
@@ -168,18 +170,18 @@ day "<U0073><U0075><U006E><U006E><U0075><U006E><U0074><U0061><U0069>";/
"<U0074><U006F><U0072><U0073><U0074><U0061><U0069>";/
"<U0070><U0065><U0072><U006A><U0061><U006E><U0074><U0061><U0069>";/
"<U006C><U0061><U0075><U0061><U006E><U0074><U0061><U0069>"
-abmon "<U0074><U0061><U006D><U006D><U0069><U00A0>";/
- "<U0068><U0065><U006C><U006D><U0069><U00A0>";/
+abmon "<U0074><U0061><U006D><U006D><U0069>";/
+ "<U0068><U0065><U006C><U006D><U0069>";/
"<U006D><U0061><U0061><U006C><U0069><U0073>";/
- "<U0068><U0075><U0068><U0074><U0069><U00A0>";/
- "<U0074><U006F><U0075><U006B><U006F><U00A0>";/
- "<U006B><U0065><U0073><U00E4><U00A0><U00A0>";/
- "<U0068><U0065><U0069><U006E><U00E4><U00A0>";/
- "<U0065><U006C><U006F><U00A0><U00A0><U00A0>";/
- "<U0073><U0079><U0079><U0073><U00A0><U00A0>";/
- "<U006C><U006F><U006B><U0061><U00A0><U00A0>";/
+ "<U0068><U0075><U0068><U0074><U0069>";/
+ "<U0074><U006F><U0075><U006B><U006F>";/
+ "<U006B><U0065><U0073><U00E4>";/
+ "<U0068><U0065><U0069><U006E><U00E4>";/
+ "<U0065><U006C><U006F>";/
+ "<U0073><U0079><U0079><U0073>";/
+ "<U006C><U006F><U006B><U0061>";/
"<U006D><U0061><U0072><U0072><U0061><U0073>";/
- "<U006A><U006F><U0075><U006C><U0075><U00A0>"
+ "<U006A><U006F><U0075><U006C><U0075>"
mon "<U0074><U0061><U006D><U006D><U0069><U006B><U0075><U0075>";/
"<U0068><U0065><U006C><U006D><U0069><U006B><U0075><U0075>";/
"<U006D><U0061><U0061><U006C><U0069><U0073><U006B><U0075><U0075>";/
@@ -202,13 +204,14 @@ t_fmt_ampm ""
date_fmt "<U0025><U0061><U0020><U0025><U002D><U0064><U002E><U0025>/
<U002D><U006D><U002E><U0025><U0059><U0020><U0025><U0048><U002E><U0025>/
<U004D><U002E><U0025><U0053><U0020><U0025><U007A>"
+week 7;19971130;4
first_weekday 2 % Monday
first_workday 2 % Monday
END LC_TIME
LC_MESSAGES
-yesexpr "<U005E><U005B><U004B><U006B><U004A><U006A><U0059><U0079><U005D><U002E><U002A>"
-noexpr "<U005E><U005B><U004E><U006E><U0045><U0065><U005D><U002E><U002A>"
+yesexpr "<U005E><U005B><U004B><U006B><U0059><U0079><U005D><U002E><U002A>"
+noexpr "<U005E><U005B><U0045><U0065><U004E><U006E><U005D><U002E><U002A>"
END LC_MESSAGES
LC_PAPER
@@ -217,6 +220,7 @@ width 210
END LC_PAPER
LC_TELEPHONE
+tel_dom_fmt "<U0028><U0025><U0041><U0029><U0020><U0025><U006C>"
tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025>/
<U006C>"
int_prefix "<U0033><U0035><U0038>"
@@ -230,15 +234,25 @@ END LC_MEASUREMENT
LC_NAME
name_fmt "<U0025><U0064><U0025><U0074><U0025><U0067><U0025><U0074>/
<U0025><U006D><U0025><U0074><U0025><U0066>"
+% Finnish equivalents for Mr/Mrs/Miss/Ms are herra/rouva/rouva/neiti
+% but they are practically never used, thus we don't define them here.
END LC_NAME
LC_ADDRESS
-postal_fmt "<U0025><U0066><U0025><U004E><U0025><U0061><U0025><U004E>/
-<U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025><U0073>/
-<U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072><U0025>/
-<U004E><U0025><U007A><U0020><U0025><U0054><U0025>/
+postal_fmt "<U0025><U0066><U0025><U004E><U0025><U0064><U0025><U004E>/
+<U0025><U0062><U0025><U004E><U0025><U0061><U0025><U004E><U0025><U0073>/
+<U0020><U0025><U0068><U0025><U0074><U0025><U0065><U0025><U0074><U0025>/
+<U0072><U0025><U004E><U0025><U007A><U0020><U0025><U0054><U0025>/
<U004E><U0025><U0063><U0025><U004E>"
country_ab2 "<U0046><U0049>"
country_ab3 "<U0046><U0049><U004E>"
country_num 246
+country_name "<U0053><U0075><U006F><U006D><U0069>"
+country_post "<U0046><U0049>"
+country_car "<U0046><U0049><U004E>"
+country_isbn 952
+lang_name "<U0073><U0075><U006F><U006D><U0069>"
+lang_ab "<U0066><U0069>"
+lang_term "<U0066><U0069><U006E>"
+lang_lib "<U0066><U0069><U006E>"
END LC_ADDRESS
diff --git a/libc/localedata/locales/si_LK b/libc/localedata/locales/si_LK
index daf618caf..8d8643f78 100644
--- a/libc/localedata/locales/si_LK
+++ b/libc/localedata/locales/si_LK
@@ -85,46 +85,46 @@ LC_TIME
%
% Abbreviated weekday names (%a)
abday "<U0D89>";"<U0DC3>";/
- "<U0D85>";"<U0DB6>";/
- "<U0DB6><U0DCA><U200D><U0DBB>";"<U0DC3><U0DD2>";/
- "<U0DC3><U0DD9>"
+ "<U0D85>";"<U0DB6>";/
+ "<U0DB6><U0DCA><U200D><U0DBB>";"<U0DC3><U0DD2>";/
+ "<U0DC3><U0DD9>"
%
% Full weekday names (%A)
day "<U0D89><U0DBB><U0DD2><U0DAF><U0DCF>";/
- "<U0DC3><U0DB3><U0DD4><U0DAF><U0DCF>";/
- "<U0D85><U0D9F><U0DC4><U0DBB><U0DD4><U0DC0><U0DCF><U0DAF><U0DCF>";/
- "<U0DB6><U0DAF><U0DCF><U0DAF><U0DCF>";/
- "<U0DB6><U0DCA><U200D><U0DBB><U0DC4><U0DC3><U0DCA><U0DB4><U0DAD><U0DD2><U0DB1><U0DCA><U0DAF><U0DCF>";/
+ "<U0DC3><U0DB3><U0DD4><U0DAF><U0DCF>";/
+ "<U0D85><U0D9F><U0DC4><U0DBB><U0DD4><U0DC0><U0DCF><U0DAF><U0DCF>";/
+ "<U0DB6><U0DAF><U0DCF><U0DAF><U0DCF>";/
+ "<U0DB6><U0DCA><U200D><U0DBB><U0DC4><U0DC3><U0DCA><U0DB4><U0DAD><U0DD2><U0DB1><U0DCA><U0DAF><U0DCF>";/
"<U0DC3><U0DD2><U0D9A><U0DD4><U0DBB><U0DCF><U0DAF><U0DCF>";/
- "<U0DC3><U0DD9><U0DB1><U0DC3><U0DD4><U0DBB><U0DCF><U0DAF><U0DCF>"
+ "<U0DC3><U0DD9><U0DB1><U0DC3><U0DD4><U0DBB><U0DCF><U0DAF><U0DCF>"
%
% Abbreviated month names (%b)
abmon "<U0DA2><U0DB1>";/
- "<U0DB4><U0DD9><U0DB6>";/
- "<U0DB8><U0DCF><U0DBB><U0DCA>";/
- "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2>";/
- "<U0DB8><U0DD0><U0DBA><U0DD2>";/
- "<U0DA2><U0DD6><U0DB1><U0DD2>";/
- "<U0DA2><U0DD6><U0DBD><U0DD2>";/
- "<U0D85><U0D9C><U0DDD>";/
- "<U0DC3><U0DD0><U0DB4><U0DCA>";/
- "<U0D94><U0D9A><U0DCA>";/
- "<U0DB1><U0DD9><U0DC0><U0DD0>";/
- "<U0DAF><U0DD9><U0DC3><U0DD0>"
+ "<U0DB4><U0DD9><U0DB6>";/
+ "<U0DB8><U0DCF><U0DBB><U0DCA>";/
+ "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2>";/
+ "<U0DB8><U0DD0><U0DBA><U0DD2>";/
+ "<U0DA2><U0DD6><U0DB1><U0DD2>";/
+ "<U0DA2><U0DD6><U0DBD><U0DD2>";/
+ "<U0D85><U0D9C><U0DDD>";/
+ "<U0DC3><U0DD0><U0DB4><U0DCA>";/
+ "<U0D94><U0D9A><U0DCA>";/
+ "<U0DB1><U0DD9><U0DC0><U0DD0>";/
+ "<U0DAF><U0DD9><U0DC3><U0DD0>"
%
% Full month names (%B)
mon "<U0DA2><U0DB1><U0DC0><U0DCF><U0DBB><U0DD2>";/
"<U0DB4><U0DD9><U0DB6><U0DBB><U0DC0><U0DCF><U0DBB><U0DD2>";/
- "<U0DB8><U0DCF><U0DBB><U0DCA><U0DAD><U0DD4>";/
- "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2><U0DBA><U0DD9><U0DBD><U0DCA>";/
- "<U0DB8><U0DD0><U0DBA><U0DD2>";/
- "<U0DA2><U0DD6><U0DB1><U0DD2>";/
- "<U0DA2><U0DD6><U0DBD><U0DD2>";/
- "<U0D85><U0D9C><U0DDD><U0DC3><U0DCA><U0DAD><U0DD4>";/
- "<U0DC3><U0DD0><U0DB4><U0DCA><U0DAD><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
- "<U0D94><U0D9A><U0DCA><U0DAD><U0DDD><U0DB6><U0DBB><U0DCA>";/
- "<U0DB1><U0DDC><U0DC0><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
- "<U0DAF><U0DD9><U0DC3><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>"
+ "<U0DB8><U0DCF><U0DBB><U0DCA><U0DAD><U0DD4>";/
+ "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2><U0DBA><U0DD9><U0DBD><U0DCA>";/
+ "<U0DB8><U0DD0><U0DBA><U0DD2>";/
+ "<U0DA2><U0DD6><U0DB1><U0DD2>";/
+ "<U0DA2><U0DD6><U0DBD><U0DD2>";/
+ "<U0D85><U0D9C><U0DDD><U0DC3><U0DCA><U0DAD><U0DD4>";/
+ "<U0DC3><U0DD0><U0DB4><U0DCA><U0DAD><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
+ "<U0D94><U0D9A><U0DCA><U0DAD><U0DDD><U0DB6><U0DBB><U0DCA>";/
+ "<U0DB1><U0DDC><U0DC0><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
+ "<U0DAF><U0DD9><U0DC3><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>"
%
% Equivalent of AM PM
am_pm "<U0DB4><U0DD9><U002E><U0DC0><U002E>";"<U0DB4><U002E><U0DC0><U002E>"
@@ -196,6 +196,10 @@ LC_ADDRESS
%
postal_fmt "<U0025><U007A><U0025><U0063><U0025><U0054><U0025><U0073><U0025><U0062><U0025><U0065><U0025><U0072>"
+country_ab2 "<U004C><U004B>"
+country_ab3 "<U004C><U004B><U0041>"
+country_num 144
+
END LC_ADDRESS
diff --git a/libc/localedata/locales/sv_SE b/libc/localedata/locales/sv_SE
index f558e2471..2dbc85364 100644
--- a/libc/localedata/locales/sv_SE
+++ b/libc/localedata/locales/sv_SE
@@ -169,9 +169,9 @@ mon "<U006A><U0061><U006E><U0075><U0061><U0072><U0069>";/
"<U006F><U006B><U0074><U006F><U0062><U0065><U0072>";/
"<U006E><U006F><U0076><U0065><U006D><U0062><U0065><U0072>";/
"<U0064><U0065><U0063><U0065><U006D><U0062><U0065><U0072>"
-d_t_fmt "<U0025><U0061><U0020><U0025><U0065><U0020><U0025><U0062><U0020><U0025><U0059><U0020><U0025><U0048><U002E><U0025><U004D><U002E><U0025><U0053>"
+d_t_fmt "<U0025><U0061><U0020><U0025><U0065><U0020><U0025><U0062><U0020><U0025><U0059><U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
d_fmt "<U0025><U0059><U002D><U0025><U006D><U002D><U0025><U0064>"
-t_fmt "<U0025><U0048><U002E><U0025><U004D><U002E><U0025><U0053>"
+t_fmt "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
am_pm "";""
t_fmt_ampm ""
date_fmt "<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065>/
diff --git a/libc/localedata/locales/ta_LK b/libc/localedata/locales/ta_LK
new file mode 100644
index 000000000..80b2f69a7
--- /dev/null
+++ b/libc/localedata/locales/ta_LK
@@ -0,0 +1,85 @@
+comment_char %
+escape_char /
+%
+% Tamil language locale for Sri Lanka
+% Language: ta
+% Territory: LK
+% Revision: 1.0
+% Date: 2011,August,11
+% Application: general
+% Users: general
+% Charset: SLS 1326:2008
+% Distribution and use is free, also
+% for commercial purposes.
+
+LC_IDENTIFICATION
+title "Tamil language locale for Sri Lanka"
+source "J.Yogaraj"
+address "30/36Q -2/1,Charles Apartments, De Silva Cross Rd,/
+ Kalubowila,Dehiwela,SriLanka."
+contact "94-777-315206"
+email "yogaraj.ubuntu@gmail.com"
+tel "94-112-765773"
+fax ""
+language "Tamil"
+territory "Sri Lanka"
+revision "1.0"
+date "2011,August,11"
+%
+category "ta_LK:2000";LC_IDENTIFICATION
+category "ta_LK:2000";LC_CTYPE
+category "ta_LK:2000";LC_COLLATE
+category "ta_LK:2000";LC_TIME
+category "ta_LK:2000";LC_NUMERIC
+category "ta_LK:2000";LC_MONETARY
+category "ta_LK:2000";LC_MESSAGES
+category "ta_LK:2000";LC_PAPER
+category "ta_LK:2000";LC_NAME
+category "ta_LK:2000";LC_ADDRESS
+category "ta_LK:2000";LC_TELEPHONE
+
+END LC_IDENTIFICATION
+
+LC_COLLATE
+copy "ta_IN"
+END LC_COLLATE
+
+LC_CTYPE
+copy "ta_IN"
+END LC_CTYPE
+
+LC_MESSAGES
+copy "ta_IN"
+END LC_MESSAGES
+
+LC_MONETARY
+copy "ta_IN"
+END LC_MONETARY
+
+LC_NUMERIC
+copy "ta_IN"
+END LC_NUMERIC
+
+LC_TIME
+copy "ta_IN"
+END LC_TIME
+
+LC_PAPER
+copy "si_LK"
+END LC_PAPER
+
+LC_TELEPHONE
+copy "si_LK"
+END LC_TELEPHONE
+
+LC_MEASUREMENT
+copy "si_LK"
+END LC_MEASUREMENT
+
+LC_NAME
+copy "ta_IN"
+END LC_NAME
+
+LC_ADDRESS
+copy "si_LK"
+END LC_ADDRESS
diff --git a/libc/localedata/locales/wal_ET b/libc/localedata/locales/wal_ET
index 33953cd60..7846531f4 100644
--- a/libc/localedata/locales/wal_ET
+++ b/libc/localedata/locales/wal_ET
@@ -110,7 +110,6 @@ country_num 231 % 210 found in at least one ISO 3166 doc
% country_car unknown
% country_isbn unknown, Need ISO 2108
lang_name "<U12C8><U120B><U12ED><U1273><U1271>"
-lang_ab "<U0077><U0061><U006C>"
lang_term "<U0077><U0061><U006C>"
lang_lib "<U0077><U0061><U006C>"
@@ -144,12 +143,12 @@ LC_TIME
% Abbreviated weekday names (%a)
%
abday "<U12C8><U130B> ";/
- "<U1233><U12ED><U1296>";/
- "<U121B><U1246><U1233>";/
- "<U12A0><U1229><U12CB>";/
- "<U1203><U1219><U1233>";/
- "<U12A0><U122D><U1263>";/
- "<U1244><U122B> "
+ "<U1233><U12ED><U1296>";/
+ "<U121B><U1246><U1233>";/
+ "<U12A0><U1229><U12CB>";/
+ "<U1203><U1219><U1233>";/
+ "<U12A0><U122D><U1263>";/
+ "<U1244><U122B> "
%
% Full weekday names (%A)
%
@@ -169,32 +168,32 @@ day "<U12C8><U130B>";/
% Abbreviated month names (%b)
%
abmon "<U1303><U1295><U12E9>";/
- "<U134C><U1265><U1229>";/
- "<U121B><U122D><U127D>";/
- "<U12A4><U1355><U1228>";/
- "<U121C><U12ED><U0020>";/
- "<U1301><U1295><U0020>";/
- "<U1301><U120B><U12ED>";/
- "<U12A6><U1308><U1235>";/
- "<U1234><U1355><U1274>";/
- "<U12A6><U12AD><U1270>";/
- "<U1296><U126C><U121D>";/
- "<U12F2><U1234><U121D>"
+ "<U134C><U1265><U1229>";/
+ "<U121B><U122D><U127D>";/
+ "<U12A4><U1355><U1228>";/
+ "<U121C><U12ED><U0020>";/
+ "<U1301><U1295><U0020>";/
+ "<U1301><U120B><U12ED>";/
+ "<U12A6><U1308><U1235>";/
+ "<U1234><U1355><U1274>";/
+ "<U12A6><U12AD><U1270>";/
+ "<U1296><U126C><U121D>";/
+ "<U12F2><U1234><U121D>"
%
% Full month names (%B)
%
mon "<U1303><U1295><U12E9><U12C8><U122A>";/
- "<U134C><U1265><U1229><U12C8><U122A>";/
- "<U121B><U122D><U127D>";/
- "<U12A4><U1355><U1228><U120D>";/
- "<U121C><U12ED>";/
- "<U1301><U1295>";/
- "<U1301><U120B><U12ED>";/
- "<U12A6><U1308><U1235><U1275>";/
- "<U1234><U1355><U1274><U121D><U1260><U122D>";/
- "<U12A6><U12AD><U1270><U12CD><U1260><U122D>";/
- "<U1296><U126C><U121D><U1260><U122D>";/
- "<U12F2><U1234><U121D><U1260><U122D>"
+ "<U134C><U1265><U1229><U12C8><U122A>";/
+ "<U121B><U122D><U127D>";/
+ "<U12A4><U1355><U1228><U120D>";/
+ "<U121C><U12ED>";/
+ "<U1301><U1295>";/
+ "<U1301><U120B><U12ED>";/
+ "<U12A6><U1308><U1235><U1275>";/
+ "<U1234><U1355><U1274><U121D><U1260><U122D>";/
+ "<U12A6><U12AD><U1270><U12CD><U1260><U122D>";/
+ "<U1296><U126C><U121D><U1260><U122D>";/
+ "<U12F2><U1234><U121D><U1260><U122D>"
%
% Equivalent of AM PM
%
@@ -202,7 +201,7 @@ mon "<U1303><U1295><U12E9><U12C8><U122A>";/
% also <U12A1><U1218><U122D><U1232>
%
am_pm "<U121B><U1208><U12F6>";/
- "<U1243><U121B>"
+ "<U1243><U121B>"
%
% Appropriate date representation (%x)
% "%d/%m/%Y"
diff --git a/libc/nptl/ChangeLog b/libc/nptl/ChangeLog
index 31177bf17..e8ff69ab0 100644
--- a/libc/nptl/ChangeLog
+++ b/libc/nptl/ChangeLog
@@ -1,3 +1,18 @@
+2011-12-22 Ulrich Drepper <drepper@gmail.com>
+
+ * sysdeps/pthread/gai_misc.h (__gai_create_helper_thread): Use
+ __pthread_get_minstack.
+ * sysdeps/unix/sysv/linux/mq_notify.c (init_mq_netlink): Likewise.
+
+ [BZ #13088]
+ * sysdeps/unix/sysv/linux/timer_routines.c: Get minimum stack size
+ through __pthread_get_minstack.
+ * nptl-init.c (__pthread_initialize_minimal_internal): Get page size
+ directly from _rtld_global_ro.
+ (__pthread_get_minstack): New function.
+ * pthreadP.h: Declare __pthread_get_minstack.
+ * Versions (libpthread) [GLIBC_PRIVATE]: Add __pthread_get_minstack.
+
2011-12-21 Ulrich Drepper <drepper@gmail.com>
[BZ #13515]
diff --git a/libc/nptl/Versions b/libc/nptl/Versions
index 5a884202f..6a1037550 100644
--- a/libc/nptl/Versions
+++ b/libc/nptl/Versions
@@ -255,6 +255,6 @@ libpthread {
GLIBC_PRIVATE {
__pthread_initialize_minimal;
__pthread_clock_gettime; __pthread_clock_settime;
- __pthread_unwind;
+ __pthread_unwind; __pthread_get_minstack;
}
}
diff --git a/libc/nptl/nptl-init.c b/libc/nptl/nptl-init.c
index db45cab23..434922446 100644
--- a/libc/nptl/nptl-init.c
+++ b/libc/nptl/nptl-init.c
@@ -427,7 +427,7 @@ __pthread_initialize_minimal_internal (void)
/* Make sure it meets the minimum size that allocate_stack
(allocatestack.c) will demand, which depends on the page size. */
- const uintptr_t pagesz = __sysconf (_SC_PAGESIZE);
+ const uintptr_t pagesz = GLRO(dl_pagesize);
const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;
if (limit.rlim_cur < minstack)
limit.rlim_cur = minstack;
@@ -469,3 +469,13 @@ __pthread_initialize_minimal_internal (void)
}
strong_alias (__pthread_initialize_minimal_internal,
__pthread_initialize_minimal)
+
+
+size_t
+__pthread_get_minstack (const pthread_attr_t *attr)
+{
+ struct pthread_attr *iattr = (struct pthread_attr *) attr;
+
+ return (GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN
+ + iattr->guardsize);
+}
diff --git a/libc/nptl/pthreadP.h b/libc/nptl/pthreadP.h
index df4f4d769..845434e50 100644
--- a/libc/nptl/pthreadP.h
+++ b/libc/nptl/pthreadP.h
@@ -397,6 +397,7 @@ weak_function;
extern void __pthread_init_static_tls (struct link_map *) attribute_hidden;
+extern size_t __pthread_get_minstack (const pthread_attr_t *attr);
/* Namespace save aliases. */
extern int __pthread_getschedparam (pthread_t thread_id, int *policy,
diff --git a/libc/nptl/sysdeps/pthread/gai_misc.h b/libc/nptl/sysdeps/pthread/gai_misc.h
index 9094c1e37..cbbe47657 100644
--- a/libc/nptl/sysdeps/pthread/gai_misc.h
+++ b/libc/nptl/sysdeps/pthread/gai_misc.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2006, 2007, 2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -97,7 +97,9 @@ __gai_create_helper_thread (pthread_t *threadp, void *(*tf) (void *),
pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
/* The helper thread needs only very little resources. */
- (void) pthread_attr_setstacksize (&attr, 4 * PTHREAD_STACK_MIN);
+ (void) pthread_attr_setstacksize (&attr,
+ __pthread_get_minstack (&attr)
+ + 4 * PTHREAD_STACK_MIN);
/* Block all signals in the helper thread. To do this thoroughly we
temporarily have to block all signals here. */
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c b/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c
index 49ddeae05..11ffc328e 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c
+++ b/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004, 2005, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2004, 2005, 2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contribute by Ulrich Drepper <drepper@redhat.com>, 2004.
@@ -201,7 +201,7 @@ init_mq_netlink (void)
(void) pthread_attr_init (&attr);
(void) pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
/* We do not need much stack space, the bare minimum will be enough. */
- (void) pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
+ (void) pthread_attr_setstacksize (&attr, __pthread_get_minstack (&attr));
/* Temporarily block all signals so that the newly created
thread inherits the mask. */
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c b/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c
index b159316fb..44da8563d 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c
+++ b/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2007, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
@@ -165,7 +165,7 @@ __start_helper_thread (void)
and should go away automatically when canceled. */
pthread_attr_t attr;
(void) pthread_attr_init (&attr);
- (void) pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
+ (void) pthread_attr_setstacksize (&attr, __pthread_get_minstack (&attr));
/* Block all signals in the helper thread but SIGSETXID. To do this
thoroughly we temporarily have to block all signals here. The
diff --git a/libc/sysdeps/i386/bits/byteswap.h b/libc/sysdeps/i386/bits/byteswap.h
index c246ae86c..ddfb785c6 100644
--- a/libc/sysdeps/i386/bits/byteswap.h
+++ b/libc/sysdeps/i386/bits/byteswap.h
@@ -1,5 +1,5 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008, 2010
+ Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008, 2010, 2011
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -109,15 +109,15 @@ __bswap_32 (unsigned int __bsx)
#if defined __GNUC__ && __GNUC__ >= 2
/* Swap bytes in 64 bit value. */
-#define __bswap_constant_64(x) \
- ((((x) & 0xff00000000000000ull) >> 56) \
- | (((x) & 0x00ff000000000000ull) >> 40) \
- | (((x) & 0x0000ff0000000000ull) >> 24) \
- | (((x) & 0x000000ff00000000ull) >> 8) \
- | (((x) & 0x00000000ff000000ull) << 8) \
- | (((x) & 0x0000000000ff0000ull) << 24) \
- | (((x) & 0x000000000000ff00ull) << 40) \
- | (((x) & 0x00000000000000ffull) << 56))
+# define __bswap_constant_64(x) \
+ (__extension__ ((((x) & 0xff00000000000000ull) >> 56) \
+ | (((x) & 0x00ff000000000000ull) >> 40) \
+ | (((x) & 0x0000ff0000000000ull) >> 24) \
+ | (((x) & 0x000000ff00000000ull) >> 8) \
+ | (((x) & 0x00000000ff000000ull) << 8) \
+ | (((x) & 0x0000000000ff0000ull) << 24) \
+ | (((x) & 0x000000000000ff00ull) << 40) \
+ | (((x) & 0x00000000000000ffull) << 56)))
# define __bswap_64(x) \
(__extension__ \
diff --git a/libc/sysdeps/i386/fpu/bits/fenv.h b/libc/sysdeps/i386/fpu/bits/fenv.h
index ef3fcb384..8c00771cc 100644
--- a/libc/sysdeps/i386/fpu/bits/fenv.h
+++ b/libc/sysdeps/i386/fpu/bits/fenv.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+/* Copyright (C) 1997, 1998, 1999, 2000, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -88,3 +88,51 @@ fenv_t;
/* Floating-point environment where none of the exception is masked. */
# define FE_NOMASK_ENV ((__const fenv_t *) -2)
#endif
+
+
+#ifdef __USE_EXTERN_INLINES
+__BEGIN_DECLS
+
+/* Optimized versions. */
+extern int __REDIRECT_NTH (__feraiseexcept_renamed, (int), feraiseexcept);
+__extern_inline int
+__NTH (feraiseexcept (int __excepts))
+{
+ if (__builtin_constant_p (__excepts)
+ && (__excepts & ~(FE_INVALID | FE_DIVBYZERO)) == 0)
+ {
+ if ((FE_INVALID & __excepts) != 0)
+ {
+ /* One example of an invalid operation is 0.0 / 0.0. */
+ float __f = 0.0;
+
+# ifdef __SSE_MATH__
+ __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f));
+# else
+ __asm__ __volatile__ ("fdiv %%st, %%st(0); fwait"
+ : "=t" (__f) : "0" (__f));
+# endif
+ (void) &__f;
+ }
+ if ((FE_DIVBYZERO & __excepts) != 0)
+ {
+ float __f = 1.0;
+ float __g = 0.0;
+
+# ifdef __SSE_MATH__
+ __asm__ __volatile__ ("divss %1, %0" : : "x" (__f), "x" (__g));
+# else
+ __asm__ __volatile__ ("fdivp %%st(1), %%st; fwait"
+ : "=t" (__f) : "0" (__f), "u" (__g) : "st(1)");
+# endif
+ (void) &__f;
+ }
+
+ return 0;
+ }
+
+ return __feraiseexcept_renamed (__excepts);
+}
+
+__END_DECLS
+#endif
diff --git a/libc/sysdeps/i386/fpu/fgetexcptflg.c b/libc/sysdeps/i386/fpu/fgetexcptflg.c
index 5f60511b6..1a0e6df63 100644
--- a/libc/sysdeps/i386/fpu/fgetexcptflg.c
+++ b/libc/sysdeps/i386/fpu/fgetexcptflg.c
@@ -1,5 +1,5 @@
/* Store current representation for exceptions.
- Copyright (C) 1997,99,2000,01 Free Software Foundation, Inc.
+ Copyright (C) 1997,99,2000,01,11 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -19,7 +19,10 @@
02111-1307 USA. */
#include <fenv.h>
-#include <bp-sym.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
int
__fegetexceptflag (fexcept_t *flagp, int excepts)
@@ -31,6 +34,17 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
*flagp = temp & excepts & FE_ALL_EXCEPT;
+ /* If the CPU supports SSE, merge in the MXCSR exception flags too. */
+ if ((GLRO(dl_hwcap) & HWCAP_I386_XMM) != 0)
+ {
+ unsigned int sse_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&sse_exc));
+
+ *flagp |= sse_exc & excepts & FE_ALL_EXCEPT;
+ }
+
/* Success. */
return 0;
}
@@ -38,7 +52,7 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
#include <shlib-compat.h>
#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
strong_alias (__fegetexceptflag, __old_fegetexceptflag)
-compat_symbol (libm, BP_SYM (__old_fegetexceptflag), BP_SYM (fegetexceptflag), GLIBC_2_1);
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
#endif
-versioned_symbol (libm, BP_SYM (__fegetexceptflag), BP_SYM (fegetexceptflag), GLIBC_2_2);
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
index 073856ff8..470ddbe27 100644
--- a/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+++ b/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
@@ -20,6 +20,7 @@
#ifndef NOT_IN_libc
+
# ifndef USE_AS_STRCAT
# include <sysdep.h>
@@ -31,8 +32,8 @@
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
# ifndef STRCPY
# define STRCPY __strcpy_ssse3
@@ -40,14 +41,22 @@
# ifdef USE_AS_STRNCPY
# define PARMS 8
-# define ENTRANCE PUSH(%ebx)
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
-# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
+# define ENTRANCE PUSH (%ebx)
+# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
+# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
-# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
+# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+
+# ifdef USE_AS_STPCPY
+# define SAVE_RESULT(n) lea n(%edx), %eax
+# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
+# else
+# define SAVE_RESULT(n) movl %edi, %eax
+# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif
# define STR1 PARMS
@@ -60,9 +69,7 @@
movl - 4 byte
movlpd - 8 byte
movaps - 16 byte - requires 16 byte alignment
- of sourse and destination adresses.
- 16 byte alignment: adress is 32bit value,
- right four bit of adress shall be 0.
+ of source and destination addresses.
*/
.text
@@ -72,8 +79,6 @@ ENTRY (STRCPY)
mov STR2(%esp), %ecx
# ifdef USE_AS_STRNCPY
movl LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(ExitTail0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
# endif
@@ -127,39 +132,23 @@ ENTRY (STRCPY)
sub $16, %ebx
and $0xf, %esi
-/* add 16 bytes ecx_shift to ebx */
+/* add 16 bytes ecx_offset to ebx */
add %esi, %ebx
# endif
lea 16(%ecx), %esi
-/* Now:
- esi = alignment_16(ecx) + ecx_shift + 16;
- ecx_shift = ecx - alignment_16(ecx)
-*/
and $-16, %esi
-/* Now:
- esi = alignment_16(ecx) + 16
-*/
pxor %xmm0, %xmm0
movlpd (%ecx), %xmm1
movlpd %xmm1, (%edx)
-/*
- look if there is zero symbol in next 16 bytes of string
- from esi to esi + 15 and form mask in xmm0
-*/
+
pcmpeqb (%esi), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
-/* convert byte mask in xmm0 to bit mask */
-
pmovmskb %xmm0, %eax
sub %ecx, %esi
-/* esi = 16 - ecx_shift */
-
-/* eax = 0: there isn't end of string from position esi to esi+15 */
-
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
@@ -169,17 +158,9 @@ ENTRY (STRCPY)
mov %edx, %eax
lea 16(%edx), %edx
-/* Now:
- edx = edx + 16 = alignment_16(edx) + edx_shift + 16
-*/
and $-16, %edx
-
-/* Now: edx = alignment_16(edx) + 16 */
-
sub %edx, %eax
-/* Now: eax = edx_shift - 16 */
-
# ifdef USE_AS_STRNCPY
add %eax, %esi
lea -1(%esi), %esi
@@ -191,22 +172,11 @@ ENTRY (STRCPY)
L(ContinueCopy):
# endif
sub %eax, %ecx
-/* Now:
- case ecx_shift >= edx_shift:
- ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16
- case ecx_shift < edx_shift:
- ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift)
-*/
mov %ecx, %eax
and $0xf, %eax
-/* Now:
- case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift
- case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift)
- eax can be 0, 1, ..., 15
-*/
mov $0, %esi
-/* case: ecx_shift == edx_shift */
+/* case: ecx_offset == edx_offset */
jz L(Align16Both)
@@ -323,7 +293,7 @@ L(Align16Both):
sub %ecx, %eax
sub %eax, %edx
# ifdef USE_AS_STRNCPY
- lea 48+64(%ebx, %eax), %ebx
+ lea 112(%ebx, %eax), %ebx
# endif
mov $-0x40, %esi
@@ -441,7 +411,6 @@ L(Shl1Start):
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
@@ -449,7 +418,6 @@ L(Shl1Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
@@ -457,8 +425,7 @@ L(Shl1Start):
test %eax, %eax
jnz L(Shl1LoopExit)
- palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 31(%ecx), %ecx
lea 16(%edx), %edx
@@ -506,11 +473,11 @@ L(Shl1LoopStart):
jmp L(Shl1LoopStart)
L(Shl1LoopExit):
- movaps (%edx), %xmm6
- psrldq $15, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
mov $15, %esi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -563,7 +530,6 @@ L(Shl2Start):
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
@@ -571,7 +537,6 @@ L(Shl2Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
@@ -579,8 +544,7 @@ L(Shl2Start):
test %eax, %eax
jnz L(Shl2LoopExit)
- palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 30(%ecx), %ecx
lea 16(%edx), %edx
@@ -628,11 +592,11 @@ L(Shl2LoopStart):
jmp L(Shl2LoopStart)
L(Shl2LoopExit):
- movaps (%edx), %xmm6
- psrldq $14, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
mov $14, %esi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -685,7 +649,6 @@ L(Shl3Start):
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
@@ -693,7 +656,6 @@ L(Shl3Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
@@ -701,8 +663,7 @@ L(Shl3Start):
test %eax, %eax
jnz L(Shl3LoopExit)
- palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 29(%ecx), %ecx
lea 16(%edx), %edx
@@ -750,11 +711,11 @@ L(Shl3LoopStart):
jmp L(Shl3LoopStart)
L(Shl3LoopExit):
- movaps (%edx), %xmm6
- psrldq $13, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
mov $13, %esi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -807,7 +768,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -815,7 +775,6 @@ L(Shl4Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
@@ -823,8 +782,7 @@ L(Shl4Start):
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -872,11 +830,11 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
mov $12, %esi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -929,7 +887,6 @@ L(Shl5Start):
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
@@ -937,7 +894,6 @@ L(Shl5Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
@@ -945,8 +901,7 @@ L(Shl5Start):
test %eax, %eax
jnz L(Shl5LoopExit)
- palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 27(%ecx), %ecx
lea 16(%edx), %edx
@@ -994,11 +949,11 @@ L(Shl5LoopStart):
jmp L(Shl5LoopStart)
L(Shl5LoopExit):
- movaps (%edx), %xmm6
- psrldq $11, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
mov $11, %esi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1051,7 +1006,6 @@ L(Shl6Start):
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
@@ -1059,7 +1013,6 @@ L(Shl6Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
@@ -1067,8 +1020,7 @@ L(Shl6Start):
test %eax, %eax
jnz L(Shl6LoopExit)
- palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 26(%ecx), %ecx
lea 16(%edx), %edx
@@ -1116,11 +1068,11 @@ L(Shl6LoopStart):
jmp L(Shl6LoopStart)
L(Shl6LoopExit):
- movaps (%edx), %xmm6
- psrldq $10, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
mov $10, %esi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1173,7 +1125,6 @@ L(Shl7Start):
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
@@ -1181,7 +1132,6 @@ L(Shl7Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
@@ -1189,8 +1139,7 @@ L(Shl7Start):
test %eax, %eax
jnz L(Shl7LoopExit)
- palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 25(%ecx), %ecx
lea 16(%edx), %edx
@@ -1238,11 +1187,11 @@ L(Shl7LoopStart):
jmp L(Shl7LoopStart)
L(Shl7LoopExit):
- movaps (%edx), %xmm6
- psrldq $9, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
mov $9, %esi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1295,7 +1244,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -1303,7 +1251,6 @@ L(Shl8Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
@@ -1311,8 +1258,7 @@ L(Shl8Start):
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -1360,11 +1306,9 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $8, %esi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1417,7 +1361,6 @@ L(Shl9Start):
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
@@ -1425,7 +1368,6 @@ L(Shl9Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
@@ -1433,8 +1375,7 @@ L(Shl9Start):
test %eax, %eax
jnz L(Shl9LoopExit)
- palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 23(%ecx), %ecx
lea 16(%edx), %edx
@@ -1482,11 +1423,9 @@ L(Shl9LoopStart):
jmp L(Shl9LoopStart)
L(Shl9LoopExit):
- movaps (%edx), %xmm6
- psrldq $7, %xmm6
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
mov $7, %esi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1539,7 +1478,6 @@ L(Shl10Start):
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
@@ -1547,7 +1485,6 @@ L(Shl10Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
@@ -1555,8 +1492,7 @@ L(Shl10Start):
test %eax, %eax
jnz L(Shl10LoopExit)
- palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 22(%ecx), %ecx
lea 16(%edx), %edx
@@ -1604,11 +1540,9 @@ L(Shl10LoopStart):
jmp L(Shl10LoopStart)
L(Shl10LoopExit):
- movaps (%edx), %xmm6
- psrldq $6, %xmm6
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
mov $6, %esi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1661,7 +1595,6 @@ L(Shl11Start):
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
@@ -1669,7 +1602,6 @@ L(Shl11Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
@@ -1677,8 +1609,7 @@ L(Shl11Start):
test %eax, %eax
jnz L(Shl11LoopExit)
- palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 21(%ecx), %ecx
lea 16(%edx), %edx
@@ -1726,11 +1657,9 @@ L(Shl11LoopStart):
jmp L(Shl11LoopStart)
L(Shl11LoopExit):
- movaps (%edx), %xmm6
- psrldq $5, %xmm6
+ movlpd -3(%ecx), %xmm0
+ movlpd %xmm0, -3(%edx)
mov $5, %esi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1783,7 +1712,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -1791,7 +1719,6 @@ L(Shl12Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
@@ -1799,8 +1726,7 @@ L(Shl12Start):
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -1848,11 +1774,9 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1905,7 +1829,6 @@ L(Shl13Start):
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
@@ -1913,7 +1836,6 @@ L(Shl13Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
@@ -1921,8 +1843,7 @@ L(Shl13Start):
test %eax, %eax
jnz L(Shl13LoopExit)
- palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 19(%ecx), %ecx
lea 16(%edx), %edx
@@ -1970,11 +1891,9 @@ L(Shl13LoopStart):
jmp L(Shl13LoopStart)
L(Shl13LoopExit):
- movaps (%edx), %xmm6
- psrldq $3, %xmm6
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
mov $3, %esi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -2027,7 +1946,6 @@ L(Shl14Start):
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
@@ -2035,7 +1953,6 @@ L(Shl14Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
@@ -2043,8 +1960,7 @@ L(Shl14Start):
test %eax, %eax
jnz L(Shl14LoopExit)
- palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 18(%ecx), %ecx
lea 16(%edx), %edx
@@ -2092,11 +2008,9 @@ L(Shl14LoopStart):
jmp L(Shl14LoopStart)
L(Shl14LoopExit):
- movaps (%edx), %xmm6
- psrldq $2, %xmm6
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
mov $2, %esi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -2149,7 +2063,6 @@ L(Shl15Start):
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
@@ -2157,7 +2070,6 @@ L(Shl15Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
@@ -2165,8 +2077,7 @@ L(Shl15Start):
test %eax, %eax
jnz L(Shl15LoopExit)
- palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 17(%ecx), %ecx
lea 16(%edx), %edx
@@ -2214,15 +2125,14 @@ L(Shl15LoopStart):
jmp L(Shl15LoopStart)
L(Shl15LoopExit):
- movaps (%edx), %xmm6
- psrldq $1, %xmm6
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
mov $1, %esi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx)
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
+
# ifndef USE_AS_STRCAT
.p2align 4
@@ -2235,15 +2145,38 @@ L(CopyFrom1To16Bytes):
POP (%esi)
test %al, %al
- jz L(ExitHigh)
+ jz L(ExitHigh8)
+
+L(CopyFrom1To16BytesLess8):
+ mov %al, %ah
+ and $15, %ah
+ jz L(ExitHigh4)
+
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
+
+ .p2align 4
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (3)
+# ifdef USE_AS_STRNCPY
+ sub $4, %ebx
+ lea 4(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4):
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
@@ -2255,11 +2188,7 @@ L(CopyFrom1To16Bytes):
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
@@ -2272,15 +2201,38 @@ L(Exit8):
RETURN1
.p2align 4
-L(ExitHigh):
+L(ExitHigh8):
+ mov %ah, %al
+ and $15, %al
+ jz L(ExitHigh12)
+
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
+
+ .p2align 4
+L(Exit12):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (11)
+# ifdef USE_AS_STRNCPY
+ sub $12, %ebx
+ lea 12(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh12):
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
@@ -2290,15 +2242,9 @@ L(ExitHigh):
.p2align 4
L(Exit16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
@@ -2310,7 +2256,7 @@ L(Exit16):
# endif
RETURN1
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
@@ -2318,79 +2264,84 @@ L(Exit16):
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
- lea (%esi, %edx), %esi
- lea -9(%ebx), %edx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%esi), %edx
+ add %esi, %edx
+
POP (%esi)
+
+ test %al, %al
jz L(ExitHighCase2)
- cmp $1, %ebx
- je L(Exit1)
+ cmp $8, %ebx
+ ja L(CopyFrom1To16BytesLess8)
+
test $0x01, %al
jnz L(Exit1)
- cmp $2, %ebx
- je L(Exit2)
+ cmp $1, %ebx
+ je L(Exit1)
test $0x02, %al
jnz L(Exit2)
- cmp $3, %ebx
- je L(Exit3)
+ cmp $2, %ebx
+ je L(Exit2)
test $0x04, %al
jnz L(Exit3)
- cmp $4, %ebx
- je L(Exit4)
+ cmp $3, %ebx
+ je L(Exit3)
test $0x08, %al
jnz L(Exit4)
- cmp $5, %ebx
- je L(Exit5)
+ cmp $4, %ebx
+ je L(Exit4)
test $0x10, %al
jnz L(Exit5)
- cmp $6, %ebx
- je L(Exit6)
+ cmp $5, %ebx
+ je L(Exit5)
test $0x20, %al
jnz L(Exit6)
- cmp $7, %ebx
- je L(Exit7)
+ cmp $6, %ebx
+ je L(Exit6)
test $0x40, %al
jnz L(Exit7)
+ cmp $7, %ebx
+ je L(Exit7)
jmp L(Exit8)
.p2align 4
L(ExitHighCase2):
- cmp $9, %ebx
- je L(Exit9)
+ cmp $8, %ebx
+ jbe L(CopyFrom1To16BytesLess8Case3)
+
test $0x01, %ah
jnz L(Exit9)
- cmp $10, %ebx
- je L(Exit10)
+ cmp $9, %ebx
+ je L(Exit9)
test $0x02, %ah
jnz L(Exit10)
- cmp $11, %ebx
- je L(Exit11)
+ cmp $10, %ebx
+ je L(Exit10)
test $0x04, %ah
jnz L(Exit11)
- cmp $12, %ebx
- je L(Exit12)
+ cmp $11, %ebx
+ je L(Exit11)
test $0x8, %ah
jnz L(Exit12)
- cmp $13, %ebx
- je L(Exit13)
+ cmp $12, %ebx
+ je L(Exit12)
test $0x10, %ah
jnz L(Exit13)
- cmp $14, %ebx
- je L(Exit14)
+ cmp $13, %ebx
+ je L(Exit13)
test $0x20, %ah
jnz L(Exit14)
- cmp $15, %ebx
- je L(Exit15)
+ cmp $14, %ebx
+ je L(Exit14)
test $0x40, %ah
jnz L(Exit15)
+ cmp $15, %ebx
+ je L(Exit15)
jmp L(Exit16)
CFI_PUSH(%esi)
+ .p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
@@ -2402,47 +2353,78 @@ L(CopyFrom1To16BytesCase3):
add %esi, %ecx
POP (%esi)
- cmp $16, %ebx
- je L(Exit16)
+
cmp $8, %ebx
- je L(Exit8)
- jg L(More8Case3)
+ ja L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):
cmp $4, %ebx
- je L(Exit4)
- jg L(More4Case3)
+ ja L(ExitHigh4Case3)
+
+ cmp $1, %ebx
+ je L(Exit1)
cmp $2, %ebx
- jl L(Exit1)
je L(Exit2)
- jg L(Exit3)
-L(More8Case3): /* but less than 16 */
- cmp $12, %ebx
- je L(Exit12)
- jl L(Less12Case3)
- cmp $14, %ebx
- jl L(Exit13)
- je L(Exit14)
- jg L(Exit15)
-L(More4Case3): /* but less than 8 */
+ cmp $3, %ebx
+ je L(Exit3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (4)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4Case3):
+ cmp $5, %ebx
+ je L(Exit5)
cmp $6, %ebx
- jl L(Exit5)
je L(Exit6)
- jg L(Exit7)
-L(Less12Case3): /* but more than 8 */
+ cmp $7, %ebx
+ je L(Exit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ SAVE_RESULT (8)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh8Case3):
+ cmp $12, %ebx
+ ja L(ExitHigh12Case3)
+
+ cmp $9, %ebx
+ je L(Exit9)
cmp $10, %ebx
- jl L(Exit9)
je L(Exit10)
- jg L(Exit11)
+ cmp $11, %ebx
+ je L(Exit11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (12)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh12Case3):
+ cmp $13, %ebx
+ je L(Exit13)
+ cmp $14, %ebx
+ je L(Exit14)
+ cmp $15, %ebx
+ je L(Exit15)
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ SAVE_RESULT (16)
+ RETURN1
+
# endif
.p2align 4
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
@@ -2458,11 +2440,7 @@ L(Exit1):
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
@@ -2480,11 +2458,7 @@ L(Exit3):
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
@@ -2497,36 +2471,12 @@ L(Exit3):
RETURN1
.p2align 4
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $4, %ebx
- lea 4(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
- .p2align 4
L(Exit5):
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
@@ -2544,11 +2494,7 @@ L(Exit6):
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
@@ -2566,11 +2512,7 @@ L(Exit7):
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
@@ -2585,14 +2527,10 @@ L(Exit7):
.p2align 4
L(Exit9):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
@@ -2607,14 +2545,10 @@ L(Exit9):
.p2align 4
L(Exit10):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
@@ -2629,14 +2563,10 @@ L(Exit10):
.p2align 4
L(Exit11):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
@@ -2649,38 +2579,12 @@ L(Exit11):
RETURN1
.p2align 4
-L(Exit12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $12, %ebx
- lea 12(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
- .p2align 4
L(Exit13):
movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
@@ -2695,14 +2599,10 @@ L(Exit13):
.p2align 4
L(Exit14):
movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
@@ -2717,14 +2617,10 @@ L(Exit14):
.p2align 4
L(Exit15):
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
@@ -2853,7 +2749,7 @@ L(FillFrom1To16Bytes):
jl L(Fill1)
je L(Fill2)
jg L(Fill3)
-L(FillMore8): /* but less than 16 */
+L(FillMore8): /* but less than 16 */
cmp $12, %ebx
je L(Fill12)
jl L(FillLess12)
@@ -2861,18 +2757,18 @@ L(FillMore8): /* but less than 16 */
jl L(Fill13)
je L(Fill14)
jg L(Fill15)
-L(FillMore4): /* but less than 8 */
+L(FillMore4): /* but less than 8 */
cmp $6, %ebx
jl L(Fill5)
je L(Fill6)
jg L(Fill7)
-L(FillLess12): /* but more than 8 */
+L(FillLess12): /* but more than 8 */
cmp $10, %ebx
jl L(Fill9)
je L(Fill10)
jmp L(Fill11)
- CFI_PUSH (%edi)
+ CFI_PUSH(%edi)
.p2align 4
L(StrncpyFillTailWithZero1):
@@ -2929,11 +2825,7 @@ L(StrncpyFillLess32):
L(ExitTail1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
@@ -2949,11 +2841,7 @@ L(ExitTail1):
L(ExitTail2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
@@ -2971,11 +2859,7 @@ L(ExitTail3):
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
@@ -2991,11 +2875,7 @@ L(ExitTail3):
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
@@ -3013,11 +2893,7 @@ L(ExitTail5):
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
@@ -3035,11 +2911,7 @@ L(ExitTail6):
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
@@ -3057,11 +2929,7 @@ L(ExitTail7):
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
@@ -3077,33 +2945,21 @@ L(ExitTail7):
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
# endif
RETURN
.p2align 4
L(ExitTail9):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
@@ -3118,14 +2974,10 @@ L(ExitTail9):
.p2align 4
L(ExitTail10):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
@@ -3140,14 +2992,10 @@ L(ExitTail10):
.p2align 4
L(ExitTail11):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
@@ -3162,14 +3010,10 @@ L(ExitTail11):
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
@@ -3184,14 +3028,10 @@ L(ExitTail12):
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT_TAIL (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
@@ -3206,19 +3046,15 @@ L(ExitTail13):
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT_TAIL (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
@@ -3228,36 +3064,22 @@ L(ExitTail14):
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT_TAIL (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
# endif
RETURN
.p2align 4
L(ExitTail16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT_TAIL (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
@@ -3268,13 +3090,14 @@ L(ExitTail16):
# endif
# endif
RETURN
-#endif
+# endif
# ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
- CFI_PUSH (%esi)
- CFI_PUSH (%edi)
+ CFI_PUSH (%esi)
+ CFI_PUSH (%edi)
# endif
+ .p2align 4
L(StrncpyLeaveCase2OrCase3):
test %eax, %eax
jnz L(Aligned64LeaveCase2)
@@ -3327,153 +3150,153 @@ L(Aligned64LeaveCase2):
lea 16(%esi), %esi
lea -16(%ebx), %ebx
jmp L(CopyFrom1To16BytesCase2)
-/* -------------------------------------------------- */
+
+/*--------------------------------------------------*/
+ .p2align 4
L(StrncpyExit1Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $15, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
mov $15, %esi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit2Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $14, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
mov $14, %esi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit3Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $13, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
mov $13, %esi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit4Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
mov $12, %esi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit5Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $11, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
mov $11, %esi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit6Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $10, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
mov $10, %esi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit7Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $9, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
mov $9, %esi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit8Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $8, %esi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit9Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $7, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $7, %esi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit10Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $6, %xmm6
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
mov $6, %esi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit11Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $5, %xmm6
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
mov $5, %esi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit12Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit13Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $3, %xmm6
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
mov $3, %esi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit14Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $2, %xmm6
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
mov $2, %esi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit15Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $1, %xmm6
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
mov $1, %esi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
@@ -3483,36 +3306,29 @@ L(StrncpyLeave1):
add $48, %ebx
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit1)
- palignr $1, %xmm1, %xmm2
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 31+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit1):
- movaps (%edx, %esi), %xmm6
- psrldq $15, %xmm6
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 15(%esi), %esi
+ lea 15(%edx, %esi), %edx
+ lea 15(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave2):
@@ -3520,36 +3336,29 @@ L(StrncpyLeave2):
add $48, %ebx
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit2)
- palignr $2, %xmm1, %xmm2
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 30+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit2):
- movaps (%edx, %esi), %xmm6
- psrldq $14, %xmm6
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 14(%esi), %esi
+ lea 14(%edx, %esi), %edx
+ lea 14(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave3):
@@ -3557,36 +3366,29 @@ L(StrncpyLeave3):
add $48, %ebx
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit3)
- palignr $3, %xmm1, %xmm2
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 29+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit3):
- movaps (%edx, %esi), %xmm6
- psrldq $13, %xmm6
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 13(%esi), %esi
+ lea 13(%edx, %esi), %edx
+ lea 13(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave4):
@@ -3594,36 +3396,31 @@ L(StrncpyLeave4):
add $48, %ebx
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit4)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 28+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit4):
- movaps (%edx, %esi), %xmm6
- psrldq $12, %xmm6
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 12(%esi), %esi
+ lea 12(%edx, %esi), %edx
+ lea 12(%ecx, %esi), %ecx
+ movlpd -12(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -12(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave5):
@@ -3631,36 +3428,31 @@ L(StrncpyLeave5):
add $48, %ebx
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit5)
- palignr $5, %xmm1, %xmm2
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 27+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit5):
- movaps (%edx, %esi), %xmm6
- psrldq $11, %xmm6
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 11(%esi), %esi
+ lea 11(%edx, %esi), %edx
+ lea 11(%ecx, %esi), %ecx
+ movlpd -11(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -11(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave6):
@@ -3668,36 +3460,32 @@ L(StrncpyLeave6):
add $48, %ebx
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit6)
- palignr $6, %xmm1, %xmm2
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 26+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit6):
- movaps (%edx, %esi), %xmm6
- psrldq $10, %xmm6
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 10(%esi), %esi
+ lea 10(%edx, %esi), %edx
+ lea 10(%ecx, %esi), %ecx
+
+ movlpd -10(%ecx), %xmm0
+ movw -2(%ecx), %ax
+ movlpd %xmm0, -10(%edx)
+ movw %ax, -2(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave7):
@@ -3705,36 +3493,32 @@ L(StrncpyLeave7):
add $48, %ebx
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit7)
- palignr $7, %xmm1, %xmm2
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 25+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit7):
- movaps (%edx, %esi), %xmm6
- psrldq $9, %xmm6
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 9(%esi), %esi
+ lea 9(%edx, %esi), %edx
+ lea 9(%ecx, %esi), %ecx
+
+ movlpd -9(%ecx), %xmm0
+ movb -1(%ecx), %ah
+ movlpd %xmm0, -9(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave8):
@@ -3742,36 +3526,29 @@ L(StrncpyLeave8):
add $48, %ebx
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit8)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 24+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit8):
- movaps (%edx, %esi), %xmm6
- psrldq $8, %xmm6
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 8(%esi), %esi
+ lea 8(%edx, %esi), %edx
+ lea 8(%ecx, %esi), %ecx
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave9):
@@ -3779,36 +3556,30 @@ L(StrncpyLeave9):
add $48, %ebx
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit9)
- palignr $9, %xmm1, %xmm2
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 23+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit9):
- movaps (%edx, %esi), %xmm6
- psrldq $7, %xmm6
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 7(%esi), %esi
+ lea 7(%edx, %esi), %edx
+ lea 7(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave10):
@@ -3816,36 +3587,30 @@ L(StrncpyLeave10):
add $48, %ebx
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit10)
- palignr $10, %xmm1, %xmm2
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 22+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit10):
- movaps (%edx, %esi), %xmm6
- psrldq $6, %xmm6
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 6(%esi), %esi
+ lea 6(%edx, %esi), %edx
+ lea 6(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave11):
@@ -3853,36 +3618,31 @@ L(StrncpyLeave11):
add $48, %ebx
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit11)
- palignr $11, %xmm1, %xmm2
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 21+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit11):
- movaps (%edx, %esi), %xmm6
- psrldq $5, %xmm6
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 5(%esi), %esi
+ lea 5(%edx, %esi), %edx
+ lea 5(%ecx, %esi), %ecx
+ movl -5(%ecx), %esi
+ movb -1(%ecx), %ah
+ movl %esi, -5(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave12):
@@ -3890,36 +3650,29 @@ L(StrncpyLeave12):
add $48, %ebx
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit12)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 20+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit12):
- movaps (%edx, %esi), %xmm6
- psrldq $4, %xmm6
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 4(%esi), %esi
+ lea 4(%edx, %esi), %edx
+ lea 4(%ecx, %esi), %ecx
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave13):
@@ -3927,36 +3680,30 @@ L(StrncpyLeave13):
add $48, %ebx
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit13)
- palignr $13, %xmm1, %xmm2
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 19+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit13):
- movaps (%edx, %esi), %xmm6
- psrldq $3, %xmm6
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 3(%esi), %esi
+ lea 3(%edx, %esi), %edx
+ lea 3(%ecx, %esi), %ecx
+
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave14):
@@ -3964,36 +3711,29 @@ L(StrncpyLeave14):
add $48, %ebx
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit14)
- palignr $14, %xmm1, %xmm2
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 18+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit14):
- movaps (%edx, %esi), %xmm6
- psrldq $2, %xmm6
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 2(%esi), %esi
+ lea 2(%edx, %esi), %edx
+ lea 2(%ecx, %esi), %ecx
+ movw -2(%ecx), %ax
+ movw %ax, -2(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave15):
@@ -4001,43 +3741,36 @@ L(StrncpyLeave15):
add $48, %ebx
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit15)
- palignr $15, %xmm1, %xmm2
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 17+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit15):
- movaps (%edx, %esi), %xmm6
- psrldq $1, %xmm6
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 1(%esi), %esi
+ lea 1(%edx, %esi), %edx
+ lea 1(%ecx, %esi), %ecx
+ movb -1(%ecx), %ah
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
# endif
# ifndef USE_AS_STRCAT
# ifdef USE_AS_STRNCPY
- CFI_POP (%esi)
- CFI_POP (%edi)
+ CFI_POP (%esi)
+ CFI_POP (%edi)
.p2align 4
L(ExitTail0):
@@ -4046,20 +3779,14 @@ L(ExitTail0):
.p2align 4
L(StrncpyExit15Bytes):
- cmp $9, %ebx
- je L(ExitTail9)
+ cmp $12, %ebx
+ jbe L(StrncpyExit12Bytes)
cmpb $0, 8(%ecx)
jz L(ExitTail9)
- cmp $10, %ebx
- je L(ExitTail10)
cmpb $0, 9(%ecx)
jz L(ExitTail10)
- cmp $11, %ebx
- je L(ExitTail11)
cmpb $0, 10(%ecx)
jz L(ExitTail11)
- cmp $12, %ebx
- je L(ExitTail12)
cmpb $0, 11(%ecx)
jz L(ExitTail12)
cmp $13, %ebx
@@ -4071,9 +3798,9 @@ L(StrncpyExit15Bytes):
cmpb $0, 13(%ecx)
jz L(ExitTail14)
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
+ movlpd %xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
lea 14(%edx), %eax
cmpb $1, (%eax)
@@ -4084,23 +3811,43 @@ L(StrncpyExit15Bytes):
RETURN
.p2align 4
+L(StrncpyExit12Bytes):
+ cmp $9, %ebx
+ je L(ExitTail9)
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmp $10, %ebx
+ je L(ExitTail10)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmp $11, %ebx
+ je L(ExitTail11)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT_TAIL (11)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+
+ .p2align 4
L(StrncpyExit8Bytes):
- cmp $1, %ebx
- je L(ExitTail1)
+ cmp $4, %ebx
+ jbe L(StrncpyExit4Bytes)
cmpb $0, (%ecx)
jz L(ExitTail1)
- cmp $2, %ebx
- je L(ExitTail2)
cmpb $0, 1(%ecx)
jz L(ExitTail2)
- cmp $3, %ebx
- je L(ExitTail3)
cmpb $0, 2(%ecx)
jz L(ExitTail3)
- cmp $4, %ebx
- je L(ExitTail4)
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+
cmp $5, %ebx
je L(ExitTail5)
cmpb $0, 4(%ecx)
@@ -4123,8 +3870,32 @@ L(StrncpyExit8Bytes):
movl %edx, %eax
# endif
RETURN
-# endif
+ .p2align 4
+L(StrncpyExit4Bytes):
+ test %ebx, %ebx
+ jz L(ExitTail0)
+ cmp $1, %ebx
+ je L(ExitTail1)
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmp $2, %ebx
+ je L(ExitTail2)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmp $3, %ebx
+ je L(ExitTail3)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT_TAIL (3)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+# endif
END (STRCPY)
# endif
diff --git a/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
index 84d92a8bd..abeea2226 100644
--- a/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ b/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
@@ -54,7 +54,6 @@ ENTRY (__wcscpy_ssse3)
PUSH (%edi)
mov %edx, %edi
-
PUSH (%esi)
lea 16(%ecx), %esi
@@ -220,7 +219,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -228,15 +226,14 @@ L(Shl4Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
@@ -248,7 +245,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -256,13 +252,11 @@ L(Shl4Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -305,14 +299,13 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
+ POP (%esi)
add $12, %edx
add $12, %ecx
-
- POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
@@ -337,7 +330,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -345,15 +337,14 @@ L(Shl8Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
@@ -365,7 +356,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -373,13 +363,11 @@ L(Shl8Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -422,14 +410,11 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ POP (%esi)
add $8, %edx
add $8, %ecx
-
- POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
@@ -454,7 +439,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -462,15 +446,14 @@ L(Shl12Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
@@ -482,7 +465,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -490,13 +472,11 @@ L(Shl12Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -539,11 +519,9 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
.p2align 4
L(CopyFrom1To16Bytes):
@@ -555,6 +533,7 @@ L(CopyFrom1To16Bytes):
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
+L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
@@ -564,6 +543,7 @@ L(CopyFrom1To16Bytes):
L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
+L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
diff --git a/libc/sysdeps/ia64/bits/byteswap.h b/libc/sysdeps/ia64/bits/byteswap.h
index d64914f36..29d0e37d1 100644
--- a/libc/sysdeps/ia64/bits/byteswap.h
+++ b/libc/sysdeps/ia64/bits/byteswap.h
@@ -1,5 +1,6 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997,1998,2000,2002,2003,2008 Free Software Foundation, Inc.
+ Copyright (C) 1997,1998,2000,2002,2003,2008,2011
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -77,17 +78,17 @@ __bswap_32 (unsigned int __bsx)
/* Swap bytes in 64 bit value. */
-#define __bswap_constant_64(x) \
- ((((x) & 0xff00000000000000ul) >> 56) \
- | (((x) & 0x00ff000000000000ul) >> 40) \
- | (((x) & 0x0000ff0000000000ul) >> 24) \
- | (((x) & 0x000000ff00000000ul) >> 8) \
- | (((x) & 0x00000000ff000000ul) << 8) \
- | (((x) & 0x0000000000ff0000ul) << 24) \
- | (((x) & 0x000000000000ff00ul) << 40) \
- | (((x) & 0x00000000000000fful) << 56))
-
#if defined __GNUC__ && __GNUC__ >= 2
+# define __bswap_constant_64(x) \
+ (__extension__ ((((x) & 0xff00000000000000ul) >> 56) \
+ | (((x) & 0x00ff000000000000ul) >> 40) \
+ | (((x) & 0x0000ff0000000000ul) >> 24) \
+ | (((x) & 0x000000ff00000000ul) >> 8) \
+ | (((x) & 0x00000000ff000000ul) << 8) \
+ | (((x) & 0x0000000000ff0000ul) << 24) \
+ | (((x) & 0x000000000000ff00ul) << 40) \
+ | (((x) & 0x00000000000000fful) << 56)))
+
# define __bswap_64(x) \
(__extension__ \
({ register unsigned long int __v, __x = (x); \
@@ -97,9 +98,19 @@ __bswap_32 (unsigned int __bsx)
__asm__ __volatile__ ("mux1 %0 = %1, @rev ;;" \
: "=r" (__v) \
: "r" ((unsigned long int) (__x))); \
- __v; }))
+ __v; }))
#else
+# define __bswap_constant_64(x) \
+ ((((x) & 0xff00000000000000ul) >> 56) \
+ | (((x) & 0x00ff000000000000ul) >> 40) \
+ | (((x) & 0x0000ff0000000000ul) >> 24) \
+ | (((x) & 0x000000ff00000000ul) >> 8) \
+ | (((x) & 0x00000000ff000000ul) << 8) \
+ | (((x) & 0x0000000000ff0000ul) << 24) \
+ | (((x) & 0x000000000000ff00ul) << 40) \
+ | (((x) & 0x00000000000000fful) << 56))
+
static __inline unsigned long int
__bswap_64 (unsigned long int __bsx)
{
diff --git a/libc/sysdeps/s390/bits/byteswap.h b/libc/sysdeps/s390/bits/byteswap.h
index 4bfd5fa06..0e0346bba 100644
--- a/libc/sysdeps/s390/bits/byteswap.h
+++ b/libc/sysdeps/s390/bits/byteswap.h
@@ -1,5 +1,5 @@
/* Macros to swap the order of bytes in integer values. s390 version.
- Copyright (C) 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2000-2003, 2008, 2011 Free Software Foundation, Inc.
Contributed by Martin Schwidefsky (schwidefsky@de.ibm.com).
This file is part of the GNU C Library.
@@ -35,31 +35,31 @@
# if __WORDSIZE == 64
# define __bswap_16(x) \
(__extension__ \
- ({ unsigned short int __v, __x = (x); \
+ ({ unsigned short int __v, __x = (x); \
if (__builtin_constant_p (x)) \
__v = __bswap_constant_16 (__x); \
else { \
- unsigned short int __tmp = (unsigned short int) (__x); \
- __asm__ __volatile__ ( \
- "lrvh %0,%1" \
- : "=&d" (__v) : "m" (__tmp) ); \
- } \
+ unsigned short int __tmp = (unsigned short int) (__x); \
+ __asm__ __volatile__ ( \
+ "lrvh %0,%1" \
+ : "=&d" (__v) : "m" (__tmp) ); \
+ } \
__v; }))
# else
# define __bswap_16(x) \
(__extension__ \
- ({ unsigned short int __v, __x = (x); \
+ ({ unsigned short int __v, __x = (x); \
if (__builtin_constant_p (x)) \
__v = __bswap_constant_16 (__x); \
else { \
- unsigned short int __tmp = (unsigned short int) (__x); \
- __asm__ __volatile__ ( \
- "sr %0,%0\n" \
- "la 1,%1\n" \
- "icm %0,2,1(1)\n" \
- "ic %0,0(1)" \
- : "=&d" (__v) : "m" (__tmp) : "1"); \
- } \
+ unsigned short int __tmp = (unsigned short int) (__x); \
+ __asm__ __volatile__ ( \
+ "sr %0,%0\n" \
+ "la 1,%1\n" \
+ "icm %0,2,1(1)\n" \
+ "ic %0,0(1)" \
+ : "=&d" (__v) : "m" (__tmp) : "1"); \
+ } \
__v; }))
# endif
#else
@@ -80,32 +80,32 @@ __bswap_16 (unsigned short int __bsx)
# if __WORDSIZE == 64
# define __bswap_32(x) \
(__extension__ \
- ({ unsigned int __v, __x = (x); \
+ ({ unsigned int __v, __x = (x); \
if (__builtin_constant_p (x)) \
__v = __bswap_constant_32 (__x); \
else { \
- unsigned int __tmp = (unsigned int) (__x); \
- __asm__ __volatile__ ( \
- "lrv %0,%1" \
- : "=&d" (__v) : "m" (__tmp)); \
- } \
+ unsigned int __tmp = (unsigned int) (__x); \
+ __asm__ __volatile__ ( \
+ "lrv %0,%1" \
+ : "=&d" (__v) : "m" (__tmp)); \
+ } \
__v; }))
# else
# define __bswap_32(x) \
(__extension__ \
- ({ unsigned int __v, __x = (x); \
+ ({ unsigned int __v, __x = (x); \
if (__builtin_constant_p (x)) \
__v = __bswap_constant_32 (__x); \
else { \
- unsigned int __tmp = (unsigned int) (__x); \
- __asm__ __volatile__ ( \
- "la 1,%1\n" \
- "icm %0,8,3(1)\n" \
- "icm %0,4,2(1)\n" \
- "icm %0,2,1(1)\n" \
- "ic %0,0(1)" \
- : "=&d" (__v) : "m" (__tmp) : "1"); \
- } \
+ unsigned int __tmp = (unsigned int) (__x); \
+ __asm__ __volatile__ ( \
+ "la 1,%1\n" \
+ "icm %0,8,3(1)\n" \
+ "icm %0,4,2(1)\n" \
+ "icm %0,2,1(1)\n" \
+ "ic %0,0(1)" \
+ : "=&d" (__v) : "m" (__tmp) : "1"); \
+ } \
__v; }))
# endif
#else
@@ -117,37 +117,51 @@ __bswap_32 (unsigned int __bsx)
#endif
/* Swap bytes in 64 bit value. */
-#define __bswap_constant_64(x) \
- ((((x)&0xff00000000000000) >> 56) | (((x)&0x00ff000000000000) >> 40) | \
- (((x)&0x0000ff0000000000) >> 24) | (((x)&0x000000ff00000000) >> 8) | \
- (((x)&0x00000000ff000000) << 8) | (((x)&0x0000000000ff0000) << 24) | \
- (((x)&0x000000000000ff00) << 40) | (((x)&0x00000000000000ff) << 56))
-
#if defined __GNUC__ && __GNUC__ >= 2
+# define __bswap_constant_64(x) \
+ (__extension__ ((((x) & 0xff00000000000000ul) >> 56) \
+ | (((x) & 0x00ff000000000000ul) >> 40) \
+ | (((x) & 0x0000ff0000000000ul) >> 24) \
+ | (((x) & 0x000000ff00000000ul) >> 8) \
+ | (((x) & 0x00000000ff000000ul) << 8) \
+ | (((x) & 0x0000000000ff0000ul) << 24) \
+ | (((x) & 0x000000000000ff00ul) << 40) \
+ | (((x) & 0x00000000000000fful) << 56)))
+
# if __WORDSIZE == 64
# define __bswap_64(x) \
(__extension__ \
- ({ unsigned long __w, __x = (x); \
+ ({ unsigned long __w, __x = (x); \
if (__builtin_constant_p (x)) \
__w = __bswap_constant_64 (__x); \
else { \
- unsigned long __tmp = (unsigned long) (__x); \
- __asm__ __volatile__ ( \
- "lrvg %0,%1" \
- : "=&d" (__w) : "m" (__tmp)); \
- } \
+ unsigned long __tmp = (unsigned long) (__x); \
+ __asm__ __volatile__ ( \
+ "lrvg %0,%1" \
+ : "=&d" (__w) : "m" (__tmp)); \
+ } \
__w; }))
# else
# define __bswap_64(x) \
__extension__ \
({ union { unsigned long long int __ll; \
- unsigned long int __l[2]; } __w, __r; \
- __w.__ll = (x); \
- __r.__l[0] = __bswap_32 (__w.__l[1]); \
- __r.__l[1] = __bswap_32 (__w.__l[0]); \
- __r.__ll; })
+ unsigned long int __l[2]; } __w, __r; \
+ __w.__ll = (x); \
+ __r.__l[0] = __bswap_32 (__w.__l[1]); \
+ __r.__l[1] = __bswap_32 (__w.__l[0]); \
+ __r.__ll; })
# endif
#else
+# define __bswap_constant_64(x) \
+ ((((x) & 0xff00000000000000ul) >> 56) \
+ | (((x) & 0x00ff000000000000ul) >> 40) \
+ | (((x) & 0x0000ff0000000000ul) >> 24) \
+ | (((x) & 0x000000ff00000000ul) >> 8) \
+ | (((x) & 0x00000000ff000000ul) << 8) \
+ | (((x) & 0x0000000000ff0000ul) << 24) \
+ | (((x) & 0x000000000000ff00ul) << 40) \
+ | (((x) & 0x00000000000000fful) << 56))
+
static __inline unsigned long long int
__bswap_64 (unsigned long long int __bsx)
{
diff --git a/libc/sysdeps/x86_64/bits/byteswap.h b/libc/sysdeps/x86_64/bits/byteswap.h
index e350fb806..c6db93c41 100644
--- a/libc/sysdeps/x86_64/bits/byteswap.h
+++ b/libc/sysdeps/x86_64/bits/byteswap.h
@@ -1,5 +1,5 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008, 2010
+ Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008, 2010, 2011
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -99,14 +99,14 @@
#if defined __GNUC__ && __GNUC__ >= 2
/* Swap bytes in 64 bit value. */
# define __bswap_constant_64(x) \
- ((((x) & 0xff00000000000000ull) >> 56) \
- | (((x) & 0x00ff000000000000ull) >> 40) \
- | (((x) & 0x0000ff0000000000ull) >> 24) \
- | (((x) & 0x000000ff00000000ull) >> 8) \
- | (((x) & 0x00000000ff000000ull) << 8) \
- | (((x) & 0x0000000000ff0000ull) << 24) \
- | (((x) & 0x000000000000ff00ull) << 40) \
- | (((x) & 0x00000000000000ffull) << 56))
+ (__extension__ ((((x) & 0xff00000000000000ull) >> 56) \
+ | (((x) & 0x00ff000000000000ull) >> 40) \
+ | (((x) & 0x0000ff0000000000ull) >> 24) \
+ | (((x) & 0x000000ff00000000ull) >> 8) \
+ | (((x) & 0x00000000ff000000ull) << 8) \
+ | (((x) & 0x0000000000ff0000ull) << 24) \
+ | (((x) & 0x000000000000ff00ull) << 40) \
+ | (((x) & 0x00000000000000ffull) << 56)))
# if __WORDSIZE == 64
# define __bswap_64(x) \
diff --git a/libc/sysdeps/x86_64/dl-machine.h b/libc/sysdeps/x86_64/dl-machine.h
index 1068af6bb..a8fbc1625 100644
--- a/libc/sysdeps/x86_64/dl-machine.h
+++ b/libc/sysdeps/x86_64/dl-machine.h
@@ -98,7 +98,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
/* The GOT entries for functions in the PLT have not yet been filled
in. Their initial contents will arrange when called to push an
offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1],
- and then jump to _GLOBAL_OFFSET_TABLE[2]. */
+ and then jump to _GLOBAL_OFFSET_TABLE_[2]. */
got = (Elf64_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
/* If a library is prelinked but we have to relocate anyway,
we have to be able to undo the prelinking of .got.plt.
@@ -214,7 +214,7 @@ _dl_start_user:\n\
/* The x86-64 never uses Elf64_Rel relocations. */
#define ELF_MACHINE_NO_REL 1
-/* We define an initialization functions. This is called very early in
+/* We define an initialization function. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
@@ -234,8 +234,8 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t,
return *reloc_addr = value;
}
-/* Return the final value of a plt relocation. On x86-64 the
- JUMP_SLOT relocation ignores the addend. */
+/* Return the final value of a PLT relocation. On x86-64 the
+ JUMP_SLOT relocation ignores the addend. */
static inline Elf64_Addr
elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
Elf64_Addr value)
diff --git a/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S
index c4ec54cd2..b1047652d 100644
--- a/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S
@@ -29,6 +29,7 @@
.section .text.ssse3,"ax",@progbits
ENTRY (STRCPY)
+
mov %rsi, %rcx
# ifdef USE_AS_STRNCPY
mov %rdx, %r8
@@ -39,7 +40,7 @@ ENTRY (STRCPY)
jz L(Exit0)
cmp $8, %r8
jbe L(StrncpyExit8Bytes)
-# endif
+# endif
cmpb $0, (%rcx)
jz L(Exit1)
cmpb $0, 1(%rcx)
@@ -56,10 +57,10 @@ ENTRY (STRCPY)
jz L(Exit7)
cmpb $0, 7(%rcx)
jz L(Exit8)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
cmp $16, %r8
jb L(StrncpyExit15Bytes)
-# endif
+# endif
cmpb $0, 8(%rcx)
jz L(Exit9)
cmpb $0, 9(%rcx)
@@ -74,10 +75,10 @@ ENTRY (STRCPY)
jz L(Exit14)
cmpb $0, 14(%rcx)
jz L(Exit15)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
cmp $16, %r8
je L(Exit16)
-# endif
+# endif
cmpb $0, 15(%rcx)
jz L(Exit16)
# endif
@@ -87,25 +88,15 @@ ENTRY (STRCPY)
sub $16, %r8
and $0xf, %rsi
-/* add 16 bytes rcx_shift to r8 */
+/* add 16 bytes rcx_offset to r8 */
+
add %rsi, %r8
# endif
lea 16(%rcx), %rsi
-/* Now:
- rsi = alignment_16(rcx) + rcx_shift + 16;
- rcx_shift = rcx - alignment_16(rcx)
-*/
and $-16, %rsi
-/* Now:
- rsi = alignment_16(rcx) + 16
-*/
pxor %xmm0, %xmm0
mov (%rcx), %r9
mov %r9, (%rdx)
-/*
- look if there is zero symbol in next 16 bytes of string
- from rsi to rsi + 15 and form mask in xmm0
-*/
pcmpeqb (%rsi), %xmm0
mov 8(%rcx), %r9
mov %r9, 8(%rdx)
@@ -115,10 +106,6 @@ ENTRY (STRCPY)
pmovmskb %xmm0, %rax
sub %rcx, %rsi
-/* rsi = 16 - rcx_shift */
-
-/* rax = 0: there isn't end of string from position rsi to rsi+15 */
-
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
@@ -128,17 +115,9 @@ ENTRY (STRCPY)
mov %rdx, %rax
lea 16(%rdx), %rdx
-/* Now:
- rdx = rdx + 16 = alignment_16(rdx) + rdx_shift + 16
-*/
and $-16, %rdx
-
-/* Now: rdx = alignment_16(rdx) + 16 */
-
sub %rdx, %rax
-/* Now: rax = rdx_shift - 16 */
-
# ifdef USE_AS_STRNCPY
add %rax, %rsi
lea -1(%rsi), %rsi
@@ -150,22 +129,11 @@ ENTRY (STRCPY)
L(ContinueCopy):
# endif
sub %rax, %rcx
-/* Now:
- case rcx_shift >= rdx_shift:
- rcx = alignment_16(rcx) + (rcx_shift - rdx_shift) + 16
- case rcx_shift < rdx_shift:
- rcx = alignment_16(rcx) + (16 + rcx_shift - rdx_shift)
-*/
mov %rcx, %rax
and $0xf, %rax
-/* Now:
- case rcx_shift >= rdx_shift: rax = rcx_shift - rdx_shift
- case rcx_shift < rdx_shift: rax = (16 + rcx_shift - rdx_shift)
- rax can be 0, 1, ..., 15
-*/
mov $0, %rsi
-/* case: rcx_shift == rdx_shift */
+/* case: rcx_offset == rdx_offset */
jz L(Align16Both)
@@ -282,10 +250,11 @@ L(Align16Both):
sub %rcx, %rax
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
- lea 48+64(%r8, %rax), %r8
+ lea 112(%r8, %rax), %r8
# endif
mov $-0x40, %rsi
+ .p2align 4
L(Aligned64Loop):
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
@@ -366,7 +335,6 @@ L(Shl1Start):
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
@@ -374,7 +342,7 @@ L(Shl1Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
@@ -382,10 +350,9 @@ L(Shl1Start):
test %rax, %rax
jnz L(Shl1LoopExit)
- palignr $1, %xmm1, %xmm2
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -400,7 +367,6 @@ L(Shl1Start):
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
@@ -408,7 +374,6 @@ L(Shl1Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
@@ -416,8 +381,7 @@ L(Shl1Start):
test %rax, %rax
jnz L(Shl1LoopExit)
- palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 31(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -432,6 +396,8 @@ L(Shl1Start):
# endif
movaps -1(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl1LoopStart):
movaps 15(%rcx), %xmm2
movaps 31(%rcx), %xmm3
@@ -465,11 +431,9 @@ L(Shl1LoopStart):
jmp L(Shl1LoopStart)
L(Shl1LoopExit):
- movaps (%rdx), %xmm6
- psrldq $15, %xmm6
+ movdqu -1(%rcx), %xmm1
mov $15, %rsi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ movdqu %xmm1, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -488,7 +452,6 @@ L(Shl2Start):
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
@@ -496,7 +459,7 @@ L(Shl2Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
@@ -504,10 +467,9 @@ L(Shl2Start):
test %rax, %rax
jnz L(Shl2LoopExit)
- palignr $2, %xmm1, %xmm2
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -522,7 +484,6 @@ L(Shl2Start):
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
@@ -530,7 +491,6 @@ L(Shl2Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
@@ -538,8 +498,7 @@ L(Shl2Start):
test %rax, %rax
jnz L(Shl2LoopExit)
- palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 30(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -554,6 +513,8 @@ L(Shl2Start):
# endif
movaps -2(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl2LoopStart):
movaps 14(%rcx), %xmm2
movaps 30(%rcx), %xmm3
@@ -587,11 +548,9 @@ L(Shl2LoopStart):
jmp L(Shl2LoopStart)
L(Shl2LoopExit):
- movaps (%rdx), %xmm6
- psrldq $14, %xmm6
+ movdqu -2(%rcx), %xmm1
mov $14, %rsi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ movdqu %xmm1, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -610,7 +569,6 @@ L(Shl3Start):
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
@@ -618,7 +576,7 @@ L(Shl3Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
@@ -626,10 +584,9 @@ L(Shl3Start):
test %rax, %rax
jnz L(Shl3LoopExit)
- palignr $3, %xmm1, %xmm2
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -644,7 +601,6 @@ L(Shl3Start):
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
@@ -652,7 +608,6 @@ L(Shl3Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
@@ -660,8 +615,7 @@ L(Shl3Start):
test %rax, %rax
jnz L(Shl3LoopExit)
- palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 29(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -676,6 +630,8 @@ L(Shl3Start):
# endif
movaps -3(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl3LoopStart):
movaps 13(%rcx), %xmm2
movaps 29(%rcx), %xmm3
@@ -709,11 +665,9 @@ L(Shl3LoopStart):
jmp L(Shl3LoopStart)
L(Shl3LoopExit):
- movaps (%rdx), %xmm6
- psrldq $13, %xmm6
+ movdqu -3(%rcx), %xmm1
mov $13, %rsi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ movdqu %xmm1, -3(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -732,7 +686,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -740,7 +693,7 @@ L(Shl4Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
@@ -748,10 +701,9 @@ L(Shl4Start):
test %rax, %rax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -766,7 +718,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -774,7 +725,6 @@ L(Shl4Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
@@ -782,8 +732,7 @@ L(Shl4Start):
test %rax, %rax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -798,6 +747,8 @@ L(Shl4Start):
# endif
movaps -4(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl4LoopStart):
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
@@ -831,11 +782,9 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%rdx), %xmm6
- psrldq $12, %xmm6
+ movdqu -4(%rcx), %xmm1
mov $12, %rsi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ movdqu %xmm1, -4(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -854,7 +803,6 @@ L(Shl5Start):
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
@@ -862,7 +810,7 @@ L(Shl5Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
@@ -870,10 +818,9 @@ L(Shl5Start):
test %rax, %rax
jnz L(Shl5LoopExit)
- palignr $5, %xmm1, %xmm2
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -888,7 +835,6 @@ L(Shl5Start):
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
@@ -896,7 +842,6 @@ L(Shl5Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
@@ -904,8 +849,7 @@ L(Shl5Start):
test %rax, %rax
jnz L(Shl5LoopExit)
- palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 27(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -920,6 +864,8 @@ L(Shl5Start):
# endif
movaps -5(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl5LoopStart):
movaps 11(%rcx), %xmm2
movaps 27(%rcx), %xmm3
@@ -953,11 +899,9 @@ L(Shl5LoopStart):
jmp L(Shl5LoopStart)
L(Shl5LoopExit):
- movaps (%rdx), %xmm6
- psrldq $11, %xmm6
+ movdqu -5(%rcx), %xmm1
mov $11, %rsi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ movdqu %xmm1, -5(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -976,7 +920,6 @@ L(Shl6Start):
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
@@ -984,7 +927,7 @@ L(Shl6Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
@@ -992,10 +935,9 @@ L(Shl6Start):
test %rax, %rax
jnz L(Shl6LoopExit)
- palignr $6, %xmm1, %xmm2
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1010,7 +952,6 @@ L(Shl6Start):
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
@@ -1018,7 +959,6 @@ L(Shl6Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
@@ -1026,8 +966,7 @@ L(Shl6Start):
test %rax, %rax
jnz L(Shl6LoopExit)
- palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 26(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1042,6 +981,8 @@ L(Shl6Start):
# endif
movaps -6(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl6LoopStart):
movaps 10(%rcx), %xmm2
movaps 26(%rcx), %xmm3
@@ -1075,11 +1016,11 @@ L(Shl6LoopStart):
jmp L(Shl6LoopStart)
L(Shl6LoopExit):
- movaps (%rdx), %xmm6
- psrldq $10, %xmm6
+ mov (%rcx), %r9
+ mov 6(%rcx), %esi
+ mov %r9, (%rdx)
+ mov %esi, 6(%rdx)
mov $10, %rsi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1098,7 +1039,6 @@ L(Shl7Start):
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
@@ -1106,7 +1046,7 @@ L(Shl7Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
@@ -1114,10 +1054,9 @@ L(Shl7Start):
test %rax, %rax
jnz L(Shl7LoopExit)
- palignr $7, %xmm1, %xmm2
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1132,7 +1071,6 @@ L(Shl7Start):
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
@@ -1140,7 +1078,6 @@ L(Shl7Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
@@ -1148,8 +1085,7 @@ L(Shl7Start):
test %rax, %rax
jnz L(Shl7LoopExit)
- palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 25(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1164,6 +1100,8 @@ L(Shl7Start):
# endif
movaps -7(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl7LoopStart):
movaps 9(%rcx), %xmm2
movaps 25(%rcx), %xmm3
@@ -1197,11 +1135,11 @@ L(Shl7LoopStart):
jmp L(Shl7LoopStart)
L(Shl7LoopExit):
- movaps (%rdx), %xmm6
- psrldq $9, %xmm6
+ mov (%rcx), %r9
+ mov 5(%rcx), %esi
+ mov %r9, (%rdx)
+ mov %esi, 5(%rdx)
mov $9, %rsi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1220,7 +1158,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -1228,7 +1165,7 @@ L(Shl8Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
@@ -1236,10 +1173,9 @@ L(Shl8Start):
test %rax, %rax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1254,7 +1190,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -1262,7 +1197,6 @@ L(Shl8Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
@@ -1270,8 +1204,7 @@ L(Shl8Start):
test %rax, %rax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1286,6 +1219,8 @@ L(Shl8Start):
# endif
movaps -8(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl8LoopStart):
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
@@ -1319,11 +1254,9 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%rdx), %xmm6
- psrldq $8, %xmm6
+ mov (%rcx), %r9
mov $8, %rsi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, (%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1342,7 +1275,6 @@ L(Shl9Start):
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
@@ -1350,7 +1282,7 @@ L(Shl9Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
@@ -1358,10 +1290,9 @@ L(Shl9Start):
test %rax, %rax
jnz L(Shl9LoopExit)
- palignr $9, %xmm1, %xmm2
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1376,7 +1307,6 @@ L(Shl9Start):
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
@@ -1384,7 +1314,6 @@ L(Shl9Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
@@ -1392,8 +1321,7 @@ L(Shl9Start):
test %rax, %rax
jnz L(Shl9LoopExit)
- palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 23(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1408,6 +1336,8 @@ L(Shl9Start):
# endif
movaps -9(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl9LoopStart):
movaps 7(%rcx), %xmm2
movaps 23(%rcx), %xmm3
@@ -1441,11 +1371,9 @@ L(Shl9LoopStart):
jmp L(Shl9LoopStart)
L(Shl9LoopExit):
- movaps (%rdx), %xmm6
- psrldq $7, %xmm6
+ mov -1(%rcx), %r9
mov $7, %rsi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1464,7 +1392,6 @@ L(Shl10Start):
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
@@ -1472,7 +1399,7 @@ L(Shl10Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
@@ -1480,10 +1407,9 @@ L(Shl10Start):
test %rax, %rax
jnz L(Shl10LoopExit)
- palignr $10, %xmm1, %xmm2
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1498,7 +1424,6 @@ L(Shl10Start):
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
@@ -1506,7 +1431,6 @@ L(Shl10Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
@@ -1514,8 +1438,7 @@ L(Shl10Start):
test %rax, %rax
jnz L(Shl10LoopExit)
- palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 22(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1530,6 +1453,8 @@ L(Shl10Start):
# endif
movaps -10(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl10LoopStart):
movaps 6(%rcx), %xmm2
movaps 22(%rcx), %xmm3
@@ -1563,11 +1488,9 @@ L(Shl10LoopStart):
jmp L(Shl10LoopStart)
L(Shl10LoopExit):
- movaps (%rdx), %xmm6
- psrldq $6, %xmm6
+ mov -2(%rcx), %r9
mov $6, %rsi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1586,7 +1509,6 @@ L(Shl11Start):
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
@@ -1594,7 +1516,7 @@ L(Shl11Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
@@ -1602,10 +1524,9 @@ L(Shl11Start):
test %rax, %rax
jnz L(Shl11LoopExit)
- palignr $11, %xmm1, %xmm2
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1620,7 +1541,6 @@ L(Shl11Start):
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
@@ -1628,7 +1548,6 @@ L(Shl11Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
@@ -1636,8 +1555,7 @@ L(Shl11Start):
test %rax, %rax
jnz L(Shl11LoopExit)
- palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 21(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1652,6 +1570,8 @@ L(Shl11Start):
# endif
movaps -11(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl11LoopStart):
movaps 5(%rcx), %xmm2
movaps 21(%rcx), %xmm3
@@ -1685,11 +1605,9 @@ L(Shl11LoopStart):
jmp L(Shl11LoopStart)
L(Shl11LoopExit):
- movaps (%rdx), %xmm6
- psrldq $5, %xmm6
+ mov -3(%rcx), %r9
mov $5, %rsi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, -3(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1708,7 +1626,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -1716,7 +1633,7 @@ L(Shl12Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
@@ -1724,10 +1641,9 @@ L(Shl12Start):
test %rax, %rax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1742,7 +1658,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -1750,7 +1665,6 @@ L(Shl12Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
@@ -1758,8 +1672,7 @@ L(Shl12Start):
test %rax, %rax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1774,6 +1687,8 @@ L(Shl12Start):
# endif
movaps -12(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl12LoopStart):
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
@@ -1807,11 +1722,9 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%rdx), %xmm6
- psrldq $4, %xmm6
+ mov (%rcx), %r9d
mov $4, %rsi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, (%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1830,7 +1743,6 @@ L(Shl13Start):
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
@@ -1838,7 +1750,7 @@ L(Shl13Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
@@ -1846,10 +1758,9 @@ L(Shl13Start):
test %rax, %rax
jnz L(Shl13LoopExit)
- palignr $13, %xmm1, %xmm2
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1864,7 +1775,6 @@ L(Shl13Start):
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
@@ -1872,7 +1782,6 @@ L(Shl13Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
@@ -1880,8 +1789,7 @@ L(Shl13Start):
test %rax, %rax
jnz L(Shl13LoopExit)
- palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 19(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -1896,6 +1804,8 @@ L(Shl13Start):
# endif
movaps -13(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl13LoopStart):
movaps 3(%rcx), %xmm2
movaps 19(%rcx), %xmm3
@@ -1929,11 +1839,9 @@ L(Shl13LoopStart):
jmp L(Shl13LoopStart)
L(Shl13LoopExit):
- movaps (%rdx), %xmm6
- psrldq $3, %xmm6
+ mov -1(%rcx), %r9d
mov $3, %rsi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1952,7 +1860,6 @@ L(Shl14Start):
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
@@ -1960,7 +1867,7 @@ L(Shl14Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
@@ -1968,10 +1875,9 @@ L(Shl14Start):
test %rax, %rax
jnz L(Shl14LoopExit)
- palignr $14, %xmm1, %xmm2
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -1986,7 +1892,6 @@ L(Shl14Start):
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
@@ -1994,7 +1899,6 @@ L(Shl14Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
@@ -2002,8 +1906,7 @@ L(Shl14Start):
test %rax, %rax
jnz L(Shl14LoopExit)
- palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 18(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -2018,6 +1921,8 @@ L(Shl14Start):
# endif
movaps -14(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl14LoopStart):
movaps 2(%rcx), %xmm2
movaps 18(%rcx), %xmm3
@@ -2051,11 +1956,9 @@ L(Shl14LoopStart):
jmp L(Shl14LoopStart)
L(Shl14LoopExit):
- movaps (%rdx), %xmm6
- psrldq $2, %xmm6
+ mov -2(%rcx), %r9d
mov $2, %rsi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -2074,7 +1977,6 @@ L(Shl15Start):
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
@@ -2082,7 +1984,7 @@ L(Shl15Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
@@ -2090,10 +1992,9 @@ L(Shl15Start):
test %rax, %rax
jnz L(Shl15LoopExit)
- palignr $15, %xmm1, %xmm2
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -2108,7 +2009,6 @@ L(Shl15Start):
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
@@ -2116,7 +2016,6 @@ L(Shl15Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
@@ -2124,8 +2023,7 @@ L(Shl15Start):
test %rax, %rax
jnz L(Shl15LoopExit)
- palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 17(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -2140,6 +2038,8 @@ L(Shl15Start):
# endif
movaps -15(%rcx), %xmm1
+/* 64 bytes loop */
+ .p2align 4
L(Shl15LoopStart):
movaps 1(%rcx), %xmm2
movaps 17(%rcx), %xmm3
@@ -2173,16 +2073,15 @@ L(Shl15LoopStart):
jmp L(Shl15LoopStart)
L(Shl15LoopExit):
- movaps (%rdx), %xmm6
- psrldq $1, %xmm6
+ mov -3(%rcx), %r9d
mov $1, %rsi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, -3(%rdx)
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
# ifndef USE_AS_STRCAT
+
.p2align 4
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
@@ -2463,7 +2362,7 @@ L(Exit4):
# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
+# endif
# endif
ret
@@ -2485,7 +2384,7 @@ L(Exit5):
# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
+# endif
# endif
ret
@@ -2507,7 +2406,7 @@ L(Exit6):
# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
+# endif
# endif
ret
@@ -2617,7 +2516,7 @@ L(Exit12):
# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
+# endif
# endif
ret
@@ -2955,11 +2854,10 @@ L(StrncpyExit8Bytes):
ret
# endif
-
# endif
# ifdef USE_AS_STRNCPY
-
+ .p2align 4
L(StrncpyLeaveCase2OrCase3):
test %rax, %rax
jnz L(Aligned64LeaveCase2)
@@ -3014,710 +2912,639 @@ L(Aligned64LeaveCase2):
lea -16(%r8), %r8
jmp L(CopyFrom1To16BytesCase2)
/*--------------------------------------------------*/
+ .p2align 4
L(StrncpyExit1Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $15, %xmm6
+ movdqu -1(%rcx), %xmm0
+ movdqu %xmm0, -1(%rdx)
mov $15, %rsi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit2Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $14, %xmm6
+ movdqu -2(%rcx), %xmm0
+ movdqu %xmm0, -2(%rdx)
mov $14, %rsi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit3Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $13, %xmm6
+ movdqu -3(%rcx), %xmm0
+ movdqu %xmm0, -3(%rdx)
mov $13, %rsi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit4Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $12, %xmm6
+ movdqu -4(%rcx), %xmm0
+ movdqu %xmm0, -4(%rdx)
mov $12, %rsi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit5Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $11, %xmm6
+ movdqu -5(%rcx), %xmm0
+ movdqu %xmm0, -5(%rdx)
mov $11, %rsi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit6Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $10, %xmm6
- mov $10, %rsi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov (%rcx), %rsi
+ mov 6(%rcx), %r9d
+ mov %r9d, 6(%rdx)
+ mov %rsi, (%rdx)
test %rax, %rax
+ mov $10, %rsi
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit7Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $9, %xmm6
- mov $9, %rsi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov (%rcx), %rsi
+ mov 5(%rcx), %r9d
+ mov %r9d, 5(%rdx)
+ mov %rsi, (%rdx)
test %rax, %rax
+ mov $9, %rsi
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit8Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $8, %xmm6
+ mov (%rcx), %r9
mov $8, %rsi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit9Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $7, %xmm6
+ mov -1(%rcx), %r9
mov $7, %rsi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, -1(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit10Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $6, %xmm6
+ mov -2(%rcx), %r9
mov $6, %rsi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, -2(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit11Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $5, %xmm6
+ mov -3(%rcx), %r9
mov $5, %rsi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, -3(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit12Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $4, %xmm6
+ mov (%rcx), %r9d
mov $4, %rsi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit13Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $3, %xmm6
+ mov -1(%rcx), %r9d
mov $3, %rsi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, -1(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit14Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $2, %xmm6
+ mov -2(%rcx), %r9d
mov $2, %rsi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, -2(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit15Case2OrCase3):
- movaps (%rdx), %xmm6
- psrldq $1, %xmm6
+ mov -3(%rcx), %r9d
mov $1, %rsi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, -3(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave1):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit1)
- palignr $1, %xmm1, %xmm2
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 31+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit1):
- movaps (%rdx, %rsi), %xmm6
- psrldq $15, %xmm6
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 15(%rsi), %rsi
+ lea 15(%rdx, %rsi), %rdx
+ lea 15(%rcx, %rsi), %rcx
+ mov -15(%rcx), %rsi
+ mov -8(%rcx), %rax
+ mov %rsi, -15(%rdx)
+ mov %rax, -8(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave2):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit2)
- palignr $2, %xmm1, %xmm2
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 30+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit2):
- movaps (%rdx, %rsi), %xmm6
- psrldq $14, %xmm6
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 14(%rsi), %rsi
+ lea 14(%rdx, %rsi), %rdx
+ lea 14(%rcx, %rsi), %rcx
+ mov -14(%rcx), %rsi
+ mov -8(%rcx), %rax
+ mov %rsi, -14(%rdx)
+ mov %rax, -8(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave3):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit3)
- palignr $3, %xmm1, %xmm2
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 29+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit3):
- movaps (%rdx, %rsi), %xmm6
- psrldq $13, %xmm6
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 13(%rsi), %rsi
+ lea 13(%rdx, %rsi), %rdx
+ lea 13(%rcx, %rsi), %rcx
+ mov -13(%rcx), %rsi
+ mov -8(%rcx), %rax
+ mov %rsi, -13(%rdx)
+ mov %rax, -8(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave4):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit4)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 28+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit4):
- movaps (%rdx, %rsi), %xmm6
- psrldq $12, %xmm6
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 12(%rsi), %rsi
+ lea 12(%rdx, %rsi), %rdx
+ lea 12(%rcx, %rsi), %rcx
+ mov -12(%rcx), %rsi
+ mov -4(%rcx), %eax
+ mov %rsi, -12(%rdx)
+ mov %eax, -4(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave5):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit5)
- palignr $5, %xmm1, %xmm2
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 27+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit5):
- movaps (%rdx, %rsi), %xmm6
- psrldq $11, %xmm6
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 11(%rsi), %rsi
+ lea 11(%rdx, %rsi), %rdx
+ lea 11(%rcx, %rsi), %rcx
+ mov -11(%rcx), %rsi
+ mov -4(%rcx), %eax
+ mov %rsi, -11(%rdx)
+ mov %eax, -4(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave6):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit6)
- palignr $6, %xmm1, %xmm2
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 26+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit6):
- movaps (%rdx, %rsi), %xmm6
- psrldq $10, %xmm6
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 10(%rsi), %rsi
+ lea 10(%rdx, %rsi), %rdx
+ lea 10(%rcx, %rsi), %rcx
+ mov -10(%rcx), %rsi
+ movw -2(%rcx), %ax
+ mov %rsi, -10(%rdx)
+ movw %ax, -2(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave7):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit7)
- palignr $7, %xmm1, %xmm2
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 25+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit7):
- movaps (%rdx, %rsi), %xmm6
- psrldq $9, %xmm6
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 9(%rsi), %rsi
+ lea 9(%rdx, %rsi), %rdx
+ lea 9(%rcx, %rsi), %rcx
+ mov -9(%rcx), %rsi
+ movb -1(%rcx), %ah
+ mov %rsi, -9(%rdx)
+ movb %ah, -1(%rdx)
+ xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave8):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit8)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 24+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit8):
- movaps (%rdx, %rsi), %xmm6
- psrldq $8, %xmm6
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 8(%rsi), %rsi
+ lea 8(%rdx, %rsi), %rdx
+ lea 8(%rcx, %rsi), %rcx
+ mov -8(%rcx), %rax
+ xor %rsi, %rsi
+ mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave9):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit9)
- palignr $9, %xmm1, %xmm2
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 23+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit9):
- movaps (%rdx, %rsi), %xmm6
- psrldq $7, %xmm6
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 7(%rsi), %rsi
+ lea 7(%rdx, %rsi), %rdx
+ lea 7(%rcx, %rsi), %rcx
+ mov -8(%rcx), %rax
+ xor %rsi, %rsi
+ mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave10):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit10)
- palignr $10, %xmm1, %xmm2
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 22+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit10):
- movaps (%rdx, %rsi), %xmm6
- psrldq $6, %xmm6
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 6(%rsi), %rsi
+ lea 6(%rdx, %rsi), %rdx
+ lea 6(%rcx, %rsi), %rcx
+ mov -8(%rcx), %rax
+ xor %rsi, %rsi
+ mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave11):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit11)
- palignr $11, %xmm1, %xmm2
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 21+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit11):
- movaps (%rdx, %rsi), %xmm6
- psrldq $5, %xmm6
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 5(%rsi), %rsi
+ lea 5(%rdx, %rsi), %rdx
+ lea 5(%rcx, %rsi), %rcx
+ mov -8(%rcx), %rax
+ xor %rsi, %rsi
+ mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave12):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit12)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 20+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit12):
- movaps (%rdx, %rsi), %xmm6
- psrldq $4, %xmm6
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 4(%rsi), %rsi
+ lea 4(%rdx, %rsi), %rdx
+ lea 4(%rcx, %rsi), %rcx
+ mov -4(%rcx), %eax
+ xor %rsi, %rsi
+ mov %eax, -4(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave13):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit13)
- palignr $13, %xmm1, %xmm2
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 19+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit13):
- movaps (%rdx, %rsi), %xmm6
- psrldq $3, %xmm6
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 3(%rsi), %rsi
+ lea 3(%rdx, %rsi), %rdx
+ lea 3(%rcx, %rsi), %rcx
+ mov -4(%rcx), %eax
+ xor %rsi, %rsi
+ mov %eax, -4(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave14):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit14)
- palignr $14, %xmm1, %xmm2
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 18+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit14):
- movaps (%rdx, %rsi), %xmm6
- psrldq $2, %xmm6
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 2(%rsi), %rsi
+ lea 2(%rdx, %rsi), %rdx
+ lea 2(%rcx, %rsi), %rcx
+ movw -2(%rcx), %ax
+ xor %rsi, %rsi
+ movw %ax, -2(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyLeave15):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
lea 16(%rsi), %rsi
- movaps %xmm2, %xmm3
sub $16, %r8
jbe L(StrncpyExit15)
- palignr $15, %xmm1, %xmm2
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
- movaps 17+16(%rcx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
L(StrncpyExit15):
- movaps (%rdx, %rsi), %xmm6
- psrldq $1, %xmm6
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%rdx, %rsi)
- lea 1(%rsi), %rsi
+ lea 1(%rdx, %rsi), %rdx
+ lea 1(%rcx, %rsi), %rcx
+ movb -1(%rcx), %ah
+ xor %rsi, %rsi
+ movb %ah, -1(%rdx)
jmp L(CopyFrom1To16BytesCase3)
+
# endif
# ifndef USE_AS_STRCAT
END (STRCPY)
diff --git a/libc/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/libc/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index 4e292f3c2..477b2cb4e 100644
--- a/libc/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -21,8 +21,9 @@
#ifndef NOT_IN_libc
# include <sysdep.h>
-.text
+ .section .text.ssse3,"ax",@progbits
ENTRY (__wcscpy_ssse3)
+
mov %rsi, %rcx
mov %rdi, %rdx
@@ -136,6 +137,7 @@ L(Align16Both):
mov $-0x40, %rsi
+ .p2align 4
L(Aligned64Loop):
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
@@ -205,7 +207,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -213,15 +214,14 @@ L(Shl4Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %rax, %rax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -233,7 +233,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -245,8 +244,7 @@ L(Shl4Start):
test %rax, %rax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -259,6 +257,7 @@ L(Shl4Start):
movaps -4(%rcx), %xmm1
+ .p2align 4
L(Shl4LoopStart):
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
@@ -289,11 +288,9 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%rdx), %xmm6
- psrldq $12, %xmm6
+ movdqu -4(%rcx), %xmm1
mov $12, %rsi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ movdqu %xmm1, -4(%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -309,7 +306,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -317,15 +313,14 @@ L(Shl8Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %rax, %rax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -337,7 +332,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -345,13 +339,11 @@ L(Shl8Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
test %rax, %rax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -364,6 +356,7 @@ L(Shl8Start):
movaps -8(%rcx), %xmm1
+ .p2align 4
L(Shl8LoopStart):
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
@@ -394,11 +387,9 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%rdx), %xmm6
- psrldq $8, %xmm6
+ mov (%rcx), %r9
mov $8, %rsi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9, (%rdx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -414,7 +405,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -422,15 +412,14 @@ L(Shl12Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %rax, %rax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%rdx), %rdx
@@ -442,7 +431,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -450,13 +438,11 @@ L(Shl12Start):
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
test %rax, %rax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -469,6 +455,7 @@ L(Shl12Start):
movaps -12(%rcx), %xmm1
+ .p2align 4
L(Shl12LoopStart):
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
@@ -498,11 +485,10 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%rdx), %xmm6
- psrldq $4, %xmm6
+ mov (%rcx), %r9d
mov $4, %rsi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%rdx)
+ mov %r9d, (%rdx)
+ jmp L(CopyFrom1To16Bytes)
.p2align 4
L(CopyFrom1To16Bytes):
@@ -556,8 +542,10 @@ L(Exit12):
.p2align 4
L(Exit16):
- movdqu (%rcx), %xmm0
- movdqu %xmm0, (%rdx)
+ mov (%rcx), %rax
+ mov %rax, (%rdx)
+ mov 8(%rcx), %rax
+ mov %rax, 8(%rdx)
mov %rdi, %rax
ret
diff --git a/libc/version.h b/libc/version.h
index a37c4c77d..1dac42529 100644
--- a/libc/version.h
+++ b/libc/version.h
@@ -1,4 +1,4 @@
/* This file just defines the current version number of libc. */
-#define RELEASE "development"
-#define VERSION "2.14.90"
+#define RELEASE "stable"
+#define VERSION "2.15"
diff --git a/ports/ChangeLog.m68k b/ports/ChangeLog.m68k
index bd8631763..a234cfae6 100644
--- a/ports/ChangeLog.m68k
+++ b/ports/ChangeLog.m68k
@@ -1,3 +1,8 @@
+2011-12-23 Andreas Schwab <schwab@linux-m68k.org>
+
+ * sysdeps/m68k/bits/byteswap.h (__bswap_constant_64): Protect long
+ long constant with __extension__.
+
2011-12-04 Thorsten Glaser <tg@mirbsd.de>
* sysdeps/unix/sysv/linux/m68k/syscall.S: Allow six arguments.
diff --git a/ports/sysdeps/m68k/bits/byteswap.h b/ports/sysdeps/m68k/bits/byteswap.h
index 4f31d95bb..5e08805c5 100644
--- a/ports/sysdeps/m68k/bits/byteswap.h
+++ b/ports/sysdeps/m68k/bits/byteswap.h
@@ -28,7 +28,7 @@
because GCC is smart enough to generate optimal assembler output, and
this allows for better cse. */
#define __bswap_constant_16(x) \
- ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
+ ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
static __inline unsigned short int
__bswap_16 (unsigned short int __bsx)
@@ -38,8 +38,8 @@ __bswap_16 (unsigned short int __bsx)
/* Swap bytes in 32 bit value. */
#define __bswap_constant_32(x) \
- ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \
- (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24))
+ ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \
+ (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24))
#if !defined(__mcoldfire__)
static __inline unsigned int
@@ -64,14 +64,15 @@ __bswap_32 (unsigned int __bsx)
#if defined __GNUC__ && __GNUC__ >= 2
/* Swap bytes in 64 bit value. */
# define __bswap_constant_64(x) \
- ((((x) & 0xff00000000000000ull) >> 56) \
- | (((x) & 0x00ff000000000000ull) >> 40) \
- | (((x) & 0x0000ff0000000000ull) >> 24) \
- | (((x) & 0x000000ff00000000ull) >> 8) \
- | (((x) & 0x00000000ff000000ull) << 8) \
- | (((x) & 0x0000000000ff0000ull) << 24) \
- | (((x) & 0x000000000000ff00ull) << 40) \
- | (((x) & 0x00000000000000ffull) << 56))
+ __extension__ \
+ ((((x) & 0xff00000000000000ull) >> 56) \
+ | (((x) & 0x00ff000000000000ull) >> 40) \
+ | (((x) & 0x0000ff0000000000ull) >> 24) \
+ | (((x) & 0x000000ff00000000ull) >> 8) \
+ | (((x) & 0x00000000ff000000ull) << 8) \
+ | (((x) & 0x0000000000ff0000ull) << 24) \
+ | (((x) & 0x000000000000ff00ull) << 40) \
+ | (((x) & 0x00000000000000ffull) << 56))
/* Swap bytes in 64 bit value. */
static __inline unsigned long long