Diffstat (limited to 'libc/ports')
-rw-r--r--  libc/ports/ChangeLog.aarch64                                   39
-rw-r--r--  libc/ports/ChangeLog.hppa                                       8
-rw-r--r--  libc/ports/ChangeLog.m68k                                       5
-rw-r--r--  libc/ports/ChangeLog.mips                                       5
-rw-r--r--  libc/ports/ChangeLog.tile                                       6
-rw-r--r--  libc/ports/sysdeps/aarch64/bits/setjmp.h                        2
-rw-r--r--  libc/ports/sysdeps/aarch64/bzero.S                             27
-rw-r--r--  libc/ports/sysdeps/aarch64/memcmp.S                           151
-rw-r--r--  libc/ports/sysdeps/aarch64/memcpy.S                           176
-rw-r--r--  libc/ports/sysdeps/aarch64/memmove.S                          312
-rw-r--r--  libc/ports/sysdeps/aarch64/memset.S                           229
-rw-r--r--  libc/ports/sysdeps/aarch64/strcmp.S                           155
-rw-r--r--  libc/ports/sysdeps/aarch64/strlen.S                           117
-rw-r--r--  libc/ports/sysdeps/aarch64/sysdep.h                            31
-rw-r--r--  libc/ports/sysdeps/arm/Makefile                                 5
-rw-r--r--  libc/ports/sysdeps/hppa/fpu/fpu_control.h                       4
-rw-r--r--  libc/ports/sysdeps/m68k/bits/byteswap.h                         1
-rw-r--r--  libc/ports/sysdeps/mips/Makefile                                5
-rw-r--r--  libc/ports/sysdeps/unix/sysv/linux/hppa/bits/ipc.h              4
-rw-r--r--  libc/ports/sysdeps/unix/sysv/linux/mips/bits/sigcontext.h      32
-rw-r--r--  libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure        4
-rw-r--r--  libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure.in     4
-rw-r--r--  libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/ldd-rewrite.sed  1
23 files changed, 1281 insertions, 42 deletions
diff --git a/libc/ports/ChangeLog.aarch64 b/libc/ports/ChangeLog.aarch64
index dd1d64cb6..c7487f5b7 100644
--- a/libc/ports/ChangeLog.aarch64
+++ b/libc/ports/ChangeLog.aarch64
@@ -1,3 +1,42 @@
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/strlen.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/strcmp.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/bzero.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/memmove.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/memcpy.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/memset.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/sysdep.h (ENTRY_ALIGN): New.
+ * sysdeps/aarch64/memcmp.S: New file.
+
+2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
+
+ * sysdeps/aarch64/sysdep.h (ENTRY, END): Adjust
+ whitespace.
+
+2013-01-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/aarch64/bits/setjmp.h (__jmp_buf): Use __extension__
+ with long long.
+
2013-01-02 Joseph Myers <joseph@codesourcery.com>

 * All files with FSF copyright notices: Update copyright dates
diff --git a/libc/ports/ChangeLog.hppa b/libc/ports/ChangeLog.hppa
index 2b798529b..ef904c267 100644
--- a/libc/ports/ChangeLog.hppa
+++ b/libc/ports/ChangeLog.hppa
@@ -1,3 +1,11 @@
+2013-01-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/hppa/fpu/fpu_control.h (_FPU_GETCW): Use __extension__
+ with long long.
+ (_FPU_SETCW): Likewise.
+ * sysdeps/unix/sysv/linux/hppa/bits/ipc.h (struct ipc_perm):
+ Likewise.
+
2013-01-08 Andreas Jaeger <aj@suse.de>

 [BZ #14985]
diff --git a/libc/ports/ChangeLog.m68k b/libc/ports/ChangeLog.m68k
index b6e93669c..20eaac716 100644
--- a/libc/ports/ChangeLog.m68k
+++ b/libc/ports/ChangeLog.m68k
@@ -1,3 +1,8 @@
+2013-01-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/m68k/bits/byteswap.h (__bswap_64): Use __extension__
+ with long long.
+
2013-01-04 Andreas Schwab <schwab@suse.de>

 * sysdeps/m68k/m680x0/fpu/libm-test-ulps: Update.
diff --git a/libc/ports/ChangeLog.mips b/libc/ports/ChangeLog.mips
index 5a80540bc..9e2bcffc6 100644
--- a/libc/ports/ChangeLog.mips
+++ b/libc/ports/ChangeLog.mips
@@ -1,3 +1,8 @@
+2013-01-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/unix/sysv/linux/mips/bits/sigcontext.h (struct
+ sigcontext): Use __extension__ with long long in all definitions.
+
2013-01-08 Steve Ellcey <sellcey@mips.com>

 * sysdeps/mips/memcpy.S: Change prefetch hint, reorder partial
diff --git a/libc/ports/ChangeLog.tile b/libc/ports/ChangeLog.tile
index e70742bb3..a5eaec3a9 100644
--- a/libc/ports/ChangeLog.tile
+++ b/libc/ports/ChangeLog.tile
@@ -1,3 +1,9 @@
+2013-01-10 Chris Metcalf <cmetcalf@tilera.com>
+
+ * sysdeps/unix/sysv/linux/tile/tilegx/ldd-rewrite.sed: New file.
+ * sysdeps/unix/sysv/linux/tile/tilegx/configure.in: New file.
+ * sysdeps/unix/sysv/linux/tile/tilegx/configure: New file.
+
2013-01-02 Joseph Myers <joseph@codesourcery.com>

 * All files with FSF copyright notices: Update copyright dates
diff --git a/libc/ports/sysdeps/aarch64/bits/setjmp.h b/libc/ports/sysdeps/aarch64/bits/setjmp.h
index 6d78b7e87..6a93e0a25 100644
--- a/libc/ports/sysdeps/aarch64/bits/setjmp.h
+++ b/libc/ports/sysdeps/aarch64/bits/setjmp.h
@@ -27,7 +27,7 @@
/* Jump buffer contains:
x19-x28, x29(fp), x30(lr), (x31)sp, d8-d15. Other registers are not
saved. */
-typedef unsigned long long __jmp_buf [22];
+__extension__ typedef unsigned long long __jmp_buf [22];

#endif
#endif
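
The __extension__ additions in this commit (here and in the hppa, m68k, and mips headers below) all address the same issue: installed headers must compile cleanly under -std=c90 -pedantic, where a bare `long long` draws an ISO C90 diagnostic, and __extension__ suppresses it. A minimal sketch of the effect (ordinary GCC behavior, not code from this commit):

    /* Compile with: gcc -std=c90 -pedantic -c t.c */
    typedef unsigned long long a_buf[22];               /* warning: ISO C90 does
                                                           not support 'long long' */
    __extension__ typedef unsigned long long b_buf[22]; /* accepted silently */
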
diff --git a/libc/ports/sysdeps/aarch64/bzero.S b/libc/ports/sysdeps/aarch64/bzero.S
new file mode 100644
index 000000000..228c0a5f3
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/bzero.S
@@ -0,0 +1,27 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__bzero)
+ mov x2, x1
+ mov x1, xzr
+ b __memset
+END(__bzero)
+weak_alias (__bzero, bzero)
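
The three instructions above are the usual reduction of bzero to memset: "mov x2, x1" moves the length into memset's third argument register, "mov x1, xzr" zeroes the fill value, and "b __memset" tail-calls. A C rendering of the same shuffle (hypothetical name, for illustration only):

    #include <string.h>

    /* bzero (s, n) is exactly memset (s, 0, n).  */
    static void bzero_model (void *s, size_t n)
    {
      memset (s, 0, n);
    }
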
diff --git a/libc/ports/sysdeps/aarch64/memcmp.S b/libc/ports/sysdeps/aarch64/memcmp.S
new file mode 100644
index 000000000..6398ddd3e
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/memcmp.S
@@ -0,0 +1,151 @@
+/* memcmp - compare memory
+
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Parameters and result. */
+#define src1 x0
+#define src2 x1
+#define limit x2
+#define result x0
+
+/* Internal variables. */
+#define data1 x3
+#define data1w w3
+#define data2 x4
+#define data2w w4
+#define has_nul x5
+#define diff x6
+#define endloop x7
+#define tmp1 x8
+#define tmp2 x9
+#define tmp3 x10
+#define pos x11
+#define limit_wd x12
+#define mask x13
+
+ENTRY_ALIGN (memcmp, 6)
+ cbz limit, L(ret0)
+ eor tmp1, src1, src2
+ tst tmp1, #7
+ b.ne L(misaligned8)
+ ands tmp1, src1, #7
+ b.ne L(mutual_align)
+ add limit_wd, limit, #7
+ lsr limit_wd, limit_wd, #3
+ /* Start of performance-critical section -- one 64B cache line. */
+L(loop_aligned):
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+L(start_realigned):
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, ne /* Last Dword or differences. */
+ cbz endloop, L(loop_aligned)
+ /* End of performance-critical section -- one 64B cache line. */
+
+ /* Not reached the limit, must have found a diff. */
+ cbnz limit_wd, L(not_limit)
+
+ /* Limit % 8 == 0 => all bytes significant. */
+ ands limit, limit, #7
+ b.eq L(not_limit)
+
+ lsl limit, limit, #3 /* Bits -> bytes. */
+ mov mask, #~0
+#ifdef __AARCH64EB__
+ lsr mask, mask, limit
+#else
+ lsl mask, mask, limit
+#endif
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+L(not_limit):
+
+#ifndef __AARCH64EB__
+ rev diff, diff
+ rev data1, data1
+ rev data2, data2
+#endif
+ /* The MS-non-zero bit of DIFF marks either the first bit
+ that is different, or the end of the significant data.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ RET
+
+L(mutual_align):
+ /* Sources are mutually aligned, but are not currently at an
+ alignment boundary. Round down the addresses and then mask off
+ the bytes that precede the start point. */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ add limit, limit, tmp1 /* Adjust the limit for the extra. */
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ ldr data1, [src1], #8
+ neg tmp1, tmp1 /* Bits to alignment -64. */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+#ifdef __AARCH64EB__
+ /* Big-endian. Early bytes are at MSB. */
+ lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#else
+ /* Little-endian. Early bytes are at LSB. */
+ lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#endif
+ add limit_wd, limit, #7
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #3
+ b L(start_realigned)
+
+L(ret0):
+ mov result, #0
+ RET
+
+ .p2align 6
+L(misaligned8):
+ sub limit, limit, #1
+1:
+ /* Perhaps we can do better than this. */
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs limit, limit, #1
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq 1b
+ sub result, data1, data2
+ RET
+END (memcmp)
+#undef bcmp
+weak_alias (memcmp, bcmp)
+libc_hidden_builtin_def (memcmp)
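
The aligned loop above compares one doubleword per iteration; csinv folds the counter into the mismatch test, producing diff while iterations remain and all-ones once limit_wd reaches zero, so the single cbz covers both exit conditions. A rough little-endian C model of this path (hypothetical name; the asm locates the differing byte with rev/clz and masks the partial tail word rather than using a byte loop):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static int memcmp_model (const void *s1, const void *s2, size_t limit)
    {
      const unsigned char *p1 = s1, *p2 = s2;
      size_t i = 0;
      for (; i + 8 <= limit; i += 8)        /* L(loop_aligned) */
        {
          uint64_t d1, d2;
          memcpy (&d1, p1 + i, 8);          /* ldr data1, [src1], #8 */
          memcpy (&d2, p2 + i, 8);          /* ldr data2, [src2], #8 */
          if (d1 ^ d2)                      /* eor diff, data1, data2 */
            break;
        }
      for (; i < limit; i++)                /* find the first differing byte */
        if (p1[i] != p2[i])
          return p1[i] - p2[i];
      return 0;
    }
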
diff --git a/libc/ports/sysdeps/aarch64/memcpy.S b/libc/ports/sysdeps/aarch64/memcpy.S
new file mode 100644
index 000000000..4f4e36c06
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/memcpy.S
@@ -0,0 +1,176 @@
+/* Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ *
+ */
+
+#define dstin x0
+#define src x1
+#define count x2
+#define tmp1 x3
+#define tmp1w w3
+#define tmp2 x4
+#define tmp2w w4
+#define tmp3 x5
+#define tmp3w w5
+#define dst x6
+
+#define A_l x7
+#define A_h x8
+#define B_l x9
+#define B_h x10
+#define C_l x11
+#define C_h x12
+#define D_l x13
+#define D_h x14
+
+#include <sysdep.h>
+
+ENTRY_ALIGN (memcpy, 6)
+
+ mov dst, dstin
+ cmp count, #64
+ b.ge L(cpy_not_short)
+ cmp count, #15
+ b.le L(tail15tiny)
+
+ /* Deal with small copies quickly by dropping straight into the
+ * exit block. */
+L(tail63):
+ /* Copy up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq L(tail15)
+ add dst, dst, tmp1
+ add src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #-48]
+ stp A_l, A_h, [dst, #-48]
+1:
+ ldp A_l, A_h, [src, #-32]
+ stp A_l, A_h, [dst, #-32]
+2:
+ ldp A_l, A_h, [src, #-16]
+ stp A_l, A_h, [dst, #-16]
+
+L(tail15):
+ ands count, count, #15
+ b.eq 1f
+ add src, src, count
+ ldp A_l, A_h, [src, #-16]
+ add dst, dst, count
+ stp A_l, A_h, [dst, #-16]
+1:
+ RET
+
+L(tail15tiny):
+ /* Copy up to 15 bytes of data. Does not assume additional data
+ being copied. */
+ tbz count, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src]
+ strb tmp1w, [dst]
+1:
+ RET
+
+L(cpy_not_short):
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ neg tmp2, src
+ ands tmp2, tmp2, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Copy more data than needed; it's faster than jumping
+ * around copying sub-Quadword quantities. We know that
+ * it can't overrun. */
+ ldp A_l, A_h, [src]
+ add src, src, tmp2
+ stp A_l, A_h, [dst]
+ add dst, dst, tmp2
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le L(tail63)
+2:
+ subs count, count, #128
+ b.ge L(cpy_body_large)
+ /* Less than 128 bytes to copy, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src]
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst]
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ add src, src, #64
+ add dst, dst, #64
+ b.ne L(tail63)
+ RET
+
+ /* Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+L(cpy_body_large):
+ /* There are at least 128 bytes to copy. */
+ ldp A_l, A_h, [src, #0]
+ sub dst, dst, #16 /* Pre-bias. */
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */
+1:
+ stp A_l, A_h, [dst, #16]
+ ldp A_l, A_h, [src, #16]
+ stp B_l, B_h, [dst, #32]
+ ldp B_l, B_h, [src, #32]
+ stp C_l, C_h, [dst, #48]
+ ldp C_l, C_h, [src, #48]
+ stp D_l, D_h, [dst, #64]!
+ ldp D_l, D_h, [src, #64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #16]
+ stp B_l, B_h, [dst, #32]
+ stp C_l, C_h, [dst, #48]
+ stp D_l, D_h, [dst, #64]
+ add src, src, #16
+ add dst, dst, #64 + 16
+ tst count, #0x3f
+ b.ne L(tail63)
+ RET
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
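
The prologue at L(cpy_not_short) over-copies on purpose: with at least 64 bytes guaranteed to remain, it stores one full 16-byte chunk from the unaligned start, then advances source and destination by only the skew needed to align SRC, re-copying the overlap instead of branching over sub-quadword sizes. A C sketch of just that step (hypothetical helper, assuming count >= 64 as the asm does):

    #include <stdint.h>
    #include <string.h>

    /* Align *src to 16 bytes by over-copying: store one full 16-byte
       chunk, then step forward only as far as needed for alignment.  */
    static void align_head (unsigned char **dst, const unsigned char **src,
                            uint64_t *count)
    {
      uint64_t skew = (uint64_t) -(uintptr_t) *src & 15;  /* neg/ands tmp2 */
      if (skew == 0)
        return;
      memcpy (*dst, *src, 16);   /* ldp/stp A_l, A_h: may re-copy bytes */
      *src += skew;              /* add src, src, tmp2 */
      *dst += skew;              /* add dst, dst, tmp2 */
      *count -= skew;            /* sub count, count, tmp2 */
    }
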
diff --git a/libc/ports/sysdeps/aarch64/memmove.S b/libc/ports/sysdeps/aarch64/memmove.S
new file mode 100644
index 000000000..c42eb1c13
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/memmove.S
@@ -0,0 +1,312 @@
+/* Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ */
+
+/* Parameters and result. */
+#define dstin x0
+#define src x1
+#define count x2
+#define tmp1 x3
+#define tmp1w w3
+#define tmp2 x4
+#define tmp2w w4
+#define tmp3 x5
+#define tmp3w w5
+#define dst x6
+
+#define A_l x7
+#define A_h x8
+#define B_l x9
+#define B_h x10
+#define C_l x11
+#define C_h x12
+#define D_l x13
+#define D_h x14
+
+ENTRY_ALIGN (memmove, 6)
+
+ cmp dstin, src
+ b.lo L(downwards)
+ add tmp1, src, count
+ cmp dstin, tmp1
+ b.hs memcpy /* No overlap. */
+
+ /* Upwards move with potential overlap.
+ * Need to move from the tail backwards. SRC and DST point one
+ * byte beyond the remaining data to move. */
+ add dst, dstin, count
+ add src, src, count
+ cmp count, #64
+ b.ge L(mov_not_short_up)
+
+ /* Deal with small moves quickly by dropping straight into the
+ * exit block. */
+L(tail63up):
+ /* Move up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq L(tail15up)
+ sub dst, dst, tmp1
+ sub src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #32]
+ stp A_l, A_h, [dst, #32]
+1:
+ ldp A_l, A_h, [src, #16]
+ stp A_l, A_h, [dst, #16]
+2:
+ ldp A_l, A_h, [src]
+ stp A_l, A_h, [dst]
+L(tail15up):
+ /* Move up to 15 bytes of data. Does not assume additional data
+ * being moved. */
+ tbz count, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src, #-1]
+ strb tmp1w, [dst, #-1]
+1:
+ RET
+
+L(mov_not_short_up):
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ ands tmp2, src, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Move enough data to reach alignment; unlike memcpy, we have to
+ * be aware of the overlap, which means we can't move data twice. */
+ tbz tmp2, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz tmp2, #2, 1f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+1:
+ tbz tmp2, #1, 1f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+1:
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src, #-1]!
+ strb tmp1w, [dst, #-1]!
+1:
+
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le L(tail63up)
+2:
+ subs count, count, #128
+ b.ge L(mov_body_large_up)
+ /* Less than 128 bytes to move, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src, #-64]!
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst, #-64]!
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ b.ne L(tail63up)
+ RET
+
+ /* Critical loop. Start at a new Icache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+L(mov_body_large_up):
+ /* There are at least 128 bytes to move. */
+ ldp A_l, A_h, [src, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ ldp D_l, D_h, [src, #-64]!
+1:
+ stp A_l, A_h, [dst, #-16]
+ ldp A_l, A_h, [src, #-16]
+ stp B_l, B_h, [dst, #-32]
+ ldp B_l, B_h, [src, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp C_l, C_h, [src, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ ldp D_l, D_h, [src, #-64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #-16]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ tst count, #0x3f
+ b.ne L(tail63up)
+ RET
+
+L(downwards):
+ /* For a downwards move we can safely use memcpy provided that
+ * DST is more than 16 bytes away from SRC. */
+ sub tmp1, src, #16
+ cmp dstin, tmp1
+ b.ls memcpy /* May overlap, but not critically. */
+
+ mov dst, dstin /* Preserve DSTIN for return value. */
+ cmp count, #64
+ b.ge L(mov_not_short_down)
+
+ /* Deal with small moves quickly by dropping straight into the
+ * exit block. */
+L(tail63down):
+ /* Move up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate. */
+ ands tmp1, count, #0x30
+ b.eq L(tail15down)
+ add dst, dst, tmp1
+ add src, src, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #-48]
+ stp A_l, A_h, [dst, #-48]
+1:
+ ldp A_l, A_h, [src, #-32]
+ stp A_l, A_h, [dst, #-32]
+2:
+ ldp A_l, A_h, [src, #-16]
+ stp A_l, A_h, [dst, #-16]
+L(tail15down):
+ /* Move up to 15 bytes of data. Does not assume additional data
+ being moved. */
+ tbz count, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz count, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz count, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz count, #0, 1f
+ ldrb tmp1w, [src]
+ strb tmp1w, [dst]
+1:
+ RET
+
+L(mov_not_short_down):
+ /* We don't much care about the alignment of DST, but we want SRC
+ * to be 128-bit (16 byte) aligned so that we don't cross cache line
+ * boundaries on both loads and stores. */
+ neg tmp2, src
+ ands tmp2, tmp2, #15 /* Bytes to reach alignment. */
+ b.eq 2f
+ sub count, count, tmp2
+ /* Move enough data to reach alignment; unlike memcpy, we have to
+ * be aware of the overlap, which means we can't move data twice. */
+ tbz tmp2, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz tmp2, #2, 1f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+1:
+ tbz tmp2, #1, 1f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+1:
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src], #1
+ strb tmp1w, [dst], #1
+1:
+
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le L(tail63down)
+2:
+ subs count, count, #128
+ b.ge L(mov_body_large_down)
+ /* Less than 128 bytes to move, so handle 64 here and then jump
+ * to the tail. */
+ ldp A_l, A_h, [src]
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]
+ stp A_l, A_h, [dst]
+ stp B_l, B_h, [dst, #16]
+ stp C_l, C_h, [dst, #32]
+ stp D_l, D_h, [dst, #48]
+ tst count, #0x3f
+ add src, src, #64
+ add dst, dst, #64
+ b.ne L(tail63down)
+ RET
+
+ /* Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line. */
+ .p2align 6
+L(mov_body_large_down):
+ /* There are at least 128 bytes to move. */
+ ldp A_l, A_h, [src, #0]
+ sub dst, dst, #16 /* Pre-bias. */
+ ldp B_l, B_h, [src, #16]
+ ldp C_l, C_h, [src, #32]
+ ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */
+1:
+ stp A_l, A_h, [dst, #16]
+ ldp A_l, A_h, [src, #16]
+ stp B_l, B_h, [dst, #32]
+ ldp B_l, B_h, [src, #32]
+ stp C_l, C_h, [dst, #48]
+ ldp C_l, C_h, [src, #48]
+ stp D_l, D_h, [dst, #64]!
+ ldp D_l, D_h, [src, #64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #16]
+ stp B_l, B_h, [dst, #32]
+ stp C_l, C_h, [dst, #48]
+ stp D_l, D_h, [dst, #64]
+ add src, src, #16
+ add dst, dst, #64 + 16
+ tst count, #0x3f
+ b.ne L(tail63down)
+ RET
+END (memmove)
+
+libc_hidden_builtin_def (memmove)
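
The overlap analysis at the entry is worth spelling out: DST below SRC means an ascending copy is safe (and a plain memcpy is used when the gap exceeds 16 bytes, since memcpy's over-copying prologue needs that slack); DST at or beyond SRC+count means no overlap at all; anything else must copy from the tail downwards. A C sketch of the decision structure (hypothetical name):

    #include <stddef.h>
    #include <string.h>

    void *memmove_model (void *dstin, const void *srcin, size_t count)
    {
      char *dst = dstin;
      const char *src = srcin;

      if (dst < src)                    /* cmp dstin, src; b.lo L(downwards) */
        {
          /* Downwards move: ascending copy never overwrites unread bytes.  */
          while (count--)
            *dst++ = *src++;
        }
      else if (dst >= src + count)      /* add tmp1, src, count; b.hs memcpy */
        memcpy (dst, src, count);       /* No overlap.  */
      else
        {
          /* Upwards move with overlap: start at the tail and move backwards
             so no source byte is overwritten before it has been read.  */
          while (count--)
            dst[count] = src[count];
        }
      return dstin;
    }
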
diff --git a/libc/ports/sysdeps/aarch64/memset.S b/libc/ports/sysdeps/aarch64/memset.S
new file mode 100644
index 000000000..f96f6a6ba
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/memset.S
@@ -0,0 +1,229 @@
+/* Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ *
+ */
+
+#include <sysdep.h>
+
+/* By default we assume that the DC instruction can be used to zero
+ data blocks more efficiently. In some circumstances this might be
+ unsafe, for example in an asymmetric multiprocessor environment with
+ different DC clear lengths (neither the upper nor lower lengths are
+ safe to use). The feature can be disabled by defining DONT_USE_DC.
+
+ If code may be run in a virtualized environment, then define
+ MAYBE_VIRT. This will cause the code to cache the system register
+ values rather than re-reading them each call. */
+
+#define dstin x0
+#define val w1
+#define count x2
+#define tmp1 x3
+#define tmp1w w3
+#define tmp2 x4
+#define tmp2w w4
+#define zva_len_x x5
+#define zva_len w5
+#define zva_bits_x x6
+
+#define A_l x7
+#define A_lw w7
+#define dst x8
+#define tmp3w w9
+
+ENTRY_ALIGN (__memset, 6)
+
+ mov dst, dstin /* Preserve return value. */
+ ands A_lw, val, #255
+#ifndef DONT_USE_DC
+ b.eq L(zero_mem)
+#endif
+ orr A_lw, A_lw, A_lw, lsl #8
+ orr A_lw, A_lw, A_lw, lsl #16
+ orr A_l, A_l, A_l, lsl #32
+L(tail_maybe_long):
+ cmp count, #64
+ b.ge L(not_short)
+L(tail_maybe_tiny):
+ cmp count, #15
+ b.le L(tail15tiny)
+L(tail63):
+ ands tmp1, count, #0x30
+ b.eq L(tail15)
+ add dst, dst, tmp1
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ stp A_l, A_l, [dst, #-48]
+1:
+ stp A_l, A_l, [dst, #-32]
+2:
+ stp A_l, A_l, [dst, #-16]
+
+L(tail15):
+ and count, count, #15
+ add dst, dst, count
+ stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
+ RET
+
+L(tail15tiny):
+ /* Set up to 15 bytes. Does not assume earlier memory
+ being set. */
+ tbz count, #3, 1f
+ str A_l, [dst], #8
+1:
+ tbz count, #2, 1f
+ str A_lw, [dst], #4
+1:
+ tbz count, #1, 1f
+ strh A_lw, [dst], #2
+1:
+ tbz count, #0, 1f
+ strb A_lw, [dst]
+1:
+ RET
+
+ /* Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line, this ensures the entire loop is in one line. */
+ .p2align 6
+L(not_short):
+ neg tmp2, dst
+ ands tmp2, tmp2, #15
+ b.eq 2f
+ /* Bring DST to 128-bit (16-byte) alignment. We know that there's
+ * more than that to set, so we simply store 16 bytes and advance by
+ * the amount required to reach alignment. */
+ sub count, count, tmp2
+ stp A_l, A_l, [dst]
+ add dst, dst, tmp2
+ /* There may be less than 63 bytes to go now. */
+ cmp count, #63
+ b.le L(tail63)
+2:
+ sub dst, dst, #16 /* Pre-bias. */
+ sub count, count, #64
+1:
+ stp A_l, A_l, [dst, #16]
+ stp A_l, A_l, [dst, #32]
+ stp A_l, A_l, [dst, #48]
+ stp A_l, A_l, [dst, #64]!
+ subs count, count, #64
+ b.ge 1b
+ tst count, #0x3f
+ add dst, dst, #16
+ b.ne L(tail63)
+ RET
+
+#ifndef DONT_USE_DC
+ /* For zeroing memory, check to see if we can use the ZVA feature to
+ * zero entire 'cache' lines. */
+L(zero_mem):
+ mov A_l, #0
+ cmp count, #63
+ b.le L(tail_maybe_tiny)
+ neg tmp2, dst
+ ands tmp2, tmp2, #15
+ b.eq 1f
+ sub count, count, tmp2
+ stp A_l, A_l, [dst]
+ add dst, dst, tmp2
+ cmp count, #63
+ b.le L(tail63)
+1:
+ /* For zeroing small amounts of memory, it's not worth setting up
+ * the line-clear code. */
+ cmp count, #128
+ b.lt L(not_short)
+#ifdef MAYBE_VIRT
+ /* For efficiency when virtualized, we cache the ZVA capability. */
+ adrp tmp2, L(cache_clear)
+ ldr zva_len, [tmp2, #:lo12:L(cache_clear)]
+ tbnz zva_len, #31, L(not_short)
+ cbnz zva_len, L(zero_by_line)
+ mrs tmp1, dczid_el0
+ tbz tmp1, #4, 1f
+ /* ZVA not available. Remember this for next time. */
+ mov zva_len, #~0
+ str zva_len, [tmp2, #:lo12:L(cache_clear)]
+ b L(not_short)
+1:
+ mov tmp3w, #4
+ and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
+ lsl zva_len, tmp3w, zva_len
+ str zva_len, [tmp2, #:lo12:L(cache_clear)]
+#else
+ mrs tmp1, dczid_el0
+ tbnz tmp1, #4, L(not_short)
+ mov tmp3w, #4
+ and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
+ lsl zva_len, tmp3w, zva_len
+#endif
+
+L(zero_by_line):
+ /* Compute how far we need to go to become suitably aligned. We're
+ * already at quad-word alignment. */
+ cmp count, zva_len_x
+ b.lt L(not_short) /* Not enough to reach alignment. */
+ sub zva_bits_x, zva_len_x, #1
+ neg tmp2, dst
+ ands tmp2, tmp2, zva_bits_x
+ b.eq 1f /* Already aligned. */
+ /* Not aligned, check that there's enough to copy after alignment. */
+ sub tmp1, count, tmp2
+ cmp tmp1, #64
+ ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
+ b.lt L(not_short)
+ /* We know that there's at least 64 bytes to zero and that it's safe
+ * to overrun by 64 bytes. */
+ mov count, tmp1
+2:
+ stp A_l, A_l, [dst]
+ stp A_l, A_l, [dst, #16]
+ stp A_l, A_l, [dst, #32]
+ subs tmp2, tmp2, #64
+ stp A_l, A_l, [dst, #48]
+ add dst, dst, #64
+ b.ge 2b
+ /* We've overrun a bit, so adjust dst downwards. */
+ add dst, dst, tmp2
+1:
+ sub count, count, zva_len_x
+3:
+ dc zva, dst
+ add dst, dst, zva_len_x
+ subs count, count, zva_len_x
+ b.ge 3b
+ ands count, count, zva_bits_x
+ b.ne L(tail_maybe_long)
+ RET
+#ifdef MAYBE_VIRT
+ .bss
+ .p2align 2
+L(cache_clear):
+ .space 4
+#endif
+#endif /* DONT_USE_DC */
+
+END (__memset)
+weak_alias (__memset, memset)
+libc_hidden_builtin_def (memset)
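
L(zero_mem) decides whether DC ZVA is usable by reading the dczid_el0 system register: bits 3:0 give log2 of the block size in 4-byte words (so the byte size is 4 << bits), and bit 4 set means the instruction is prohibited. A sketch of the same decode (GCC inline asm, AArch64 only; hypothetical function name):

    #include <stdint.h>

    /* Returns the DC ZVA block size in bytes, or 0 when ZVA must not
       be used -- mirroring the mrs/tbnz/and/lsl sequence above.  */
    static unsigned int zva_block_size (void)
    {
      uint64_t dczid;
      __asm__ ("mrs %0, dczid_el0" : "=r" (dczid));  /* mrs tmp1, dczid_el0 */
      if (dczid & 16)                                /* tbnz tmp1, #4, ...  */
        return 0;                                    /* ZVA prohibited.     */
      return 4u << (dczid & 15);                     /* mov #4; and; lsl    */
    }
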
diff --git a/libc/ports/sysdeps/aarch64/strcmp.S b/libc/ports/sysdeps/aarch64/strcmp.S
new file mode 100644
index 000000000..fa4705c8b
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/strcmp.S
@@ -0,0 +1,155 @@
+/* Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+#include <sysdep.h>
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result. */
+#define src1 x0
+#define src2 x1
+#define result x0
+
+/* Internal variables. */
+#define data1 x2
+#define data1w w2
+#define data2 x3
+#define data2w w3
+#define has_nul x4
+#define diff x5
+#define syndrome x6
+#define tmp1 x7
+#define tmp2 x8
+#define tmp3 x9
+#define zeroones x10
+#define pos x11
+
+ /* Start of performance-critical section -- one 64B cache line. */
+ENTRY_ALIGN(strcmp, 6)
+
+ eor tmp1, src1, src2
+ mov zeroones, #REP8_01
+ tst tmp1, #7
+ b.ne L(misaligned8)
+ ands tmp1, src1, #7
+ b.ne L(mutual_align)
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word. */
+L(loop_aligned):
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+L(start_realigned):
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ orr syndrome, diff, has_nul
+ cbz syndrome, L(loop_aligned)
+ /* End of performance-critical section -- one 64B cache line. */
+
+#ifndef __AARCH64EB__
+ rev syndrome, syndrome
+ rev data1, data1
+ /* The MS-non-zero bit of the syndrome marks either the first bit
+ that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ clz pos, syndrome
+ rev data2, data2
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ RET
+#else
+ /* For big-endian we cannot use the trick with the syndrome value
+ as carry-propagation can corrupt the upper bits if the trailing
+ bytes in the string contain 0x01. */
+ /* However, if there is no NUL byte in the dword, we can generate
+ the result directly. We can't just subtract the bytes as the
+ MSB might be significant. */
+ cbnz has_nul, 1f
+ cmp data1, data2
+ cset result, ne
+ cneg result, result, lo
+ RET
+1:
+ /* Re-compute the NUL-byte detection, using a byte-reversed value. */
+ rev tmp3, data1
+ sub tmp1, tmp3, zeroones
+ orr tmp2, tmp3, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ rev has_nul, has_nul
+ orr syndrome, diff, has_nul
+ clz pos, syndrome
+ /* The MS-non-zero bit of the syndrome marks either the first bit
+ that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ RET
+#endif
+
+L(mutual_align):
+ /* Sources are mutually aligned, but are not currently at an
+ alignment boundary. Round down the addresses and then mask off
+ the bytes that precede the start point. */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ ldr data1, [src1], #8
+ neg tmp1, tmp1 /* Bits to alignment -64. */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+#ifdef __AARCH64EB__
+ /* Big-endian. Early bytes are at MSB. */
+ lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#else
+ /* Little-endian. Early bytes are at LSB. */
+ lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#endif
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ b L(start_realigned)
+
+L(misaligned8):
+ /* We can do better than this. */
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq L(misaligned8)
+ sub result, data1, data2
+ RET
+END(strcmp)
+libc_hidden_builtin_def (strcmp)
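
The zero-byte test named in the comment above — (X - 0x01…01) & ~(X | 0x7f…7f) is non-zero iff some byte of X is zero — is exact for presence, though the position of the lowest marker is only trustworthy scanning from the least-significant end, which is why the big-endian path recomputes it on byte-reversed data. A small C check:

    #include <assert.h>
    #include <stdint.h>

    #define REP8_01 0x0101010101010101ULL
    #define REP8_7f 0x7f7f7f7f7f7f7f7fULL

    /* Subtracting 1 borrows through a 0x00 byte, setting its top bit;
       ~(x | 0x7f..7f) keeps that top bit only for bytes below 0x80.  */
    static uint64_t has_zero_byte (uint64_t x)
    {
      return (x - REP8_01) & ~(x | REP8_7f);
    }

    int main (void)
    {
      assert (has_zero_byte (0x6867666564636261ULL) == 0);  /* "abcdefgh"    */
      assert (has_zero_byte (0x6800666564636261ULL) != 0);  /* NUL in byte 6 */
      assert (has_zero_byte (0x0101010101010101ULL) == 0);  /* all 0x01      */
      return 0;
    }
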
diff --git a/libc/ports/sysdeps/aarch64/strlen.S b/libc/ports/sysdeps/aarch64/strlen.S
new file mode 100644
index 000000000..ba05009c6
--- /dev/null
+++ b/libc/ports/sysdeps/aarch64/strlen.S
@@ -0,0 +1,117 @@
+/* Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Arguments and results. */
+#define srcin x0
+#define len x0
+
+/* Locals and temporaries. */
+#define src x1
+#define data1 x2
+#define data2 x3
+#define data2a x4
+#define has_nul1 x5
+#define has_nul2 x6
+#define tmp1 x7
+#define tmp2 x8
+#define tmp3 x9
+#define tmp4 x10
+#define zeroones x11
+#define pos x12
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ /* Start of critical section -- keep to one 64Byte cache line. */
+ENTRY_ALIGN (strlen, 6)
+ mov zeroones, #REP8_01
+ bic src, srcin, #15
+ ands tmp1, srcin, #15
+ b.ne L(misaligned)
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word. */
+ /* The inner loop deals with two Dwords at a time. This has a
+ slightly higher start-up cost, but we should win quite quickly,
+ especially on cores with a high number of issue slots per
+ cycle, as we get much better parallelism out of the operations. */
+L(loop):
+ ldp data1, data2, [src], #16
+L(realigned):
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, #REP8_7f
+ bic has_nul1, tmp1, tmp2
+ bics has_nul2, tmp3, tmp4
+ ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
+ b.eq L(loop)
+ /* End of critical section -- keep to one 64Byte cache line. */
+
+ sub len, src, srcin
+ cbz has_nul1, L(nul_in_data2)
+#ifdef __AARCH64EB__
+ mov data2, data1
+#endif
+ sub len, len, #8
+ mov has_nul2, has_nul1
+L(nul_in_data2):
+#ifdef __AARCH64EB__
+ /* For big-endian, carry propagation (if the final byte in the
+ string is 0x01) means we cannot use has_nul directly. The
+ easiest way to get the correct byte is to byte-swap the data
+ and calculate the syndrome a second time. */
+ rev data2, data2
+ sub tmp1, data2, zeroones
+ orr tmp2, data2, #REP8_7f
+ bic has_nul2, tmp1, tmp2
+#endif
+ sub len, len, #8
+ rev has_nul2, has_nul2
+ clz pos, has_nul2
+ add len, len, pos, lsr #3 /* Bits to bytes. */
+ RET
+
+L(misaligned):
+ cmp tmp1, #8
+ neg tmp1, tmp1
+ ldp data1, data2, [src], #16
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ mov tmp2, #~0
+#ifdef __AARCH64EB__
+ /* Big-endian. Early bytes are at MSB. */
+ lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#else
+ /* Little-endian. Early bytes are at LSB. */
+ lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#endif
+ orr data1, data1, tmp2
+ orr data2a, data2, tmp2
+ csinv data1, data1, xzr, le
+ csel data2, data2, data2a, le
+ b L(realigned)
+END (strlen)
+libc_hidden_builtin_def (strlen)
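
Once the terminating word is found, the length is recovered from the has_nul syndrome: rev plus clz gives the bit position of the first NUL marker, and lsr #3 turns bits into bytes. A little-endian C model of the same computation (hypothetical name, GCC builtins; counting trailing zeros gives the identical byte index, since each syndrome bit sits at the top of its byte and (8k + 7) >> 3 == k):

    #include <stdint.h>

    #define REP8_01 0x0101010101010101ULL
    #define REP8_7f 0x7f7f7f7f7f7f7f7fULL

    /* Index of the first NUL byte in data; data must contain one.  */
    static unsigned int first_nul_index (uint64_t data)
    {
      uint64_t has_nul = (data - REP8_01) & ~(data | REP8_7f);
      return __builtin_ctzll (has_nul) >> 3;       /* rev; clz; lsr #3 */
    }
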
diff --git a/libc/ports/sysdeps/aarch64/sysdep.h b/libc/ports/sysdeps/aarch64/sysdep.h
index d9469b8e6..6b75ada14 100644
--- a/libc/ports/sysdeps/aarch64/sysdep.h
+++ b/libc/ports/sysdeps/aarch64/sysdep.h
@@ -25,24 +25,33 @@
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name

/* Define an entry point visible from C. */
-#define ENTRY(name) \
- .globl C_SYMBOL_NAME(name); \
- .type C_SYMBOL_NAME(name),%function; \
- .align 4; \
- C_LABEL(name) \
- cfi_startproc; \
+#define ENTRY(name) \
+ .globl C_SYMBOL_NAME(name); \
+ .type C_SYMBOL_NAME(name),%function; \
+ .align 4; \
+ C_LABEL(name) \
+ cfi_startproc; \
+ CALL_MCOUNT
+
+/* Define an entry point visible from C. */
+#define ENTRY_ALIGN(name, align) \
+ .globl C_SYMBOL_NAME(name); \
+ .type C_SYMBOL_NAME(name),%function; \
+ .p2align align; \
+ C_LABEL(name) \
+ cfi_startproc; \
CALL_MCOUNT

#undef END
-#define END(name) \
- cfi_endproc; \
+#define END(name) \
+ cfi_endproc; \
ASM_SIZE_DIRECTIVE(name)

/* If compiled for profiling, call `mcount' at the start of each function. */
#ifdef PROF
-# define CALL_MCOUNT \
- str x30, [sp, #-16]!; \
- bl mcount; \
+# define CALL_MCOUNT \
+ str x30, [sp, #-16]!; \
+ bl mcount; \
ldr x30, [sp], #16 ;
#else
# define CALL_MCOUNT /* Do nothing. */
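
The new ENTRY_ALIGN differs from ENTRY only in taking the alignment as an explicit power of two: ENTRY_ALIGN (memcmp, 6) emits .p2align 6, a 64-byte boundary, which is what lets the string routines above keep their hot loops inside a single cache line. In a non-profiled build (CALL_MCOUNT empty), the expansion is approximately:

    .globl memcmp
    .type memcmp, %function
    .p2align 6
    memcmp:            /* C_LABEL(name) */
    cfi_startproc
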
diff --git a/libc/ports/sysdeps/arm/Makefile b/libc/ports/sysdeps/arm/Makefile
index 355f5b309..7c19398ff 100644
--- a/libc/ports/sysdeps/arm/Makefile
+++ b/libc/ports/sysdeps/arm/Makefile
@@ -39,11 +39,6 @@ endif
ifeq ($(subdir),debug)
CFLAGS-backtrace.c += -funwind-tables
-CFLAGS-tst-backtrace2.c += -funwind-tables
-CFLAGS-tst-backtrace3.c += -funwind-tables
-CFLAGS-tst-backtrace4.c += -funwind-tables
-CFLAGS-tst-backtrace5.c += -funwind-tables
-CFLAGS-tst-backtrace6.c += -funwind-tables
endif

ifeq ($(subdir),math)
diff --git a/libc/ports/sysdeps/hppa/fpu/fpu_control.h b/libc/ports/sysdeps/hppa/fpu/fpu_control.h
index 7aa16c9dd..5cac3344d 100644
--- a/libc/ports/sysdeps/hppa/fpu/fpu_control.h
+++ b/libc/ports/sysdeps/hppa/fpu/fpu_control.h
@@ -44,7 +44,7 @@ typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
#define _FPU_GETCW(cw) \
({ \
- union { unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
+ union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
/* Get the current status word. */ \
__asm__ ("fstd %%fr0,0(%1)\n\t" \
"fldd 0(%1),%%fr0\n\t" \
@@ -54,7 +54,7 @@ typedef unsigned int fpu_control_t;
#define _FPU_SETCW(cw) \
({ \
- union { unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
+ union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
__fullfp.__halfreg[0] = cw; \
__asm__ ("fldd 0(%1),%%fr0\n\t" \
: : "m" (__fullfp.__fpreg), "r" (__fullfp.__fpreg) : "%r0" ); \
diff --git a/libc/ports/sysdeps/m68k/bits/byteswap.h b/libc/ports/sysdeps/m68k/bits/byteswap.h
index 4e4dd2341..9f0a7b707 100644
--- a/libc/ports/sysdeps/m68k/bits/byteswap.h
+++ b/libc/ports/sysdeps/m68k/bits/byteswap.h
@@ -74,6 +74,7 @@ __bswap_32 (unsigned int __bsx)
| (((x) & 0x00000000000000ffull) << 56))

/* Swap bytes in 64 bit value. */
+__extension__
static __inline unsigned long long
__bswap_64 (unsigned long long __bsx)
{
diff --git a/libc/ports/sysdeps/mips/Makefile b/libc/ports/sysdeps/mips/Makefile
index d87b2c406..a1526998e 100644
--- a/libc/ports/sysdeps/mips/Makefile
+++ b/libc/ports/sysdeps/mips/Makefile
@@ -13,11 +13,6 @@ endif
ifeq ($(subdir),debug)
CFLAGS-backtrace.c += -funwind-tables
-CFLAGS-tst-backtrace2.c += -funwind-tables
-CFLAGS-tst-backtrace3.c += -funwind-tables
-CFLAGS-tst-backtrace4.c += -funwind-tables
-CFLAGS-tst-backtrace5.c += -funwind-tables
-CFLAGS-tst-backtrace6.c += -funwind-tables
endif

ifeq ($(subdir),csu)
diff --git a/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/ipc.h b/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/ipc.h
index bc07c1fa3..04a75e2e8 100644
--- a/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/ipc.h
+++ b/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/ipc.h
@@ -57,6 +57,6 @@ struct ipc_perm
#endif
unsigned short int __seq; /* Sequence number. */
unsigned int __pad3;
- unsigned long long int __unused1;
- unsigned long long int __unused2;
+ __extension__ unsigned long long int __unused1;
+ __extension__ unsigned long long int __unused2;
};
diff --git a/libc/ports/sysdeps/unix/sysv/linux/mips/bits/sigcontext.h b/libc/ports/sysdeps/unix/sysv/linux/mips/bits/sigcontext.h
index 085a00d04..f3c5180b8 100644
--- a/libc/ports/sysdeps/unix/sysv/linux/mips/bits/sigcontext.h
+++ b/libc/ports/sysdeps/unix/sysv/linux/mips/bits/sigcontext.h
@@ -39,16 +39,16 @@
struct sigcontext {
unsigned int sc_regmask;
unsigned int sc_status;
- unsigned long long sc_pc;
- unsigned long long sc_regs[32];
- unsigned long long sc_fpregs[32];
+ __extension__ unsigned long long sc_pc;
+ __extension__ unsigned long long sc_regs[32];
+ __extension__ unsigned long long sc_fpregs[32];
unsigned int sc_ownedfp;
unsigned int sc_fpc_csr;
unsigned int sc_fpc_eir;
unsigned int sc_used_math;
unsigned int sc_dsp;
- unsigned long long sc_mdhi;
- unsigned long long sc_mdlo;
+ __extension__ unsigned long long sc_mdhi;
+ __extension__ unsigned long long sc_mdlo;
unsigned long sc_hi1;
unsigned long sc_lo1;
unsigned long sc_hi2;
@@ -61,17 +61,17 @@ struct sigcontext {
/* This structure changed in 2.6.12-rc4 when DSP support was added. */
struct sigcontext {
- unsigned long long sc_regs[32];
- unsigned long long sc_fpregs[32];
- unsigned long long sc_mdhi;
- unsigned long long sc_hi1;
- unsigned long long sc_hi2;
- unsigned long long sc_hi3;
- unsigned long long sc_mdlo;
- unsigned long long sc_lo1;
- unsigned long long sc_lo2;
- unsigned long long sc_lo3;
- unsigned long long sc_pc;
+ __extension__ unsigned long long sc_regs[32];
+ __extension__ unsigned long long sc_fpregs[32];
+ __extension__ unsigned long long sc_mdhi;
+ __extension__ unsigned long long sc_hi1;
+ __extension__ unsigned long long sc_hi2;
+ __extension__ unsigned long long sc_hi3;
+ __extension__ unsigned long long sc_mdlo;
+ __extension__ unsigned long long sc_lo1;
+ __extension__ unsigned long long sc_lo2;
+ __extension__ unsigned long long sc_lo3;
+ __extension__ unsigned long long sc_pc;
unsigned int sc_fpc_csr;
unsigned int sc_used_math;
unsigned int sc_dsp;
diff --git a/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure b/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure
new file mode 100644
index 000000000..bfa30f6dc
--- /dev/null
+++ b/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure
@@ -0,0 +1,4 @@
+# This file is generated from configure.in by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/unix/sysv/linux/tile/tilegx
+
+ldd_rewrite_script=$dir/ldd-rewrite.sed
diff --git a/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure.in b/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure.in
new file mode 100644
index 000000000..87d86bd4c
--- /dev/null
+++ b/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/configure.in
@@ -0,0 +1,4 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/unix/sysv/linux/tile/tilegx
+
+ldd_rewrite_script=$dir/ldd-rewrite.sed
diff --git a/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/ldd-rewrite.sed b/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/ldd-rewrite.sed
new file mode 100644
index 000000000..8b0bb691c
--- /dev/null
+++ b/libc/ports/sysdeps/unix/sysv/linux/tile/tilegx/ldd-rewrite.sed
@@ -0,0 +1 @@
+s_^\(RTLDLIST=\)\(.*lib\)\(\|32\)\(/[^/]*\.so\.[0-9.]*\)[ ]*$_\1"\2\4 \232\4"_
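
The rewrite turns the single RTLDLIST entry in the installed ldd script into a quoted pair naming both dynamic linkers, duplicating the path with "32" spliced into the library directory (\2 then the literal "32" in the \232 of the replacement). Assuming a hypothetical input line of RTLDLIST=/lib/ld.so.1, the script produces:

    RTLDLIST="/lib/ld.so.1 /lib32/ld.so.1"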