131 files changed, 1701 insertions, 5256 deletions
diff --git a/libc/sysdeps/generic/bp-sym.h b/libc/sysdeps/generic/bp-sym.h
deleted file mode 100644
index 089912a68..000000000
--- a/libc/sysdeps/generic/bp-sym.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Bounded-pointer symbol modifier.
-   Copyright (C) 2000-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Greg McGary <greg@mcgary.org>
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define BP_SYM(name) _BP_SYM (name)
-#if __BOUNDED_POINTERS__
-# define _BP_SYM(name) __BP_##name
-#else
-# define _BP_SYM(name) name
-#endif
diff --git a/libc/sysdeps/generic/ldsodefs.h b/libc/sysdeps/generic/ldsodefs.h
index 1781574a9..b89691a97 100644
--- a/libc/sysdeps/generic/ldsodefs.h
+++ b/libc/sysdeps/generic/ldsodefs.h
@@ -1018,6 +1018,13 @@ extern struct link_map *_dl_find_dso_for_object (const ElfW(Addr) addr)
      internal_function;
 rtld_hidden_proto (_dl_find_dso_for_object)
 
+/* Initialization which is normally done by the dynamic linker.  */
+extern void _dl_non_dynamic_init (void) internal_function;
+
+/* Used by static binaries to check the auxiliary vector.  */
+extern void _dl_aux_init (ElfW(auxv_t) *av) internal_function;
+
+
 __END_DECLS
 
 #endif /* ldsodefs.h */
diff --git a/libc/sysdeps/i386/fpu/libm-test-ulps b/libc/sysdeps/i386/fpu/libm-test-ulps
index fd0180ffd..6186c99af 100644
--- a/libc/sysdeps/i386/fpu/libm-test-ulps
+++ b/libc/sysdeps/i386/fpu/libm-test-ulps
@@ -2475,6 +2475,9 @@ ldouble: 2
 Test "j0 (0x1.d7ce3ap+107) == 2.775523647291230802651040996274861694514e-17":
 float: 1
 ifloat: 1
+Test "j0 (0x1p16382) == -1.2193782500509000574176799046642541129387e-2466":
+ildouble: 1
+ldouble: 1
 Test "j0 (10.0) == -0.245935764451348335197760862485328754":
 double: 3
 float: 1
@@ -2508,6 +2511,9 @@ ldouble: 1
 Test "j1 (0x1.ff00000000002p+840) == 1.846591691699331493194965158699937660696e-127":
 double: 1
 idouble: 1
+Test "j1 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467":
+ildouble: 1
+ldouble: 1
 Test "j1 (10.0) == 0.0434727461688614366697487680258592883":
 double: 2
 float: 1
@@ -3285,6 +3291,9 @@ idouble: 1
 Test "y0 (0x1p-80) == -3.5375500319532942168707373066828113573541e+1":
 double: 1
 idouble: 1
+Test "y0 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467":
+ildouble: 1
+ldouble: 1
 Test "y0 (1.0) == 0.0882569642156769579829267660235151628":
 double: 2
 float: 1
@@ -3329,6 +3338,9 @@ ldouble: 1
 Test "y1 (0x1p-10) == -6.5190099301063115047395187618929589514382e+02":
 float: 1
 ifloat: 1
+Test "y1 (0x1p16382) == 1.2193782500509000574176799046642541129387e-2466":
+ildouble: 1
+ldouble: 1
 Test "y1 (1.0) == -0.781212821300288716547150000047964821":
 double: 1
 idouble: 1
diff --git a/libc/sysdeps/ieee754/bits/nan.h b/libc/sysdeps/ieee754/bits/nan.h
index 935271a7c..41f47ba09 100644
--- a/libc/sysdeps/ieee754/bits/nan.h
+++ b/libc/sysdeps/ieee754/bits/nan.h
@@ -39,14 +39,14 @@
 # include <endian.h>
 
 # if __BYTE_ORDER == __BIG_ENDIAN
-#  define __nan_bytes		{ 0x7f, 0xc0, 0, 0 }
+#  define __qnan_bytes		{ 0x7f, 0xc0, 0, 0 }
 # endif
 # if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define __nan_bytes		{ 0, 0, 0xc0, 0x7f }
+#  define __qnan_bytes		{ 0, 0, 0xc0, 0x7f }
 # endif
 
-static union { unsigned char __c[4]; float __d; } __nan_union
-  __attribute__ ((__unused__)) = { __nan_bytes };
-# define NAN	(__nan_union.__d)
+static union { unsigned char __c[4]; float __d; } __qnan_union
+  __attribute__ ((__unused__)) = { __qnan_bytes };
+# define NAN	(__qnan_union.__d)
 
 #endif	/* GCC.  */
diff --git a/libc/sysdeps/ieee754/dbl-64/e_j0.c b/libc/sysdeps/ieee754/dbl-64/e_j0.c
index f393a762b..d641a0914 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_j0.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_j0.c
@@ -293,7 +293,8 @@ pzero(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = pR8; q= pS8;}
+	if (ix>=0x41b00000)    {return one;}
+	else if(ix>=0x40200000){p = pR8; q= pS8;}
 	else if(ix>=0x40122E8B){p = pR5; q= pS5;}
 	else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
 	else if(ix>=0x40000000){p = pR2; q= pS2;}
@@ -400,7 +401,8 @@ qzero(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = qR8; q= qS8;}
+	if (ix>=0x41b00000)    {return -.125/x;}
+	else if(ix>=0x40200000){p = qR8; q= qS8;}
 	else if(ix>=0x40122E8B){p = qR5; q= qS5;}
 	else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
 	else if(ix>=0x40000000){p = qR2; q= qS2;}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_j1.c b/libc/sysdeps/ieee754/dbl-64/e_j1.c
index cba4d46b1..cca5f20b4 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_j1.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_j1.c
@@ -291,7 +291,8 @@ pone(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = pr8; q= ps8;}
+	if (ix>=0x41b00000)    {return one;}
+	else if(ix>=0x40200000){p = pr8; q= ps8;}
 	else if(ix>=0x40122E8B){p = pr5; q= ps5;}
 	else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
 	else if(ix>=0x40000000){p = pr2; q= ps2;}
@@ -399,7 +400,8 @@ qone(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = qr8; q= qs8;}
+	if (ix>=0x41b00000)    {return .375/x;}
+	else if(ix>=0x40200000){p = qr8; q= qs8;}
 	else if(ix>=0x40122E8B){p = qr5; q= qs5;}
 	else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
 	else if(ix>=0x40000000){p = qr2; q= qs2;}
diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.c b/libc/sysdeps/ieee754/dbl-64/mpa.c
index 8fc2626f7..076647654 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpa.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpa.c
@@ -611,6 +611,7 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
     }
 }
 
+#ifndef NO__MUL
 /* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
    and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
    digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
@@ -761,7 +762,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   EZ = e;
   Z[0] = X[0] * Y[0];
 }
+#endif
 
+#ifndef NO__SQR
 /* Square *X and store result in *Y.  X and Y may not overlap.  For P in
    [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
    error is bounded by 1.001 ULP.  This is a faster special case of
@@ -862,6 +865,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
 
   EY = e;
 }
+#endif
 
 /* Invert *X and store in *Y.  Relative error bound:
    - For P = 2: 1.001 * R ^ (1 - P)
diff --git a/libc/sysdeps/ieee754/dbl-64/slowexp.c b/libc/sysdeps/ieee754/dbl-64/slowexp.c
index c423fc311..8f353f634 100644
--- a/libc/sysdeps/ieee754/dbl-64/slowexp.c
+++ b/libc/sysdeps/ieee754/dbl-64/slowexp.c
@@ -27,20 +27,23 @@
 /*Converting from double precision to Multi-precision and calculating     */
 /* e^x                                                                    */
 /**************************************************************************/
-#include "mpa.h"
 #include <math_private.h>
 
+#ifndef USE_LONG_DOUBLE_FOR_MP
+# include "mpa.h"
+void __mpexp (mp_no *x, mp_no *y, int p);
+#endif
+
 #ifndef SECTION
 # define SECTION
 #endif
 
-void __mpexp (mp_no *x, mp_no *y, int p);
-
 /*Converting from double precision to Multi-precision and calculating  e^x */
 double
 SECTION
 __slowexp (double x)
 {
+#ifndef USE_LONG_DOUBLE_FOR_MP
   double w, z, res, eps = 3.0e-26;
   int p;
   mp_no mpx, mpy, mpz, mpw, mpeps, mpcor;
@@ -66,4 +69,7 @@ __slowexp (double x)
       __mp_dbl (&mpy, &res, p);
       return res;
     }
+#else
+  return (double) __ieee754_expl((long double)x);
+#endif
 }
diff --git a/libc/sysdeps/ieee754/dbl-64/slowpow.c b/libc/sysdeps/ieee754/dbl-64/slowpow.c
index cccc7e32c..a379728b1 100644
--- a/libc/sysdeps/ieee754/dbl-64/slowpow.c
+++ b/libc/sysdeps/ieee754/dbl-64/slowpow.c
@@ -59,6 +59,23 @@ __slowpow (double x, double y, double z)
   if (res >= 0)
     return res;
 
+  /* Compute pow as long double.  This is currently only used by powerpc, where
+     one may get 106 bits of accuracy.  */
+#ifdef USE_LONG_DOUBLE_FOR_MP
+  long double ldw, ldz, ldpp;
+  static const long double ldeps = 0x4.0p-96;
+
+  ldz = __ieee754_logl ((long double) x);
+  ldw = (long double) y *ldz;
+  ldpp = __ieee754_expl (ldw);
+  res = (double) (ldpp + ldeps);
+  res1 = (double) (ldpp - ldeps);
+
+  /* Return the result if it is accurate enough.  */
+  if (res == res1)
+    return res;
+#endif
+
   /* Or else, calculate using multiple precision.  P = 10 implies accuracy of
      240 bits accuracy, since MP_NO has a radix of 2^24.  */
   p = 10;
diff --git a/libc/sysdeps/ieee754/ldbl-128/e_j0l.c b/libc/sysdeps/ieee754/ldbl-128/e_j0l.c
index 1b1828958..9e7880c49 100644
--- a/libc/sysdeps/ieee754/ldbl-128/e_j0l.c
+++ b/libc/sysdeps/ieee754/ldbl-128/e_j0l.c
@@ -700,6 +700,25 @@ __ieee754_j0l (long double x)
       return p;
     }
 
+  /* X = x - pi/4
+     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
+     = 1/sqrt(2) * (cos(x) + sin(x))
+     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
+     = 1/sqrt(2) * (sin(x) - cos(x))
+     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+     cf. Fdlibm.  */
+  __sincosl (xx, &s, &c);
+  ss = s - c;
+  cc = s + c;
+  z = -__cosl (xx + xx);
+  if ((s * c) < 0)
+    cc = z / ss;
+  else
+    ss = z / cc;
+
+  if (xx > 0x1p256L)
+    return ONEOSQPI * cc / __ieee754_sqrtl (xx);
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -761,21 +780,6 @@ __ieee754_j0l (long double x)
   p = 1.0L + z * p;
   q = z * xinv * q;
   q = q - 0.125L * xinv;
-  /* X = x - pi/4
-     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
-     = 1/sqrt(2) * (cos(x) + sin(x))
-     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
-     = 1/sqrt(2) * (sin(x) - cos(x))
-     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
-     cf. Fdlibm.  */
-  __sincosl (xx, &s, &c);
-  ss = s - c;
-  cc = s + c;
-  z = -__cosl (xx + xx);
-  if ((s * c) < 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx);
   return z;
 }
@@ -843,6 +847,25 @@ long double
       return p;
     }
 
+  /* X = x - pi/4
+     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
+     = 1/sqrt(2) * (cos(x) + sin(x))
+     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
+     = 1/sqrt(2) * (sin(x) - cos(x))
+     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+     cf. Fdlibm.  */
+  __sincosl (x, &s, &c);
+  ss = s - c;
+  cc = s + c;
+  z = -__cosl (x + x);
+  if ((s * c) < 0)
+    cc = z / ss;
+  else
+    ss = z / cc;
+
+  if (xx > 0x1p256L)
+    return ONEOSQPI * ss / __ieee754_sqrtl (x);
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -904,21 +927,6 @@ long double
   p = 1.0L + z * p;
   q = z * xinv * q;
   q = q - 0.125L * xinv;
-  /* X = x - pi/4
-     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
-     = 1/sqrt(2) * (cos(x) + sin(x))
-     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
-     = 1/sqrt(2) * (sin(x) - cos(x))
-     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
-     cf. Fdlibm.  */
-  __sincosl (x, &s, &c);
-  ss = s - c;
-  cc = s + c;
-  z = -__cosl (x + x);
-  if ((s * c) < 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (x);
   return z;
 }
diff --git a/libc/sysdeps/ieee754/ldbl-128/e_j1l.c b/libc/sysdeps/ieee754/ldbl-128/e_j1l.c
index f16343b26..95e01a39c 100644
--- a/libc/sysdeps/ieee754/ldbl-128/e_j1l.c
+++ b/libc/sysdeps/ieee754/ldbl-128/e_j1l.c
@@ -706,6 +706,29 @@ __ieee754_j1l (long double x)
       return p;
     }
 
+  /* X = x - 3 pi/4
+     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
+     = 1/sqrt(2) * (-cos(x) + sin(x))
+     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
+     = -1/sqrt(2) * (sin(x) + cos(x))
+     cf. Fdlibm.  */
+  __sincosl (xx, &s, &c);
+  ss = -s - c;
+  cc = s - c;
+  z = __cosl (xx + xx);
+  if ((s * c) > 0)
+    cc = z / ss;
+  else
+    ss = z / cc;
+
+  if (xx > 0x1p256L)
+    {
+      z = ONEOSQPI * cc / __ieee754_sqrtl (xx);
+      if (x < 0)
+	z = -z;
+      return z;
+    }
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -767,20 +790,6 @@ __ieee754_j1l (long double x)
   p = 1.0L + z * p;
   q = z * q;
   q = q * xinv + 0.375L * xinv;
-  /* X = x - 3 pi/4
-     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
-     = 1/sqrt(2) * (-cos(x) + sin(x))
-     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
-     = -1/sqrt(2) * (sin(x) + cos(x))
-     cf. Fdlibm.  */
-  __sincosl (xx, &s, &c);
-  ss = -s - c;
-  cc = s - c;
-  z = __cosl (xx + xx);
-  if ((s * c) > 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx);
   if (x < 0)
     z = -z;
@@ -850,6 +859,24 @@ __ieee754_y1l (long double x)
       return p;
     }
 
+  /* X = x - 3 pi/4
+     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
+     = 1/sqrt(2) * (-cos(x) + sin(x))
+     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
+     = -1/sqrt(2) * (sin(x) + cos(x))
+     cf. Fdlibm.  */
+  __sincosl (xx, &s, &c);
+  ss = -s - c;
+  cc = s - c;
+  z = __cosl (xx + xx);
+  if ((s * c) > 0)
+    cc = z / ss;
+  else
+    ss = z / cc;
+
+  if (xx > 0x1p256L)
+    return ONEOSQPI * ss / __ieee754_sqrtl (xx);
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -911,20 +938,6 @@ __ieee754_y1l (long double x)
   p = 1.0L + z * p;
   q = z * q;
   q = q * xinv + 0.375L * xinv;
-  /* X = x - 3 pi/4
-     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
-     = 1/sqrt(2) * (-cos(x) + sin(x))
-     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
-     = -1/sqrt(2) * (sin(x) + cos(x))
-     cf. Fdlibm.  */
-  __sincosl (xx, &s, &c);
-  ss = -s - c;
-  cc = s - c;
-  z = __cosl (xx + xx);
-  if ((s * c) > 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (xx);
   return z;
 }
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
index 117bd0f05..abc78a35b 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
@@ -52,7 +52,7 @@ __ieee754_acoshl(long double x)
 	    return __ieee754_logl(2.0*x-one/(x+__ieee754_sqrtl(t-one)));
 	} else {			/* 1<x<2 */
 	    t = x-one;
-	    return __log1p(t+__sqrtl(2.0*t+t*t));
+	    return __log1p(t+__ieee754_sqrtl(2.0*t+t*t));
 	}
 }
 strong_alias (__ieee754_acoshl, __acoshl_finite)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
index be9ac71cb..1cce1fc4d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
@@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
 /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
    of long double implemented as double double.  */
 static inline long double
-ldbl_pack (double a, double aa)
+default_ldbl_pack (double a, double aa)
 {
   union ibm_extended_long_double u;
   u.dd[0] = a;
@@ -134,7 +134,7 @@ ldbl_pack (double a, double aa)
 }
 
 static inline void
-ldbl_unpack (long double l, double *a, double *aa)
+default_ldbl_unpack (long double l, double *a, double *aa)
 {
   union ibm_extended_long_double u;
   u.d = l;
@@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa)
   *aa = u.dd[1];
 }
 
+#ifndef ldbl_pack
+# define ldbl_pack   default_ldbl_pack
+#endif
+#ifndef ldbl_unpack
+# define ldbl_unpack default_ldbl_unpack
+#endif
 
 /* Convert a finite long double to canonical form.
    Does not handle +/-Inf properly.  */
diff --git a/libc/sysdeps/ieee754/ldbl-96/e_j1l.c b/libc/sysdeps/ieee754/ldbl-96/e_j1l.c
index 785c0b067..4c13018ae 100644
--- a/libc/sysdeps/ieee754/ldbl-96/e_j1l.c
+++ b/libc/sysdeps/ieee754/ldbl-96/e_j1l.c
@@ -203,7 +203,7 @@ __ieee754_y1l (long double x)
       __sincosl (x, &s, &c);
       ss = -s - c;
       cc = s - c;
-      if (ix < 0x7fe00000)
+      if (ix < 0x7ffe)
 	{			/* make sure x+x not overflow */
 	  z = __cosl (x + x);
 	  if ((s * c) > zero)
diff --git a/libc/sysdeps/init_array/crti.S b/libc/sysdeps/init_array/crti.S
new file mode 100644
index 000000000..0a6e9fd95
--- /dev/null
+++ b/libc/sysdeps/init_array/crti.S
@@ -0,0 +1,13 @@
+/* Dummy crti file.
+
+   In this configuration, crti.o and crtn.o are both empty because the
+   .init_array/.fini_array sections are used exclusively.
+
+   Older ports cannot use this because even if the linker used to
+   build libc itself has .init_array support, we don't want to produce
+   a crt[in].o that presumes a linker that new will be used to link
+   other things later.
+
+   But new configurations without compatibility concerns for
+   toolchains without .init_array support can use this to avoid the
+   superfluous .init and .fini boilerplate code.  */
diff --git a/libc/sysdeps/init_array/crtn.S b/libc/sysdeps/init_array/crtn.S
new file mode 100644
index 000000000..6f70e7716
--- /dev/null
+++ b/libc/sysdeps/init_array/crtn.S
@@ -0,0 +1,13 @@
+/* Dummy crtn file.
+
+   In this configuration, crti.o and crtn.o are both empty because the
+   .init_array/.fini_array sections are used exclusively.
+
+   Older ports cannot use this because even if the linker used to
+   build libc itself has .init_array support, we don't want to produce
+   a crt[in].o that presumes a linker that new will be used to link
+   other things later.
+
+   But new configurations without compatibility concerns for
+   toolchains without .init_array support can use this to avoid the
+   superfluous .init and .fini boilerplate code.  */
diff --git a/libc/sysdeps/init_array/elf-init.c b/libc/sysdeps/init_array/elf-init.c
new file mode 100644
index 000000000..c6467aac8
--- /dev/null
+++ b/libc/sysdeps/init_array/elf-init.c
@@ -0,0 +1,37 @@
+/* Startup support for ELF initializers/finalizers in the main executable.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file. (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so. The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define NO_INITFINI
+#include <csu/elf-init.c>
diff --git a/libc/sysdeps/init_array/gmon-start.c b/libc/sysdeps/init_array/gmon-start.c
new file mode 100644
index 000000000..6f2d6dc8b
--- /dev/null
+++ b/libc/sysdeps/init_array/gmon-start.c
@@ -0,0 +1,41 @@
+/* gmon startup hook using .preinit_array.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file.  (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so.  The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Instead of defining __gmon_start__ globally in gcrt1.o, we make it
+   static and just put a pointer to it into the .preinit_array section.  */
+
+#define GMON_START_ARRAY_SECTION	".preinit_array"
+
+#include <csu/gmon-start.c>
diff --git a/libc/sysdeps/powerpc/Implies b/libc/sysdeps/powerpc/Implies
index 7ccf9a7c4..78dba9510 100644
--- a/libc/sysdeps/powerpc/Implies
+++ b/libc/sysdeps/powerpc/Implies
@@ -1,4 +1,5 @@
 # On PowerPC we use the IBM extended long double format.
 ieee754/ldbl-128ibm
+ieee754/ldbl-opt
 ieee754/dbl-64
 ieee754/flt-32
diff --git a/libc/sysdeps/powerpc/bits/fenv.h b/libc/sysdeps/powerpc/bits/fenv.h
index 1054ba142..07cd3c8e5 100644
--- a/libc/sysdeps/powerpc/bits/fenv.h
+++ b/libc/sysdeps/powerpc/bits/fenv.h
@@ -123,7 +123,7 @@ enum
        these bits is set.  Note, though, that you can't disable or
        enable these exceptions individually.  */
 
-    /* Operation with SNaN. */
+    /* Operation with a sNaN.  */
     FE_INVALID_SNAN =
 # define FE_INVALID_SNAN	(1 << (31 - 7))
       FE_INVALID_SNAN,
@@ -148,7 +148,7 @@ enum
 # define FE_INVALID_IMZ	(1 << (31 - 11))
       FE_INVALID_IMZ,
 
-    /* Comparison with NaN or SNaN.  */
+    /* Comparison with a NaN.  */
     FE_INVALID_COMPARE =
 # define FE_INVALID_COMPARE	(1 << (31 - 12))
       FE_INVALID_COMPARE,
diff --git a/libc/sysdeps/powerpc/fpu/Makefile b/libc/sysdeps/powerpc/fpu/Makefile
index ffacf1a75..fda59f9fa 100644
--- a/libc/sysdeps/powerpc/fpu/Makefile
+++ b/libc/sysdeps/powerpc/fpu/Makefile
@@ -1,6 +1,5 @@
 ifeq ($(subdir),math)
 libm-support += fenv_const fe_nomask fe_mask t_sqrt
-libm-tests += test-powerpc-snan
 
 # libm needs ld.so to access dl_hwcap
 $(objpfx)libm.so: $(elfobjdir)/ld.so
diff --git a/libc/sysdeps/powerpc/fpu/fenv_libc.h b/libc/sysdeps/powerpc/fpu/fenv_libc.h
index abae2f3df..191095156 100644
--- a/libc/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/libc/sysdeps/powerpc/fpu/fenv_libc.h
@@ -116,7 +116,7 @@ enum {
   FPSCR_UX,        /* underflow */
   FPSCR_ZX,        /* zero divide */
   FPSCR_XX,        /* inexact */
-  FPSCR_VXSNAN,    /* invalid operation for SNaN */
+  FPSCR_VXSNAN,    /* invalid operation for sNaN */
   FPSCR_VXISI,     /* invalid operation for Inf-Inf */
   FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
   FPSCR_VXZDZ,     /* invalid operation for 0/0 */
@@ -152,7 +152,7 @@ enum {
 #endif /* _ARCH_PWR6 */
 
 /* This operation (i) sets the appropriate FPSCR bits for its
-   parameter, (ii) converts SNaN to the corresponding NaN, and (iii)
+   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
    otherwise passes its parameter through unchanged (in particular, -0
    and +0 stay as they were).  The `obvious' way to do this is optimised
    out by gcc.  */
diff --git a/libc/sysdeps/powerpc/fpu/math_ldbl.h b/libc/sysdeps/powerpc/fpu/math_ldbl.h
index 20224e664..36378c023 100644
--- a/libc/sysdeps/powerpc/fpu/math_ldbl.h
+++ b/libc/sysdeps/powerpc/fpu/math_ldbl.h
@@ -2,132 +2,12 @@
 #error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
 #endif
 
-#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
-#include <ieee754.h>
-  
-static inline void
-ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x)
-{
-  /* We have 105 bits of mantissa plus one implicit digit.  Since
-     106 bits are representable we use the first implicit digit for
-     the number before the decimal point and the second implicit bit
-     as bit 53 of the mantissa.  */
-  unsigned long long hi, lo;
-  int ediff;
-  union ibm_extended_long_double eldbl;
-  eldbl.d = x;
-  *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
-
-  lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
-  hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
-  /* If the lower double is not a denomal or zero then set the hidden
-     53rd bit.  */
-  if (eldbl.ieee.exponent2 > 0x001)
-    {
-      lo |= (1ULL << 52);
-      lo = lo << 7; /* pre-shift lo to match ieee854.  */
-      /* The lower double is normalized separately from the upper.  We
-	 may need to adjust the lower mantissa to reflect this.  */
-      ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
-      if (ediff > 53)
-	lo = lo >> (ediff-53);
-    }
-  hi |= (1ULL << 52);
-  
-  if ((eldbl.ieee.negative != eldbl.ieee.negative2)
-      && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
-    {
-      hi--;
-      lo = (1ULL << 60) - lo;
-      if (hi < (1ULL << 52))
-	{
-	  /* we have a borrow from the hidden bit, so shift left 1.  */
-	  hi = (hi << 1) | (lo >> 59);
-	  lo = 0xfffffffffffffffLL & (lo << 1);
-	  *exp = *exp - 1;
-	}
-    }
-  *lo64 = (hi << 60) | lo;
-  *hi64 = hi >> 4;
-}
-
-static inline long double
-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
-{
-  union ibm_extended_long_double u;
-  unsigned long hidden2, lzcount;
-  unsigned long long hi, lo;
-
-  u.ieee.negative = sign;
-  u.ieee.negative2 = sign;
-  u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
-  u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
-  /* Expect 113 bits (112 bits + hidden) right justified in two longs.
-     The low order 53 bits (52 + hidden) go into the lower double */ 
-  lo = (lo64 >> 7)& ((1ULL << 53) - 1);
-  hidden2 = (lo64 >> 59) &  1ULL;
-  /* The high order 53 bits (52 + hidden) go into the upper double */
-  hi = (lo64 >> 60) & ((1ULL << 11) - 1);
-  hi |= (hi64 << 4);
-
-  if (lo != 0LL)
-    {
-      /* hidden2 bit of low double controls rounding of the high double.
-	 If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
-	 plus change the sign of the low double to compensate.  */
-      if (hidden2)
-	{
-	  hi++;
-	  u.ieee.negative2 = !sign;
-	  lo = (1ULL << 53) - lo;
-	}
-      /* The hidden bit of the lo mantissa is zero so we need to
-	 normalize the it for the low double.  Shift it left until the
-	 hidden bit is '1' then adjust the 2nd exponent accordingly.  */ 
-
-      if (sizeof (lo) == sizeof (long))
-	lzcount = __builtin_clzl (lo);
-      else if ((lo >> 32) != 0)
-	lzcount = __builtin_clzl ((long) (lo >> 32));
-      else
-	lzcount = __builtin_clzl ((long) lo) + 32;
-      lzcount = lzcount - 11;
-      if (lzcount > 0)
-	{
-	  int expnt2 = u.ieee.exponent2 - lzcount;
-	  if (expnt2 >= 1)
-	    {
-	      /* Not denormal.  Normalize and set low exponent.  */
-	      lo = lo << lzcount;
-	      u.ieee.exponent2 = expnt2;
-	    }
-	  else
-	    {
-	      /* Is denormal.  */
-	      lo = lo << (lzcount + expnt2);
-	      u.ieee.exponent2 = 0;
-	    }
-	}
-    }
-  else
-    {
-      u.ieee.negative2 = 0;
-      u.ieee.exponent2 = 0;
-    }
-
-  u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
-  u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
-  u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
-  u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
-  return u.d;
-}
-  
-/* gcc generates disgusting code to pack and unpack long doubles.
-   This tells gcc that pack/unpack is really a nop.  We use fr1/fr2
-   because those are the regs used to pass/return a single
-   long double arg.  */
+/* GCC does not optimize the default ldbl_pack code well enough to avoid
+   spilling registers to the stack.  The following optimization tells gcc
+   that pack/unpack is really a nop.  We use fr1/fr2 because those are the
+   regs used to pass/return a single long double arg.  */
 static inline long double
-ldbl_pack (double a, double aa)
+ldbl_pack_ppc (double a, double aa)
 {
   register long double x __asm__ ("fr1");
   register double xh __asm__ ("fr1");
@@ -139,7 +19,7 @@ ldbl_pack (double a, double aa)
 }
 
 static inline void
-ldbl_unpack (long double l, double *a, double *aa)
+ldbl_unpack_ppc (long double l, double *a, double *aa)
 {
   register long double x __asm__ ("fr1");
   register double xh __asm__ ("fr1");
@@ -150,40 +30,7 @@ ldbl_unpack (long double l, double *a, double *aa)
   *aa = xl;
 }
 
+#define ldbl_pack   ldbl_pack_ppc
+#define ldbl_unpack ldbl_unpack_ppc
 
-/* Convert a finite long double to canonical form.
-   Does not handle +/-Inf properly.  */
-static inline void
-ldbl_canonicalize (double *a, double *aa)
-{
-  double xh, xl;
-
-  xh = *a + *aa;
-  xl = (*a - xh) + *aa;
-  *a = xh;
-  *aa = xl;
-}
-
-/* Simple inline nearbyint (double) function .
-   Only works in the default rounding mode
-   but is useful in long double rounding functions.  */
-static inline double
-ldbl_nearbyint (double a)
-{
-  double two52 = 0x10000000000000LL;
-
-  if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
-    {
-      if (__builtin_expect ((a > 0.0), 1))
-	{
-	  a += two52;
-	  a -= two52;
-	}
-      else if (__builtin_expect ((a < 0.0), 1))
-	{
-	  a = two52 - a;
-	  a = -(a - two52);
-	}
-    }
-  return a;
-}
+#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h>
diff --git a/libc/sysdeps/powerpc/fpu/test-powerpc-snan.c b/libc/sysdeps/powerpc/fpu/test-powerpc-snan.c
deleted file mode 100644
index e3bd47aba..000000000
--- a/libc/sysdeps/powerpc/fpu/test-powerpc-snan.c
+++ /dev/null
@@ -1,382 +0,0 @@
-/* Test Signalling NaN in isnan, isinf etc functions.
-   Copyright (C) 2008-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Andreas Jaeger <aj@suse.de>, 2005.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define _GNU_SOURCE 1
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <fenv.h>
-#include <signal.h>
-#include <setjmp.h>
-#include <errno.h>
-
-int dest_offset;
-char *dest_address;
-double	value = 123.456;
-double	zero = 0.0;
-
-float SNANf;
-double SNAN;
-long double SNANl;
-
-static sigjmp_buf sigfpe_buf;
-
-void
-init_signaling_nan (void)
-{
-    union {
-	double _ld16;
-	double _d8;
-	unsigned int _ui4[4];
-	float _f4;
-    } nan_temp;
-    
-    nan_temp._ui4[0] = 0x7fa00000;
-    SNANf = nan_temp._f4;
-
-    nan_temp._ui4[0] = 0x7ff40000;
-    nan_temp._ui4[1] = 0x00000000;
-    SNAN = nan_temp._d8;
-
-    nan_temp._ui4[0] = 0x7ff40000;
-    nan_temp._ui4[1] = 0x00000000;
-    nan_temp._ui4[2] = 0x00000000;
-    nan_temp._ui4[3] = 0x00000000;
-    SNANl = nan_temp._ld16;
-}
-
-static float
-snan_float (void)
-{
-  return SNANf;
-}
-
-static double
-snan_double (void)
-{
-  return SNAN;
-}
-
-typedef long double ldouble;
-
-static ldouble
-snan_ldouble (void)
-{
-  return SNANl;
-}
-
-
-void
-myFPsighandler(int signal,
-             siginfo_t *info,
-             void *context)
-{
-  siglongjmp(sigfpe_buf, 0);
-}
-
-int
-set_sigaction_FP(void)
-{
-    struct sigaction sa;
-    /* register RT signal handler via sigaction */
-    sa.sa_flags = SA_SIGINFO;
-    sa.sa_sigaction = &myFPsighandler;
-    sigemptyset(&sa.sa_mask);
-    sigaction(SIGFPE, &sa, NULL);
-
-    return 0;
-}
-
-int
-remove_sigaction_FP(void)
-{
-    struct sigaction sa;
-    /* restore default RT signal handler via sigaction */
-    sa.sa_flags = SA_SIGINFO;
-    sa.sa_handler = SIG_DFL;
-    sigemptyset(&sa.sa_mask);
-    sigaction(SIGFPE, &sa, NULL);
-
-    return 0;
-}
-
-static int errors = 0;
-
-static void
-check (const char *testname, int result)
-{
-  if (!result) {
-    printf ("Failure: %s\n", testname);
-    errors++;
-  }
-}
-
-#define TEST_FUNC(NAME, FLOAT) \
-static void								      \
-NAME (void)								      \
-{									      \
-  /* Variables are declared volatile to forbid some compiler		      \
-     optimizations.  */							      \
-  volatile FLOAT Inf_var, NaN_var, zero_var, one_var, SNaN_var;		      \
-  fenv_t saved_fenv;							      \
-									      \
-  zero_var = 0.0;							      \
-  one_var = 1.0;							      \
-  NaN_var = zero_var / zero_var;					      \
-  SNaN_var = snan_##FLOAT ();						      \
-  Inf_var = one_var / zero_var;						      \
-									      \
-  (void) &zero_var;							      \
-  (void) &one_var;							      \
-  (void) &NaN_var;							      \
-  (void) &SNaN_var;							      \
-  (void) &Inf_var;							      \
-									      \
-  set_sigaction_FP ();							      \
-  fegetenv(&saved_fenv);						      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnan(NaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnan (NaN)", isnan (NaN_var));			      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnan(-NaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnan (-NaN)", isnan (-NaN_var));			      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnan(SNaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnan (SNaN)", isnan (SNaN_var));			      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnan(-SNaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnan (-SNaN)", isnan (-SNaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isinf(NaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isinf (NaN)", !isinf (NaN_var));			      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isinf(-NaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isinf (-NaN)", !isinf (-NaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isinf(SNaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isinf (SNaN)", !isinf (SNaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isinf(-SNaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isinf (-SNaN)", !isinf (-SNaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isfinite(NaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isfinite (NaN)", !isfinite (NaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isfinite(-NaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isfinite (-NaN)", !isfinite (-NaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isfinite(SNaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isfinite (SNaN)", !isfinite (SNaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isfinite(-SNaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isfinite (-SNaN)", !isfinite (-SNaN_var));	      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnormal(NaN) raised SIGFPE\n");			      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnormal (NaN)", !isnormal (NaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnormal(-NaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnormal (-NaN)", !isnormal (-NaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnormal(SNaN) isnormal SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnormal (SNaN)", !isnormal (SNaN_var));		      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " isnormal(-SNaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " isnormal (-SNaN)", !isnormal (-SNaN_var));	      \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " fpclassify(NaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " fpclassify (NaN)", (fpclassify (NaN_var)==FP_NAN));     \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " fpclassify(-NaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " fpclassify (-NaN)", (fpclassify (-NaN_var)==FP_NAN));   \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " fpclassify(SNaN) isnormal SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " fpclassify (SNaN)", (fpclassify (SNaN_var)==FP_NAN));   \
-    }									      \
-									      \
-  feclearexcept(FE_ALL_EXCEPT);						      \
-  feenableexcept (FE_ALL_EXCEPT);					      \
-  if (sigsetjmp(sigfpe_buf, 0))						      \
-    {									      \
-      printf (#FLOAT " fpclassify(-SNaN) raised SIGFPE\n");		      \
-      errors++;								      \
-    } else {								      \
-      check (#FLOAT " fpclassify (-SNaN)", (fpclassify (-SNaN_var)==FP_NAN)); \
-    }									      \
-									      \
-  fesetenv(&saved_fenv); /* restore saved fenv */			      \
-  remove_sigaction_FP();						      \
-}
-
-TEST_FUNC (float_test, float)
-TEST_FUNC (double_test, double)
-#ifndef NO_LONG_DOUBLE
-TEST_FUNC (ldouble_test, ldouble)
-#endif
-
-static int
-do_test (void)
-{
-  init_signaling_nan();
-
-  float_test();
-  double_test();
-#ifndef NO_LONG_DOUBLE
-  ldouble_test();
-#endif
-
-  return errors != 0;
-}
-
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/Makefile b/libc/sysdeps/powerpc/power4/fpu/Makefile
index f487ed601..e17d32f30 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/Makefile
+++ b/libc/sysdeps/powerpc/power4/fpu/Makefile
@@ -2,4 +2,6 @@
 
 ifeq ($(subdir),math)
 CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops
+CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1
+CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1
 endif
diff --git a/libc/sysdeps/powerpc/power4/fpu/mpa.c b/libc/sysdeps/powerpc/power4/fpu/mpa.c
new file mode 100644
index 000000000..1858c9740
--- /dev/null
+++ b/libc/sysdeps/powerpc/power4/fpu/mpa.c
@@ -0,0 +1,214 @@
+
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001-2013 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Define __mul and __sqr and use the rest from generic code.  */
+#define NO__MUL
+#define NO__SQR
+
+#include <sysdeps/ieee754/dbl-64/mpa.c>
+
+/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
+   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
+   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
+void
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
+  long i, i1, i2, j, k, k2;
+  long p2 = p;
+  double u, zk, zk2;
+
+  /* Is z=0?  True when either sign digit, X[0] or Y[0], is zero.  */
+  if (__glibc_unlikely (X[0] * Y[0] == ZERO))
+    {
+      Z[0] = ZERO;
+      return;
+    }
+
+  /* Multiply, add and carry, working from digit K2 down to digit 2.  */
+  k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
+  zk = Z[k2] = ZERO;
+  for (k = k2; k > 1;)
+    {
+      if (k > p2)
+	{
+	  i1 = k - p2;
+	  i2 = p2 + 1;
+	}
+      else
+	{
+	  i1 = 1;
+	  i2 = k;
+	}
+#if 1
+      /* Rearrange this inner loop to allow the fmadd instructions to be
+         independent and execute in parallel on processors that have
+         dual symmetrical FP pipelines.  */
+      if (i1 < (i2 - 1))
+	{
+	  /* We have at least 2 iterations here.  */
+	  if (((i2 - i1) & 1L) == 1L)
+	    {
+	      /* Odd iteration count: take the last product up front.  */
+	      zk2 = x->d[i2 - 1] * y->d[i1];
+	    }
+	  else
+	    zk2 = 0.0;
+	  /* Do two multiply/adds per loop iteration, using two independent
+	     accumulators, zk and zk2.  */
+	  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
+	    {
+	      zk += x->d[i] * y->d[j];
+	      zk2 += x->d[i + 1] * y->d[j - 1];
+	    }
+	  zk += zk2;		/* Final sum.  */
+	}
+      else
+	{
+	  /* Special case when there is only 1 iteration.  */
+	  zk += x->d[i1] * y->d[i1];
+	}
+#else
+      /* The original code, kept for reference (not compiled).  */
+      for (i = i1, j = i2 - 1; i < i2; i++, j--)
+	zk += X[i] * Y[j];
+#endif
+
+      u = (zk + CUTTER) - CUTTER;
+      if (u > zk)
+	u -= RADIX;
+      Z[k] = zk - u;		/* What stays in digit K.  */
+      zk = u * RADIXI;		/* Carried into digit K - 1.  */
+      --k;
+    }
+  Z[k] = zk;
+
+  int e = EX + EY;
+  /* Is there a carry beyond the most significant digit?  */
+  if (Z[1] == ZERO)
+    {
+      for (i = 1; i <= p2; i++)
+	Z[i] = Z[i + 1];
+      e--;
+    }
+
+  EZ = e;
+  Z[0] = X[0] * Y[0];
+}
+
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
+   error is bounded by 1.001 ULP.  This is a faster special case of
+   multiplication.  */
+void
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+  long i, j, k, ip;
+  double u, yk;
+
+  /* Is x=0?  Then so is its square.  */
+  if (__glibc_unlikely (X[0] == ZERO))
+    {
+      Y[0] = ZERO;
+      return;
+    }
+
+  /* Find IP, the index of the last non-zero digit of X, since it's
+     pointless to multiply the trailing zeroes.  */
+  for (ip = p; ip > 0; ip--)
+    if (X[ip] != ZERO)
+      break;
+
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+  while (k > 2 * ip + 1)
+    Y[k--] = ZERO;
+
+  yk = ZERO;
+
+  while (k > p)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      /* In __mul, this loop (and the one within the next while loop) runs
+         over a range to calculate the mantissa as follows:
+
+         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
+		+ X[n] * Y[k]
+
+         For X == Y, we can get away with summing halfway and doubling the
+	 result.  When K is even, the mid-point term X[K/2] * X[K/2] occurs
+	 only once, so it is added separately (above).  */
+      for (i = k - p, j = p; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;		/* What stays in digit K.  */
+      yk = u * RADIXI;		/* Carried into the next digit.  */
+    }
+
+  while (k > 1)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      /* Likewise for this loop, whose products start at digit 1 of X.  */
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+  Y[k] = yk;
+
+  /* Squares are always positive.  */
+  Y[0] = 1.0;
+
+  int e = EX * 2;
+  /* Is there a carry beyond the most significant digit?  */
+  if (__glibc_unlikely (Y[1] == ZERO))
+    {
+      for (i = 1; i <= p; i++)
+	Y[i] = Y[i + 1];
+      e--;
+    }
+  EY = e;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/Implies b/libc/sysdeps/powerpc/powerpc32/power4/Implies
new file mode 100644
index 000000000..a372141bb
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power4/Implies
@@ -0,0 +1,2 @@
+powerpc/power4/fpu
+powerpc/power4
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
deleted file mode 100644
index b22664772..000000000
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
+++ /dev/null
@@ -1,837 +0,0 @@
-
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2013 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/************************************************************************/
-/*  MODULE_NAME: mpa.c                                                  */
-/*                                                                      */
-/*  FUNCTIONS:                                                          */
-/*               mcr                                                    */
-/*               acr                                                    */
-/*               cpy                                                    */
-/*               norm                                                   */
-/*               denorm                                                 */
-/*               mp_dbl                                                 */
-/*               dbl_mp                                                 */
-/*               add_magnitudes                                         */
-/*               sub_magnitudes                                         */
-/*               add                                                    */
-/*               sub                                                    */
-/*               mul                                                    */
-/*               inv                                                    */
-/*               dvd                                                    */
-/*                                                                      */
-/* Arithmetic functions for multiple precision numbers.                 */
-/* Relative errors are bounded                                          */
-/************************************************************************/
-
-
-#include "endian.h"
-#include "mpa.h"
-#include <sys/param.h>
-
-const mp_no mpone = {1, {1.0, 1.0}};
-const mp_no mptwo = {1, {1.0, 2.0}};
-
-/* Compare mantissa of two multiple precision numbers regardless of the sign
-   and exponent of the numbers.  */
-static int
-mcr (const mp_no *x, const mp_no *y, int p)
-{
-  long i;
-  long p2 = p;
-  for (i = 1; i <= p2; i++)
-    {
-      if (X[i] == Y[i])
-	continue;
-      else if (X[i] > Y[i])
-	return 1;
-      else
-	return -1;
-    }
-  return 0;
-}
-
-/* Compare the absolute values of two multiple precision numbers.  */
-int
-__acr (const mp_no *x, const mp_no *y, int p)
-{
-  long i;
-
-  if (X[0] == ZERO)
-    {
-      if (Y[0] == ZERO)
-	i = 0;
-      else
-	i = -1;
-    }
-  else if (Y[0] == ZERO)
-    i = 1;
-  else
-    {
-      if (EX > EY)
-	i = 1;
-      else if (EX < EY)
-	i = -1;
-      else
-	i = mcr (x, y, p);
-    }
-
-  return i;
-}
-
-/* Copy multiple precision number X into Y.  They could be the same
-   number.  */
-void
-__cpy (const mp_no *x, mp_no *y, int p)
-{
-  long i;
-
-  EY = EX;
-  for (i = 0; i <= p; i++)
-    Y[i] = X[i];
-}
-
-/* Convert a multiple precision number *X into a double precision
-   number *Y, normalized case  (|x| >= 2**(-1022))).  */
-static void
-norm (const mp_no *x, double *y, int p)
-{
-#define R RADIXI
-  long i;
-  double a, c, u, v, z[5];
-  if (p < 5)
-    {
-      if (p == 1)
-	c = X[1];
-      else if (p == 2)
-	c = X[1] + R * X[2];
-      else if (p == 3)
-	c = X[1] + R * (X[2] + R * X[3]);
-      else if (p == 4)
-	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
-    }
-  else
-    {
-      for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
-	{
-	  a *= TWO;
-	  z[1] *= TWO;
-	}
-
-      for (i = 2; i < 5; i++)
-	{
-	  z[i] = X[i] * a;
-	  u = (z[i] + CUTTER) - CUTTER;
-	  if (u > z[i])
-	    u -= RADIX;
-	  z[i] -= u;
-	  z[i - 1] += u * RADIXI;
-	}
-
-      u = (z[3] + TWO71) - TWO71;
-      if (u > z[3])
-	u -= TWO19;
-      v = z[3] - u;
-
-      if (v == TWO18)
-	{
-	  if (z[4] == ZERO)
-	    {
-	      for (i = 5; i <= p; i++)
-		{
-		  if (X[i] == ZERO)
-		    continue;
-		  else
-		    {
-		      z[3] += ONE;
-		      break;
-		    }
-		}
-	    }
-	  else
-	    z[3] += ONE;
-	}
-
-      c = (z[1] + R * (z[2] + R * z[3])) / a;
-    }
-
-  c *= X[0];
-
-  for (i = 1; i < EX; i++)
-    c *= RADIX;
-  for (i = 1; i > EX; i--)
-    c *= RADIXI;
-
-  *y = c;
-#undef R
-}
-
-/* Convert a multiple precision number *X into a double precision
-   number *Y, Denormal case  (|x| < 2**(-1022))).  */
-static void
-denorm (const mp_no *x, double *y, int p)
-{
-  long i, k;
-  long p2 = p;
-  double c, u, z[5];
-
-#define R RADIXI
-  if (EX < -44 || (EX == -44 && X[1] < TWO5))
-    {
-      *y = ZERO;
-      return;
-    }
-
-  if (p2 == 1)
-    {
-      if (EX == -42)
-	{
-	  z[1] = X[1] + TWO10;
-	  z[2] = ZERO;
-	  z[3] = ZERO;
-	  k = 3;
-	}
-      else if (EX == -43)
-	{
-	  z[1] = TWO10;
-	  z[2] = X[1];
-	  z[3] = ZERO;
-	  k = 2;
-	}
-      else
-	{
-	  z[1] = TWO10;
-	  z[2] = ZERO;
-	  z[3] = X[1];
-	  k = 1;
-	}
-    }
-  else if (p2 == 2)
-    {
-      if (EX == -42)
-	{
-	  z[1] = X[1] + TWO10;
-	  z[2] = X[2];
-	  z[3] = ZERO;
-	  k = 3;
-	}
-      else if (EX == -43)
-	{
-	  z[1] = TWO10;
-	  z[2] = X[1];
-	  z[3] = X[2];
-	  k = 2;
-	}
-      else
-	{
-	  z[1] = TWO10;
-	  z[2] = ZERO;
-	  z[3] = X[1];
-	  k = 1;
-	}
-    }
-  else
-    {
-      if (EX == -42)
-	{
-	  z[1] = X[1] + TWO10;
-	  z[2] = X[2];
-	  k = 3;
-	}
-      else if (EX == -43)
-	{
-	  z[1] = TWO10;
-	  z[2] = X[1];
-	  k = 2;
-	}
-      else
-	{
-	  z[1] = TWO10;
-	  z[2] = ZERO;
-	  k = 1;
-	}
-      z[3] = X[k];
-    }
-
-  u = (z[3] + TWO57) - TWO57;
-  if (u > z[3])
-    u -= TWO5;
-
-  if (u == z[3])
-    {
-      for (i = k + 1; i <= p2; i++)
-	{
-	  if (X[i] == ZERO)
-	    continue;
-	  else
-	    {
-	      z[3] += ONE;
-	      break;
-	    }
-	}
-    }
-
-  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
-
-  *y = c * TWOM1032;
-#undef R
-}
-
-/* Convert multiple precision number *X into double precision number *Y.  The
-   result is correctly rounded to the nearest/even.  */
-void
-__mp_dbl (const mp_no *x, double *y, int p)
-{
-  if (X[0] == ZERO)
-    {
-      *y = ZERO;
-      return;
-    }
-
-  if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
-    norm (x, y, p);
-  else
-    denorm (x, y, p);
-}
-
-/* Get the multiple precision equivalent of X into *Y.  If the precision is too
-   small, the result is truncated.  */
-void
-__dbl_mp (double x, mp_no *y, int p)
-{
-  long i, n;
-  long p2 = p;
-  double u;
-
-  /* Sign.  */
-  if (x == ZERO)
-    {
-      Y[0] = ZERO;
-      return;
-    }
-  else if (x > ZERO)
-    Y[0] = ONE;
-  else
-    {
-      Y[0] = MONE;
-      x = -x;
-    }
-
-  /* Exponent.  */
-  for (EY = ONE; x >= RADIX; EY += ONE)
-    x *= RADIXI;
-  for (; x < ONE; EY -= ONE)
-    x *= RADIX;
-
-  /* Digits.  */
-  n = MIN (p2, 4);
-  for (i = 1; i <= n; i++)
-    {
-      u = (x + TWO52) - TWO52;
-      if (u > x)
-	u -= ONE;
-      Y[i] = u;
-      x -= u;
-      x *= RADIX;
-    }
-  for (; i <= p2; i++)
-    Y[i] = ZERO;
-}
-
-/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
-   sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
-   Y and Z.  No guard digit is used.  The result equals the exact sum,
-   truncated.  */
-static void
-add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  long i, j, k;
-  long p2 = p;
-  double zk;
-
-  EZ = EX;
-
-  i = p2;
-  j = p2 + EY - EX;
-  k = p2 + 1;
-
-  if (__glibc_unlikely (j < 1))
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  zk = ZERO;
-
-  for (; j > 0; i--, j--)
-    {
-      zk += X[i] + Y[j];
-      if (zk >= RADIX)
-	{
-	  Z[k--] = zk - RADIX;
-	  zk = ONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  for (; i > 0; i--)
-    {
-      zk += X[i];
-      if (zk >= RADIX)
-	{
-	  Z[k--] = zk - RADIX;
-	  zk = ONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  if (zk == ZERO)
-    {
-      for (i = 1; i <= p2; i++)
-	Z[i] = Z[i + 1];
-    }
-  else
-    {
-      Z[1] = zk;
-      EZ += ONE;
-    }
-}
-
-/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
-   The sign of the difference *Z is not changed.  X and Y may overlap but not X
-   and Z or Y and Z.  One guard digit is used.  The error is less than one
-   ULP.  */
-static void
-sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  long i, j, k;
-  long p2 = p;
-  double zk;
-
-  EZ = EX;
-  i = p2;
-  j = p2 + EY - EX;
-  k = p2;
-
-  /* Y is too small compared to X, copy X over to the result.  */
-  if (__glibc_unlikely (j < 1))
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  /* The relevant least significant digit in Y is non-zero, so we factor it in
-     to enhance accuracy.  */
-  if (j < p2 && Y[j + 1] > ZERO)
-    {
-      Z[k + 1] = RADIX - Y[j + 1];
-      zk = MONE;
-    }
-  else
-    zk = Z[k + 1] = ZERO;
-
-  /* Subtract and borrow.  */
-  for (; j > 0; i--, j--)
-    {
-      zk += (X[i] - Y[j]);
-      if (zk < ZERO)
-	{
-	  Z[k--] = zk + RADIX;
-	  zk = MONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  /* We're done with digits from Y, so it's just digits in X.  */
-  for (; i > 0; i--)
-    {
-      zk += X[i];
-      if (zk < ZERO)
-	{
-	  Z[k--] = zk + RADIX;
-	  zk = MONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  /* Normalize.  */
-  for (i = 1; Z[i] == ZERO; i++);
-  EZ = EZ - i + 1;
-  for (k = 1; i <= p2 + 1;)
-    Z[k++] = Z[i++];
-  for (; k <= p2;)
-    Z[k++] = ZERO;
-}
-
-/* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
-   and Z or Y and Z.  One guard digit is used.  The error is less than one
-   ULP.  */
-void
-__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  int n;
-
-  if (X[0] == ZERO)
-    {
-      __cpy (y, z, p);
-      return;
-    }
-  else if (Y[0] == ZERO)
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  if (X[0] == Y[0])
-    {
-      if (__acr (x, y, p) > 0)
-	{
-	  add_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else
-	{
-	  add_magnitudes (y, x, z, p);
-	  Z[0] = Y[0];
-	}
-    }
-  else
-    {
-      if ((n = __acr (x, y, p)) == 1)
-	{
-	  sub_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else if (n == -1)
-	{
-	  sub_magnitudes (y, x, z, p);
-	  Z[0] = Y[0];
-	}
-      else
-	Z[0] = ZERO;
-    }
-}
-
-/* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
-   not X and Z or Y and Z.  One guard digit is used.  The error is less than
-   one ULP.  */
-void
-__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  int n;
-
-  if (X[0] == ZERO)
-    {
-      __cpy (y, z, p);
-      Z[0] = -Z[0];
-      return;
-    }
-  else if (Y[0] == ZERO)
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  if (X[0] != Y[0])
-    {
-      if (__acr (x, y, p) > 0)
-	{
-	  add_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else
-	{
-	  add_magnitudes (y, x, z, p);
-	  Z[0] = -Y[0];
-	}
-    }
-  else
-    {
-      if ((n = __acr (x, y, p)) == 1)
-	{
-	  sub_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else if (n == -1)
-	{
-	  sub_magnitudes (y, x, z, p);
-	  Z[0] = -Y[0];
-	}
-      else
-	Z[0] = ZERO;
-    }
-}
-
-/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
-   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
-   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
-void
-__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  long i, i1, i2, j, k, k2;
-  long p2 = p;
-  double u, zk, zk2;
-
-  /* Is z=0?  */
-  if (__glibc_unlikely (X[0] * Y[0] == ZERO))
-    {
-      Z[0] = ZERO;
-      return;
-    }
-
-  /* Multiply, add and carry */
-  k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
-  zk = Z[k2] = ZERO;
-  for (k = k2; k > 1;)
-    {
-      if (k > p2)
-	{
-	  i1 = k - p2;
-	  i2 = p2 + 1;
-	}
-      else
-	{
-	  i1 = 1;
-	  i2 = k;
-	}
-#if 1
-      /* Rearrange this inner loop to allow the fmadd instructions to be
-         independent and execute in parallel on processors that have
-         dual symmetrical FP pipelines.  */
-      if (i1 < (i2 - 1))
-	{
-	  /* Make sure we have at least 2 iterations.  */
-	  if (((i2 - i1) & 1L) == 1L)
-	    {
-	      /* Handle the odd iterations case.  */
-	      zk2 = x->d[i2 - 1] * y->d[i1];
-	    }
-	  else
-	    zk2 = 0.0;
-	  /* Do two multiply/adds per loop iteration, using independent
-	     accumulators; zk and zk2.  */
-	  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
-	    {
-	      zk += x->d[i] * y->d[j];
-	      zk2 += x->d[i + 1] * y->d[j - 1];
-	    }
-	  zk += zk2;		/* Final sum.  */
-	}
-      else
-	{
-	  /* Special case when iterations is 1.  */
-	  zk += x->d[i1] * y->d[i1];
-	}
-#else
-      /* The original code.  */
-      for (i = i1, j = i2 - 1; i < i2; i++, j--)
-	zk += X[i] * Y[j];
-#endif
-
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k] = zk - u;
-      zk = u * RADIXI;
-      --k;
-    }
-  Z[k] = zk;
-
-  /* Is there a carry beyond the most significant digit?  */
-  if (Z[1] == ZERO)
-    {
-      for (i = 1; i <= p2; i++)
-	Z[i] = Z[i + 1];
-      EZ = EX + EY - 1;
-    }
-  else
-    EZ = EX + EY;
-
-  Z[0] = X[0] * Y[0];
-}
-
-/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
-   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
-   error is bounded by 1.001 ULP.  This is a faster special case of
-   multiplication.  */
-void
-__sqr (const mp_no *x, mp_no *y, int p)
-{
-  long i, j, k, ip;
-  double u, yk;
-
-  /* Is z=0?  */
-  if (__glibc_unlikely (X[0] == ZERO))
-    {
-      Y[0] = ZERO;
-      return;
-    }
-
-  /* We need not iterate through all X's since it's pointless to
-     multiply zeroes.  */
-  for (ip = p; ip > 0; ip--)
-    if (X[ip] != ZERO)
-      break;
-
-  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
-
-  while (k > 2 * ip + 1)
-    Y[k--] = ZERO;
-
-  yk = ZERO;
-
-  while (k > p)
-    {
-      double yk2 = 0.0;
-      long lim = k / 2;
-
-      if (k % 2 == 0)
-        {
-	  yk += X[lim] * X[lim];
-	  lim--;
-	}
-
-      /* In __mul, this loop (and the one within the next while loop) run
-         between a range to calculate the mantissa as follows:
-
-         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
-		+ X[n] * Y[k]
-
-         For X == Y, we can get away with summing halfway and doubling the
-	 result.  For cases where the range size is even, the mid-point needs
-	 to be added separately (above).  */
-      for (i = k - p, j = p; i <= lim; i++, j--)
-	yk2 += X[i] * X[j];
-
-      yk += 2.0 * yk2;
-
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
-    }
-
-  while (k > 1)
-    {
-      double yk2 = 0.0;
-      long lim = k / 2;
-
-      if (k % 2 == 0)
-        {
-	  yk += X[lim] * X[lim];
-	  lim--;
-	}
-
-      /* Likewise for this loop.  */
-      for (i = 1, j = k - 1; i <= lim; i++, j--)
-	yk2 += X[i] * X[j];
-
-      yk += 2.0 * yk2;
-
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
-    }
-  Y[k] = yk;
-
-  /* Squares are always positive.  */
-  Y[0] = 1.0;
-
-  EY = 2 * EX;
-  /* Is there a carry beyond the most significant digit?  */
-  if (__glibc_unlikely (Y[1] == ZERO))
-    {
-      for (i = 1; i <= p; i++)
-	Y[i] = Y[i + 1];
-      EY--;
-    }
-}
-
-/* Invert *X and store in *Y.  Relative error bound:
-   - For P = 2: 1.001 * R ^ (1 - P)
-   - For P = 3: 1.063 * R ^ (1 - P)
-   - For P > 3: 2.001 * R ^ (1 - P)
-
-   *X = 0 is not permissible.  */
-static void
-__inv (const mp_no *x, mp_no *y, int p)
-{
-  long i;
-  double t;
-  mp_no z, w;
-  static const int np1[] =
-    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
-  };
-
-  __cpy (x, &z, p);
-  z.e = 0;
-  __mp_dbl (&z, &t, p);
-  t = ONE / t;
-  __dbl_mp (t, y, p);
-  EY -= EX;
-
-  for (i = 0; i < np1[p]; i++)
-    {
-      __cpy (y, &w, p);
-      __mul (x, &w, y, p);
-      __sub (&mptwo, y, &z, p);
-      __mul (&w, &z, y, p);
-    }
-}
-
-/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
-   or Y and Z.  Relative error bound:
-   - For P = 2: 2.001 * R ^ (1 - P)
-   - For P = 3: 2.063 * R ^ (1 - P)
-   - For P > 3: 3.001 * R ^ (1 - P)
-
-   *X = 0 is not permissible.  */
-void
-__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  mp_no w;
-
-  if (X[0] == ZERO)
-    Z[0] = ZERO;
-  else
-    {
-      __inv (y, &w, p);
-      __mul (x, &w, z, p);
-    }
-}
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c
deleted file mode 100644
index d93f50544..000000000
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2013 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/**************************************************************************/
-/*  MODULE_NAME:slowexp.c                                                 */
-/*                                                                        */
-/*  FUNCTION:slowexp                                                      */
-/*                                                                        */
-/*  FILES NEEDED:mpa.h                                                    */
-/*               mpa.c mpexp.c                                            */
-/*                                                                        */
-/*Converting from double precision to Multi-precision and calculating     */
-/* e^x                                                                    */
-/**************************************************************************/
-#include <math_private.h>
-
-#ifdef NO_LONG_DOUBLE
-#include "mpa.h"
-void __mpexp(mp_no *x, mp_no *y, int p);
-#endif
-
-/*Converting from double precision to Multi-precision and calculating  e^x */
-double __slowexp(double x) {
-#ifdef NO_LONG_DOUBLE
-  double w,z,res,eps=3.0e-26;
-  int p;
-  mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
-
-  p=6;
-  __dbl_mp(x,&mpx,p); /* Convert a double precision number  x               */
-                    /* into a multiple precision number mpx with prec. p. */
-  __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
-  __dbl_mp(eps,&mpeps,p);
-  __mul(&mpeps,&mpy,&mpcor,p);
-  __add(&mpy,&mpcor,&mpw,p);
-  __sub(&mpy,&mpcor,&mpz,p);
-  __mp_dbl(&mpw, &w, p);
-  __mp_dbl(&mpz, &z, p);
-  if (w == z) return w;
-  else  {                   /* if calculating is not exactly   */
-    p = 32;
-    __dbl_mp(x,&mpx,p);
-    __mpexp(&mpx, &mpy, p);
-    __mp_dbl(&mpy, &res, p);
-    return res;
-  }
-#else
-  return (double) __ieee754_expl((long double)x);
-#endif
-}
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c
deleted file mode 100644
index 7c97d9581..000000000
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2013 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*************************************************************************/
-/* MODULE_NAME:slowpow.c                                                 */
-/*                                                                       */
-/* FUNCTION:slowpow                                                      */
-/*                                                                       */
-/*FILES NEEDED:mpa.h                                                     */
-/*             mpa.c mpexp.c mplog.c halfulp.c                           */
-/*                                                                       */
-/* Given two IEEE double machine numbers y,x , routine  computes the     */
-/* correctly  rounded (to nearest) value of x^y. Result calculated  by   */
-/* multiplication (in halfulp.c) or if result isn't accurate enough      */
-/* then routine converts x and y into multi-precision doubles and        */
-/* recompute.                                                            */
-/*************************************************************************/
-
-#include "mpa.h"
-#include <math_private.h>
-
-void __mpexp (mp_no * x, mp_no * y, int p);
-void __mplog (mp_no * x, mp_no * y, int p);
-double ulog (double);
-double __halfulp (double x, double y);
-
-double
-__slowpow (double x, double y, double z)
-{
-  double res, res1;
-  long double ldw, ldz, ldpp;
-  static const long double ldeps = 0x4.0p-96;
-
-  res = __halfulp (x, y);	/* halfulp() returns -10 or x^y             */
-  if (res >= 0)
-    return res;			/* if result was really computed by halfulp */
-  /*  else, if result was not really computed by halfulp */
-
-  /* Compute pow as long double, 106 bits */
-  ldz = __ieee754_logl ((long double) x);
-  ldw = (long double) y *ldz;
-  ldpp = __ieee754_expl (ldw);
-  res = (double) (ldpp + ldeps);
-  res1 = (double) (ldpp - ldeps);
-
-  if (res != res1)		/* if result still not accurate enough */
-    {				/* use mpa for higher precision.  */
-      mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
-      static const mp_no eps = { -3, {1.0, 4.0} };
-      int p;
-
-      p = 10;			/*  p=precision 240 bits  */
-      __dbl_mp (x, &mpx, p);
-      __dbl_mp (y, &mpy, p);
-      __dbl_mp (z, &mpz, p);
-      __mplog (&mpx, &mpz, p);		/* log(x) = z   */
-      __mul (&mpy, &mpz, &mpw, p);	/*  y * z =w    */
-      __mpexp (&mpw, &mpp, p);		/*  e^w =pp     */
-      __add (&mpp, &eps, &mpr, p);	/*  pp+eps =r   */
-      __mp_dbl (&mpr, &res, p);
-      __sub (&mpp, &eps, &mpr1, p);	/*  pp -eps =r1 */
-      __mp_dbl (&mpr1, &res1, p);	/*  converting into double precision */
-      if (res == res1)
-	return res;
-
-      /* if we get here result wasn't calculated exactly, continue for
-         more exact calculation using 768 bits.  */
-      p = 32;
-      __dbl_mp (x, &mpx, p);
-      __dbl_mp (y, &mpy, p);
-      __dbl_mp (z, &mpz, p);
-      __mplog (&mpx, &mpz, p);		/* log(c)=z  */
-      __mul (&mpy, &mpz, &mpw, p);	/* y*z =w    */
-      __mpexp (&mpw, &mpp, p);		/* e^w=pp    */
-      __mp_dbl (&mpp, &res, p);		/* converting into double precision */
-    }
-  return res;
-}
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
index b3a69975c..724d9084a 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
@@ -29,9 +29,6 @@ EALIGN (strncmp, 4, 0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rWORD3  r10
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
index b58630e33..fdae44d26 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
@@ -31,9 +31,6 @@ EALIGN (strncmp,5,0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rWORD3	r10
diff --git a/libc/sysdeps/powerpc/powerpc32/strncmp.S b/libc/sysdeps/powerpc/powerpc32/strncmp.S
index 3cb6509e2..fa345d293 100644
--- a/libc/sysdeps/powerpc/powerpc32/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strncmp.S
@@ -29,9 +29,6 @@ EALIGN (strncmp, 4, 0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rFEFE	r8	/* constant 0xfefefeff (-0x01010101) */
diff --git a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
index 18032752b..70c370439 100644
--- a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -24,8 +24,6 @@
 #else
 # include <jmpbuf-offsets.h>
 #endif
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 #ifndef __NO_VMX__
 	.section	".toc","aw"
@@ -45,9 +43,8 @@
 #endif
 
 	.machine	"altivec"
-ENTRY (BP_SYM (__longjmp))
+ENTRY (__longjmp)
 	CALL_MCOUNT 2
-	CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
 #ifndef __NO_VMX__
 	ld    r5,.LC__dl_hwcap@toc(r2)
 # ifdef SHARED
@@ -178,4 +175,4 @@ L(no_vmx):
 	lfd fp31,((JB_FPRS+17)*8)(r3)
 	mr r3,r4
 	blr
-END (BP_SYM (__longjmp))
+END (__longjmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S b/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S
index 42ec5e375..84c82bb76 100644
--- a/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S
@@ -18,8 +18,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
 #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
@@ -32,7 +30,7 @@
 
 
 	.machine  a2
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
 	CALL_MCOUNT 3
 
 	dcbt    0,r4            /* Prefetch ONE SRC cacheline  */
@@ -522,5 +520,5 @@ L(endloop2_128):
 	b       L(lessthancacheline)
 
 
-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+END_GEN_TB (memcpy,TB_TOCLESS)
 libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/bp-asm.h b/libc/sysdeps/powerpc/powerpc64/bp-asm.h
deleted file mode 100644
index 6c6c38735..000000000
--- a/libc/sysdeps/powerpc/powerpc64/bp-asm.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/* Bounded-pointer definitions for PowerPC64 assembler.
-   Copyright (C) 2000-2013 Free Software Foundation, Inc.
-   Contributed by Greg McGary <greg@mcgary.org>
-
-   This file is part of the GNU C Library.  Its master source is NOT part of
-   the C library, however.  The master source lives in the GNU MP Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If
-   not, see <http://www.gnu.org/licenses/>.  */
-
-#if __BOUNDED_POINTERS__
-
-/* Byte offsets of BP components.  */
-# define oVALUE	0
-# define oLOW	4
-# define oHIGH	8
-
-/* Don't check bounds, just convert the BP register to its simple
-   pointer value.  */
-
-# define DISCARD_BOUNDS(rBP)			\
-	ld	rBP, oVALUE(rBP)
-
-/* Check low bound, with the side effect that the BP register is converted
-   its simple pointer value.  Move the high bound into a register for
-   later use.  */
-
-# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH)	\
-	ld	rHIGH, oHIGH(rBP);		\
-	ld	rLOW, oLOW(rBP);		\
-	ld	rBP, oVALUE(rBP);		\
-	tdllt	rBP, rLOW
-
-/* Check the high bound, which is in a register, using the given
-   conditional trap instruction.  */
-
-# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc) \
-	TWLcc	rVALUE, rHIGH
-
-/* Check the high bound, which is stored at the return-value's high
-   bound slot, using the given conditional trap instruction.  */
-
-# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc)	\
-	ld	rHIGH, oHIGH(rRTN);			\
-	TWLcc	rVALUE, rHIGH
-
-/* Check both bounds, with the side effect that the BP register is
-   converted to its simple pointer value.  */
-
-# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH)	\
-	CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH);	\
-	tdlge	rBP, rHIGH
-
-/* Check bounds on a memory region of given length, with the side
-   effect that the BP register is converted to its simple pointer
-   value.  */
-
-# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH)	\
-	CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH);			\
-	sub	rHIGH, rHIGH, rLENGTH;				\
-	tdlgt	rBP, rHIGH
-
-# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH)	\
-	CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH);			\
-	subi	rHIGH, rHIGH, LENGTH;				\
-	tdlgt	rBP, rHIGH
-
-/* Store a pointer value register into the return-value's pointer
-   value slot.  */
-
-# define STORE_RETURN_VALUE(rVALUE)		\
-	std	rVALUE, oVALUE(rRTN)
-
-/* Store a low and high bounds into the return-value's pointer bounds
-   slots.  */
-
-# define STORE_RETURN_BOUNDS(rLOW, rHIGH)	\
-	std	rLOW, oLOW(rRTN);		\
-	std	rHIGH, oHIGH(rRTN)
-
-/* Stuff zero value/low/high into the BP addressed by rRTN.  */
-
-# define RETURN_NULL_BOUNDED_POINTER		\
-	li	r4, 0;				\
-	STORE_RETURN_VALUE (r4);		\
-	STORE_RETURN_BOUNDS (r4, r4)
-
-#else
-
-# define DISCARD_BOUNDS(rBP)
-# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH)
-# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc)
-# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc)
-# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH)
-# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH)
-# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH)
-# define STORE_RETURN_VALUE(rVALUE)
-# define STORE_RETURN_BOUNDS(rLOW, rHIGH)
-
-# define RETURN_NULL_BOUNDED_POINTER li rRTN, 0
-
-#endif
diff --git a/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S b/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S
index 5ba4ebf62..a271965dd 100644
--- a/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 #define PREFETCH_AHEAD 6	/* no cache lines SRC prefetching ahead  */
 #define ZERO_AHEAD 4		/* no cache lines DST zeroing ahead  */
@@ -41,7 +39,7 @@
 
 .align  7
 
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
 	CALL_MCOUNT 3
 
 	dcbt	0,r4		/* Prefetch ONE SRC cacheline  */
@@ -240,5 +238,5 @@ EALIGN (BP_SYM (memcpy), 5, 0)
 	stb	r0,0(r6)
 1:	blr
 
-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+END_GEN_TB (memcpy,TB_TOCLESS)
 libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/memcpy.S b/libc/sysdeps/powerpc/powerpc64/memcpy.S
index 7c1b656be..b8c4cc8b1 100644
--- a/libc/sysdeps/powerpc/powerpc64/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/memcpy.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
    Returns 'dst'.
@@ -35,7 +33,7 @@
    possible when both source and destination are doubleword aligned.
    Each case has a optimized unrolled loop.   */
 
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
 	CALL_MCOUNT 3
 
     cmpldi cr1,5,31
@@ -364,5 +362,5 @@ EALIGN (BP_SYM (memcpy), 5, 0)
     ld 31,-8(1)
     ld 3,-16(1)
     blr
-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+END_GEN_TB (memcpy,TB_TOCLESS)
 libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/memset.S b/libc/sysdeps/powerpc/powerpc64/memset.S
index f107f8b40..6acf149c8 100644
--- a/libc/sysdeps/powerpc/powerpc64/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/memset.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 	.section	".toc","aw"
 .LC0:
@@ -33,22 +31,15 @@
    cache line (256 bits). There is a special case for setting cache lines
    to 0, to take advantage of the dcbz instruction.  */
 
-EALIGN (BP_SYM (memset), 5, 0)
+EALIGN (memset, 5, 0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
 #define rRTN	r3	/* Initial value of 1st argument.  */
-#if __BOUNDED_POINTERS__
-# define rMEMP0	r4	/* Original value of 1st arg.  */
-# define rCHR	r5	/* Char to set in each byte.  */
-# define rLEN	r6	/* Length of region to set.  */
-# define rMEMP	r10	/* Address at which we are storing.  */
-#else
-# define rMEMP0	r3	/* Original value of 1st arg.  */
-# define rCHR	r4	/* Char to set in each byte.  */
-# define rLEN	r5	/* Length of region to set.  */
-# define rMEMP	r6	/* Address at which we are storing.  */
-#endif
+#define rMEMP0	r3	/* Original value of 1st arg.  */
+#define rCHR	r4	/* Char to set in each byte.  */
+#define rLEN	r5	/* Length of region to set.  */
+#define rMEMP	r6	/* Address at which we are storing.  */
 #define rALIGN	r7	/* Number of bytes we are setting now (when aligning). */
 #define rMEMP2	r8
 
@@ -56,14 +47,6 @@ EALIGN (BP_SYM (memset), 5, 0)
 #define rCLS	r8	/* Cache line size obtained from static.  */
 #define rCLM	r9	/* Cache line size mask to check for cache alignment.  */
 L(_memset):
-#if __BOUNDED_POINTERS__
-	cmpldi	cr1, rRTN, 0
-	CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
-	beq	cr1, L(b0)
-	STORE_RETURN_VALUE (rMEMP0)
-	STORE_RETURN_BOUNDS (rTMP, rTMP2)
-L(b0):
-#endif
 /* Take care of case for size <= 4.  */
 	cmpldi	cr1, rLEN, 8
 	andi.	rALIGN, rMEMP0, 7
@@ -261,25 +244,16 @@ L(medium_27f):
 L(medium_28t):
 	std	rCHR, -8(rMEMP)
 	blr
-END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+END_GEN_TB (memset,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
 
 /* Copied from bzero.S to prevent the linker from inserting a stub
    between bzero and memset.  */
-ENTRY (BP_SYM (__bzero))
+ENTRY (__bzero)
 	CALL_MCOUNT 3
-#if __BOUNDED_POINTERS__
-	mr	r6,r4
-	li	r5,0
-	mr	r4,r3
-	/* Tell memset that we don't want a return value.  */
-	li	r3,0
-	b	L(_memset)
-#else
 	mr	r5,r4
 	li	r4,0
 	b	L(_memset)
-#endif
-END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+END_GEN_TB (__bzero,TB_TOCLESS)
 
-weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
+weak_alias (__bzero, bzero)
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/Implies b/libc/sysdeps/powerpc/powerpc64/power4/Implies
new file mode 100644
index 000000000..a372141bb
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power4/Implies
@@ -0,0 +1,2 @@
+powerpc/power4/fpu
+powerpc/power4
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/Makefile b/libc/sysdeps/powerpc/powerpc64/power4/fpu/Makefile
deleted file mode 100644
index f8bb3ef04..000000000
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# Makefile fragment for POWER4/5/5+ platforms with FPU.
-
-ifeq ($(subdir),math)
-CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops
-endif
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
deleted file mode 100644
index b22664772..000000000
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
+++ /dev/null
@@ -1,837 +0,0 @@
-
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2013 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/************************************************************************/
-/*  MODULE_NAME: mpa.c                                                  */
-/*                                                                      */
-/*  FUNCTIONS:                                                          */
-/*               mcr                                                    */
-/*               acr                                                    */
-/*               cpy                                                    */
-/*               norm                                                   */
-/*               denorm                                                 */
-/*               mp_dbl                                                 */
-/*               dbl_mp                                                 */
-/*               add_magnitudes                                         */
-/*               sub_magnitudes                                         */
-/*               add                                                    */
-/*               sub                                                    */
-/*               mul                                                    */
-/*               inv                                                    */
-/*               dvd                                                    */
-/*                                                                      */
-/* Arithmetic functions for multiple precision numbers.                 */
-/* Relative errors are bounded                                          */
-/************************************************************************/
-
-
-#include "endian.h"
-#include "mpa.h"
-#include <sys/param.h>
-
-const mp_no mpone = {1, {1.0, 1.0}};
-const mp_no mptwo = {1, {1.0, 2.0}};
-
-/* Compare mantissa of two multiple precision numbers regardless of the sign
-   and exponent of the numbers.  */
-static int
-mcr (const mp_no *x, const mp_no *y, int p)
-{
-  long i;
-  long p2 = p;
-  for (i = 1; i <= p2; i++)
-    {
-      if (X[i] == Y[i])
-	continue;
-      else if (X[i] > Y[i])
-	return 1;
-      else
-	return -1;
-    }
-  return 0;
-}
-
-/* Compare the absolute values of two multiple precision numbers.  */
-int
-__acr (const mp_no *x, const mp_no *y, int p)
-{
-  long i;
-
-  if (X[0] == ZERO)
-    {
-      if (Y[0] == ZERO)
-	i = 0;
-      else
-	i = -1;
-    }
-  else if (Y[0] == ZERO)
-    i = 1;
-  else
-    {
-      if (EX > EY)
-	i = 1;
-      else if (EX < EY)
-	i = -1;
-      else
-	i = mcr (x, y, p);
-    }
-
-  return i;
-}
-
-/* Copy multiple precision number X into Y.  They could be the same
-   number.  */
-void
-__cpy (const mp_no *x, mp_no *y, int p)
-{
-  long i;
-
-  EY = EX;
-  for (i = 0; i <= p; i++)
-    Y[i] = X[i];
-}
-
-/* Convert a multiple precision number *X into a double precision
-   number *Y, normalized case  (|x| >= 2**(-1022))).  */
-static void
-norm (const mp_no *x, double *y, int p)
-{
-#define R RADIXI
-  long i;
-  double a, c, u, v, z[5];
-  if (p < 5)
-    {
-      if (p == 1)
-	c = X[1];
-      else if (p == 2)
-	c = X[1] + R * X[2];
-      else if (p == 3)
-	c = X[1] + R * (X[2] + R * X[3]);
-      else if (p == 4)
-	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
-    }
-  else
-    {
-      for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
-	{
-	  a *= TWO;
-	  z[1] *= TWO;
-	}
-
-      for (i = 2; i < 5; i++)
-	{
-	  z[i] = X[i] * a;
-	  u = (z[i] + CUTTER) - CUTTER;
-	  if (u > z[i])
-	    u -= RADIX;
-	  z[i] -= u;
-	  z[i - 1] += u * RADIXI;
-	}
-
-      u = (z[3] + TWO71) - TWO71;
-      if (u > z[3])
-	u -= TWO19;
-      v = z[3] - u;
-
-      if (v == TWO18)
-	{
-	  if (z[4] == ZERO)
-	    {
-	      for (i = 5; i <= p; i++)
-		{
-		  if (X[i] == ZERO)
-		    continue;
-		  else
-		    {
-		      z[3] += ONE;
-		      break;
-		    }
-		}
-	    }
-	  else
-	    z[3] += ONE;
-	}
-
-      c = (z[1] + R * (z[2] + R * z[3])) / a;
-    }
-
-  c *= X[0];
-
-  for (i = 1; i < EX; i++)
-    c *= RADIX;
-  for (i = 1; i > EX; i--)
-    c *= RADIXI;
-
-  *y = c;
-#undef R
-}
-
-/* Convert a multiple precision number *X into a double precision
-   number *Y, Denormal case  (|x| < 2**(-1022))).  */
-static void
-denorm (const mp_no *x, double *y, int p)
-{
-  long i, k;
-  long p2 = p;
-  double c, u, z[5];
-
-#define R RADIXI
-  if (EX < -44 || (EX == -44 && X[1] < TWO5))
-    {
-      *y = ZERO;
-      return;
-    }
-
-  if (p2 == 1)
-    {
-      if (EX == -42)
-	{
-	  z[1] = X[1] + TWO10;
-	  z[2] = ZERO;
-	  z[3] = ZERO;
-	  k = 3;
-	}
-      else if (EX == -43)
-	{
-	  z[1] = TWO10;
-	  z[2] = X[1];
-	  z[3] = ZERO;
-	  k = 2;
-	}
-      else
-	{
-	  z[1] = TWO10;
-	  z[2] = ZERO;
-	  z[3] = X[1];
-	  k = 1;
-	}
-    }
-  else if (p2 == 2)
-    {
-      if (EX == -42)
-	{
-	  z[1] = X[1] + TWO10;
-	  z[2] = X[2];
-	  z[3] = ZERO;
-	  k = 3;
-	}
-      else if (EX == -43)
-	{
-	  z[1] = TWO10;
-	  z[2] = X[1];
-	  z[3] = X[2];
-	  k = 2;
-	}
-      else
-	{
-	  z[1] = TWO10;
-	  z[2] = ZERO;
-	  z[3] = X[1];
-	  k = 1;
-	}
-    }
-  else
-    {
-      if (EX == -42)
-	{
-	  z[1] = X[1] + TWO10;
-	  z[2] = X[2];
-	  k = 3;
-	}
-      else if (EX == -43)
-	{
-	  z[1] = TWO10;
-	  z[2] = X[1];
-	  k = 2;
-	}
-      else
-	{
-	  z[1] = TWO10;
-	  z[2] = ZERO;
-	  k = 1;
-	}
-      z[3] = X[k];
-    }
-
-  u = (z[3] + TWO57) - TWO57;
-  if (u > z[3])
-    u -= TWO5;
-
-  if (u == z[3])
-    {
-      for (i = k + 1; i <= p2; i++)
-	{
-	  if (X[i] == ZERO)
-	    continue;
-	  else
-	    {
-	      z[3] += ONE;
-	      break;
-	    }
-	}
-    }
-
-  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
-
-  *y = c * TWOM1032;
-#undef R
-}
-
-/* Convert multiple precision number *X into double precision number *Y.  The
-   result is correctly rounded to the nearest/even.  */
-void
-__mp_dbl (const mp_no *x, double *y, int p)
-{
-  if (X[0] == ZERO)
-    {
-      *y = ZERO;
-      return;
-    }
-
-  if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
-    norm (x, y, p);
-  else
-    denorm (x, y, p);
-}
-
-/* Get the multiple precision equivalent of X into *Y.  If the precision is too
-   small, the result is truncated.  */
-void
-__dbl_mp (double x, mp_no *y, int p)
-{
-  long i, n;
-  long p2 = p;
-  double u;
-
-  /* Sign.  */
-  if (x == ZERO)
-    {
-      Y[0] = ZERO;
-      return;
-    }
-  else if (x > ZERO)
-    Y[0] = ONE;
-  else
-    {
-      Y[0] = MONE;
-      x = -x;
-    }
-
-  /* Exponent.  */
-  for (EY = ONE; x >= RADIX; EY += ONE)
-    x *= RADIXI;
-  for (; x < ONE; EY -= ONE)
-    x *= RADIX;
-
-  /* Digits.  */
-  n = MIN (p2, 4);
-  for (i = 1; i <= n; i++)
-    {
-      u = (x + TWO52) - TWO52;
-      if (u > x)
-	u -= ONE;
-      Y[i] = u;
-      x -= u;
-      x *= RADIX;
-    }
-  for (; i <= p2; i++)
-    Y[i] = ZERO;
-}
-
-/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
-   sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
-   Y and Z.  No guard digit is used.  The result equals the exact sum,
-   truncated.  */
-static void
-add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  long i, j, k;
-  long p2 = p;
-  double zk;
-
-  EZ = EX;
-
-  i = p2;
-  j = p2 + EY - EX;
-  k = p2 + 1;
-
-  if (__glibc_unlikely (j < 1))
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  zk = ZERO;
-
-  for (; j > 0; i--, j--)
-    {
-      zk += X[i] + Y[j];
-      if (zk >= RADIX)
-	{
-	  Z[k--] = zk - RADIX;
-	  zk = ONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  for (; i > 0; i--)
-    {
-      zk += X[i];
-      if (zk >= RADIX)
-	{
-	  Z[k--] = zk - RADIX;
-	  zk = ONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  if (zk == ZERO)
-    {
-      for (i = 1; i <= p2; i++)
-	Z[i] = Z[i + 1];
-    }
-  else
-    {
-      Z[1] = zk;
-      EZ += ONE;
-    }
-}
-
-/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
-   The sign of the difference *Z is not changed.  X and Y may overlap but not X
-   and Z or Y and Z.  One guard digit is used.  The error is less than one
-   ULP.  */
-static void
-sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  long i, j, k;
-  long p2 = p;
-  double zk;
-
-  EZ = EX;
-  i = p2;
-  j = p2 + EY - EX;
-  k = p2;
-
-  /* Y is too small compared to X, copy X over to the result.  */
-  if (__glibc_unlikely (j < 1))
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  /* The relevant least significant digit in Y is non-zero, so we factor it in
-     to enhance accuracy.  */
-  if (j < p2 && Y[j + 1] > ZERO)
-    {
-      Z[k + 1] = RADIX - Y[j + 1];
-      zk = MONE;
-    }
-  else
-    zk = Z[k + 1] = ZERO;
-
-  /* Subtract and borrow.  */
-  for (; j > 0; i--, j--)
-    {
-      zk += (X[i] - Y[j]);
-      if (zk < ZERO)
-	{
-	  Z[k--] = zk + RADIX;
-	  zk = MONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  /* We're done with digits from Y, so it's just digits in X.  */
-  for (; i > 0; i--)
-    {
-      zk += X[i];
-      if (zk < ZERO)
-	{
-	  Z[k--] = zk + RADIX;
-	  zk = MONE;
-	}
-      else
-        {
-	  Z[k--] = zk;
-	  zk = ZERO;
-	}
-    }
-
-  /* Normalize.  */
-  for (i = 1; Z[i] == ZERO; i++);
-  EZ = EZ - i + 1;
-  for (k = 1; i <= p2 + 1;)
-    Z[k++] = Z[i++];
-  for (; k <= p2;)
-    Z[k++] = ZERO;
-}
-
-/* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
-   and Z or Y and Z.  One guard digit is used.  The error is less than one
-   ULP.  */
-void
-__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  int n;
-
-  if (X[0] == ZERO)
-    {
-      __cpy (y, z, p);
-      return;
-    }
-  else if (Y[0] == ZERO)
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  if (X[0] == Y[0])
-    {
-      if (__acr (x, y, p) > 0)
-	{
-	  add_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else
-	{
-	  add_magnitudes (y, x, z, p);
-	  Z[0] = Y[0];
-	}
-    }
-  else
-    {
-      if ((n = __acr (x, y, p)) == 1)
-	{
-	  sub_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else if (n == -1)
-	{
-	  sub_magnitudes (y, x, z, p);
-	  Z[0] = Y[0];
-	}
-      else
-	Z[0] = ZERO;
-    }
-}
-
-/* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
-   not X and Z or Y and Z.  One guard digit is used.  The error is less than
-   one ULP.  */
-void
-__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  int n;
-
-  if (X[0] == ZERO)
-    {
-      __cpy (y, z, p);
-      Z[0] = -Z[0];
-      return;
-    }
-  else if (Y[0] == ZERO)
-    {
-      __cpy (x, z, p);
-      return;
-    }
-
-  if (X[0] != Y[0])
-    {
-      if (__acr (x, y, p) > 0)
-	{
-	  add_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else
-	{
-	  add_magnitudes (y, x, z, p);
-	  Z[0] = -Y[0];
-	}
-    }
-  else
-    {
-      if ((n = __acr (x, y, p)) == 1)
-	{
-	  sub_magnitudes (x, y, z, p);
-	  Z[0] = X[0];
-	}
-      else if (n == -1)
-	{
-	  sub_magnitudes (y, x, z, p);
-	  Z[0] = -Y[0];
-	}
-      else
-	Z[0] = ZERO;
-    }
-}
-
-/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
-   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
-   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
-void
-__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  long i, i1, i2, j, k, k2;
-  long p2 = p;
-  double u, zk, zk2;
-
-  /* Is z=0?  */
-  if (__glibc_unlikely (X[0] * Y[0] == ZERO))
-    {
-      Z[0] = ZERO;
-      return;
-    }
-
-  /* Multiply, add and carry */
-  k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
-  zk = Z[k2] = ZERO;
-  for (k = k2; k > 1;)
-    {
-      if (k > p2)
-	{
-	  i1 = k - p2;
-	  i2 = p2 + 1;
-	}
-      else
-	{
-	  i1 = 1;
-	  i2 = k;
-	}
-#if 1
-      /* Rearrange this inner loop to allow the fmadd instructions to be
-         independent and execute in parallel on processors that have
-         dual symmetrical FP pipelines.  */
-      if (i1 < (i2 - 1))
-	{
-	  /* Make sure we have at least 2 iterations.  */
-	  if (((i2 - i1) & 1L) == 1L)
-	    {
-	      /* Handle the odd iterations case.  */
-	      zk2 = x->d[i2 - 1] * y->d[i1];
-	    }
-	  else
-	    zk2 = 0.0;
-	  /* Do two multiply/adds per loop iteration, using independent
-	     accumulators; zk and zk2.  */
-	  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
-	    {
-	      zk += x->d[i] * y->d[j];
-	      zk2 += x->d[i + 1] * y->d[j - 1];
-	    }
-	  zk += zk2;		/* Final sum.  */
-	}
-      else
-	{
-	  /* Special case when iterations is 1.  */
-	  zk += x->d[i1] * y->d[i1];
-	}
-#else
-      /* The original code.  */
-      for (i = i1, j = i2 - 1; i < i2; i++, j--)
-	zk += X[i] * Y[j];
-#endif
-
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k] = zk - u;
-      zk = u * RADIXI;
-      --k;
-    }
-  Z[k] = zk;
-
-  /* Is there a carry beyond the most significant digit?  */
-  if (Z[1] == ZERO)
-    {
-      for (i = 1; i <= p2; i++)
-	Z[i] = Z[i + 1];
-      EZ = EX + EY - 1;
-    }
-  else
-    EZ = EX + EY;
-
-  Z[0] = X[0] * Y[0];
-}
-
-/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
-   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
-   error is bounded by 1.001 ULP.  This is a faster special case of
-   multiplication.  */
-void
-__sqr (const mp_no *x, mp_no *y, int p)
-{
-  long i, j, k, ip;
-  double u, yk;
-
-  /* Is z=0?  */
-  if (__glibc_unlikely (X[0] == ZERO))
-    {
-      Y[0] = ZERO;
-      return;
-    }
-
-  /* We need not iterate through all X's since it's pointless to
-     multiply zeroes.  */
-  for (ip = p; ip > 0; ip--)
-    if (X[ip] != ZERO)
-      break;
-
-  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
-
-  while (k > 2 * ip + 1)
-    Y[k--] = ZERO;
-
-  yk = ZERO;
-
-  while (k > p)
-    {
-      double yk2 = 0.0;
-      long lim = k / 2;
-
-      if (k % 2 == 0)
-        {
-	  yk += X[lim] * X[lim];
-	  lim--;
-	}
-
-      /* In __mul, this loop (and the one within the next while loop) run
-         between a range to calculate the mantissa as follows:
-
-         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
-		+ X[n] * Y[k]
-
-         For X == Y, we can get away with summing halfway and doubling the
-	 result.  For cases where the range size is even, the mid-point needs
-	 to be added separately (above).  */
-      for (i = k - p, j = p; i <= lim; i++, j--)
-	yk2 += X[i] * X[j];
-
-      yk += 2.0 * yk2;
-
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
-    }
-
-  while (k > 1)
-    {
-      double yk2 = 0.0;
-      long lim = k / 2;
-
-      if (k % 2 == 0)
-        {
-	  yk += X[lim] * X[lim];
-	  lim--;
-	}
-
-      /* Likewise for this loop.  */
-      for (i = 1, j = k - 1; i <= lim; i++, j--)
-	yk2 += X[i] * X[j];
-
-      yk += 2.0 * yk2;
-
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
-    }
-  Y[k] = yk;
-
-  /* Squares are always positive.  */
-  Y[0] = 1.0;
-
-  EY = 2 * EX;
-  /* Is there a carry beyond the most significant digit?  */
-  if (__glibc_unlikely (Y[1] == ZERO))
-    {
-      for (i = 1; i <= p; i++)
-	Y[i] = Y[i + 1];
-      EY--;
-    }
-}
-
-/* Invert *X and store in *Y.  Relative error bound:
-   - For P = 2: 1.001 * R ^ (1 - P)
-   - For P = 3: 1.063 * R ^ (1 - P)
-   - For P > 3: 2.001 * R ^ (1 - P)
-
-   *X = 0 is not permissible.  */
-static void
-__inv (const mp_no *x, mp_no *y, int p)
-{
-  long i;
-  double t;
-  mp_no z, w;
-  static const int np1[] =
-    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
-  };
-
-  __cpy (x, &z, p);
-  z.e = 0;
-  __mp_dbl (&z, &t, p);
-  t = ONE / t;
-  __dbl_mp (t, y, p);
-  EY -= EX;
-
-  for (i = 0; i < np1[p]; i++)
-    {
-      __cpy (y, &w, p);
-      __mul (x, &w, y, p);
-      __sub (&mptwo, y, &z, p);
-      __mul (&w, &z, y, p);
-    }
-}
-
-/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
-   or Y and Z.  Relative error bound:
-   - For P = 2: 2.001 * R ^ (1 - P)
-   - For P = 3: 2.063 * R ^ (1 - P)
-   - For P > 3: 3.001 * R ^ (1 - P)
-
-   *X = 0 is not permissible.  */
-void
-__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
-{
-  mp_no w;
-
-  if (X[0] == ZERO)
-    Z[0] = ZERO;
-  else
-    {
-      __inv (y, &w, p);
-      __mul (x, &w, z, p);
-    }
-}
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
deleted file mode 100644
index d93f50544..000000000
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2013 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/**************************************************************************/
-/*  MODULE_NAME:slowexp.c                                                 */
-/*                                                                        */
-/*  FUNCTION:slowexp                                                      */
-/*                                                                        */
-/*  FILES NEEDED:mpa.h                                                    */
-/*               mpa.c mpexp.c                                            */
-/*                                                                        */
-/*Converting from double precision to Multi-precision and calculating     */
-/* e^x                                                                    */
-/**************************************************************************/
-#include <math_private.h>
-
-#ifdef NO_LONG_DOUBLE
-#include "mpa.h"
-void __mpexp(mp_no *x, mp_no *y, int p);
-#endif
-
-/*Converting from double precision to Multi-precision and calculating  e^x */
-double __slowexp(double x) {
-#ifdef NO_LONG_DOUBLE
-  double w,z,res,eps=3.0e-26;
-  int p;
-  mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
-
-  p=6;
-  __dbl_mp(x,&mpx,p); /* Convert a double precision number  x               */
-                    /* into a multiple precision number mpx with prec. p. */
-  __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
-  __dbl_mp(eps,&mpeps,p);
-  __mul(&mpeps,&mpy,&mpcor,p);
-  __add(&mpy,&mpcor,&mpw,p);
-  __sub(&mpy,&mpcor,&mpz,p);
-  __mp_dbl(&mpw, &w, p);
-  __mp_dbl(&mpz, &z, p);
-  if (w == z) return w;
-  else  {                   /* if calculating is not exactly   */
-    p = 32;
-    __dbl_mp(x,&mpx,p);
-    __mpexp(&mpx, &mpy, p);
-    __mp_dbl(&mpy, &res, p);
-    return res;
-  }
-#else
-  return (double) __ieee754_expl((long double)x);
-#endif
-}
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
deleted file mode 100644
index 7c97d9581..000000000
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2013 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*************************************************************************/
-/* MODULE_NAME:slowpow.c                                                 */
-/*                                                                       */
-/* FUNCTION:slowpow                                                      */
-/*                                                                       */
-/*FILES NEEDED:mpa.h                                                     */
-/*             mpa.c mpexp.c mplog.c halfulp.c                           */
-/*                                                                       */
-/* Given two IEEE double machine numbers y,x , routine  computes the     */
-/* correctly  rounded (to nearest) value of x^y. Result calculated  by   */
-/* multiplication (in halfulp.c) or if result isn't accurate enough      */
-/* then routine converts x and y into multi-precision doubles and        */
-/* recompute.                                                            */
-/*************************************************************************/
-
-#include "mpa.h"
-#include <math_private.h>
-
-void __mpexp (mp_no * x, mp_no * y, int p);
-void __mplog (mp_no * x, mp_no * y, int p);
-double ulog (double);
-double __halfulp (double x, double y);
-
-double
-__slowpow (double x, double y, double z)
-{
-  double res, res1;
-  long double ldw, ldz, ldpp;
-  static const long double ldeps = 0x4.0p-96;
-
-  res = __halfulp (x, y);	/* halfulp() returns -10 or x^y             */
-  if (res >= 0)
-    return res;			/* if result was really computed by halfulp */
-  /*  else, if result was not really computed by halfulp */
-
-  /* Compute pow as long double, 106 bits */
-  ldz = __ieee754_logl ((long double) x);
-  ldw = (long double) y *ldz;
-  ldpp = __ieee754_expl (ldw);
-  res = (double) (ldpp + ldeps);
-  res1 = (double) (ldpp - ldeps);
-
-  if (res != res1)		/* if result still not accurate enough */
-    {				/* use mpa for higher precision.  */
-      mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
-      static const mp_no eps = { -3, {1.0, 4.0} };
-      int p;
-
-      p = 10;			/*  p=precision 240 bits  */
-      __dbl_mp (x, &mpx, p);
-      __dbl_mp (y, &mpy, p);
-      __dbl_mp (z, &mpz, p);
-      __mplog (&mpx, &mpz, p);		/* log(x) = z   */
-      __mul (&mpy, &mpz, &mpw, p);	/*  y * z =w    */
-      __mpexp (&mpw, &mpp, p);		/*  e^w =pp     */
-      __add (&mpp, &eps, &mpr, p);	/*  pp+eps =r   */
-      __mp_dbl (&mpr, &res, p);
-      __sub (&mpp, &eps, &mpr1, p);	/*  pp -eps =r1 */
-      __mp_dbl (&mpr1, &res1, p);	/*  converting into double precision */
-      if (res == res1)
-	return res;
-
-      /* if we get here result wasn't calculated exactly, continue for
-         more exact calculation using 768 bits.  */
-      p = 32;
-      __dbl_mp (x, &mpx, p);
-      __dbl_mp (y, &mpy, p);
-      __dbl_mp (z, &mpz, p);
-      __mplog (&mpx, &mpz, p);		/* log(c)=z  */
-      __mul (&mpy, &mpz, &mpw, p);	/* y*z =w    */
-      __mpexp (&mpw, &mpp, p);		/* e^w=pp    */
-      __mp_dbl (&mpp, &res, p);		/* converting into double precision */
-    }
-  return res;
-}
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
index 7df52f810..6378ecb2d 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -17,13 +17,11 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
 
 	.machine power4
-EALIGN (BP_SYM(memcmp), 4, 0)
+EALIGN (memcmp, 4, 0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
@@ -31,9 +29,6 @@ EALIGN (BP_SYM(memcmp), 4, 0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rWORD3	r8	/* next word in s1 */
@@ -976,6 +971,6 @@ L(duzeroLength):
 	li	rRTN,0
 	blr
 
-END (BP_SYM (memcmp))
+END (memcmp)
 libc_hidden_builtin_def (memcmp)
 weak_alias (memcmp, bcmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
index 734434af0..c43d1d2e4 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
    Returns 'dst'.
@@ -36,7 +34,7 @@
    Each case has a optimized unrolled loop.   */
 
 	.machine power4
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
 	CALL_MCOUNT 3
 
     cmpldi cr1,5,31
@@ -413,5 +411,5 @@ EALIGN (BP_SYM (memcpy), 5, 0)
     ld 31,-8(1)
     ld 3,-16(1)
     blr
-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+END_GEN_TB (memcpy,TB_TOCLESS)
 libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memset.S b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
index 198269272..dbecee8b9 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
@@ -28,22 +26,15 @@
    to 0, to take advantage of the dcbz instruction.  */
 
 	.machine power4
-EALIGN (BP_SYM (memset), 5, 0)
+EALIGN (memset, 5, 0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
 #define rRTN	r3	/* Initial value of 1st argument.  */
-#if __BOUNDED_POINTERS__
-# define rMEMP0	r4	/* Original value of 1st arg.  */
-# define rCHR	r5	/* Char to set in each byte.  */
-# define rLEN	r6	/* Length of region to set.  */
-# define rMEMP	r10	/* Address at which we are storing.  */
-#else
-# define rMEMP0	r3	/* Original value of 1st arg.  */
-# define rCHR	r4	/* Char to set in each byte.  */
-# define rLEN	r5	/* Length of region to set.  */
-# define rMEMP	r6	/* Address at which we are storing.  */
-#endif
+#define rMEMP0	r3	/* Original value of 1st arg.  */
+#define rCHR	r4	/* Char to set in each byte.  */
+#define rLEN	r5	/* Length of region to set.  */
+#define rMEMP	r6	/* Address at which we are storing.  */
 #define rALIGN	r7	/* Number of bytes we are setting now (when aligning). */
 #define rMEMP2	r8
 
@@ -51,14 +42,6 @@ EALIGN (BP_SYM (memset), 5, 0)
 #define rCLS	r8	/* Cache line size obtained from static.  */
 #define rCLM	r9	/* Cache line size mask to check for cache alignment.  */
 L(_memset):
-#if __BOUNDED_POINTERS__
-	cmpldi	cr1, rRTN, 0
-	CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
-	beq	cr1, L(b0)
-	STORE_RETURN_VALUE (rMEMP0)
-	STORE_RETURN_BOUNDS (rTMP, rTMP2)
-L(b0):
-#endif
 /* Take care of case for size <= 4.  */
 	cmpldi	cr1, rLEN, 8
 	andi.	rALIGN, rMEMP0, 7
@@ -249,25 +232,16 @@ L(medium_27f):
 L(medium_28t):
 	std	rCHR, -8(rMEMP)
 	blr
-END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+END_GEN_TB (memset,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
 
 /* Copied from bzero.S to prevent the linker from inserting a stub
    between bzero and memset.  */
-ENTRY (BP_SYM (__bzero))
+ENTRY (__bzero)
 	CALL_MCOUNT 3
-#if __BOUNDED_POINTERS__
-	mr	r6,r4
-	li	r5,0
-	mr	r4,r3
-	/* Tell memset that we don't want a return value.  */
-	li	r3,0
-	b	L(_memset)
-#else
 	mr	r5,r4
 	li	r4,0
 	b	L(_memset)
-#endif
-END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+END_GEN_TB (__bzero,TB_TOCLESS)
 
-weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
+weak_alias (__bzero, bzero)
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
index 19877fa78..1276e16a5 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -17,14 +17,12 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
 /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
 
-EALIGN (BP_SYM(strncmp), 4, 0)
+EALIGN (strncmp, 4, 0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
@@ -32,9 +30,6 @@ EALIGN (BP_SYM(strncmp), 4, 0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rWORD3  r10
@@ -175,5 +170,5 @@ L(u4):	sub	rRTN, rWORD1, rWORD2
 L(ux):
 	li	rRTN, 0
 	blr
-END (BP_SYM (strncmp))
+END (strncmp)
 libc_hidden_builtin_def (strncmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
index 64f5b2f42..55c0d7118 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
    Returns 'dst'.
@@ -43,7 +41,7 @@
    for the destination.  */
 
 	.machine	"power6"
-EALIGN (BP_SYM (memcpy), 7, 0)
+EALIGN (memcpy, 7, 0)
 	CALL_MCOUNT 3
 
     cmpldi cr1,5,31
@@ -1165,5 +1163,5 @@ L(du_done):
     ld 31,-8(1)
     ld 3,-16(1)
     blr
-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+END_GEN_TB (memcpy,TB_TOCLESS)
 libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memset.S b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
index a7913a10a..541a45fd3 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
@@ -28,34 +26,19 @@
    to 0, to take advantage of the dcbz instruction.  */
 
 	.machine power6
-EALIGN (BP_SYM (memset), 7, 0)
+EALIGN (memset, 7, 0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
 #define rRTN	r3	/* Initial value of 1st argument.  */
-#if __BOUNDED_POINTERS__
-# define rMEMP0	r4	/* Original value of 1st arg.  */
-# define rCHR	r5	/* Char to set in each byte.  */
-# define rLEN	r6	/* Length of region to set.  */
-# define rMEMP	r10	/* Address at which we are storing.  */
-#else
-# define rMEMP0	r3	/* Original value of 1st arg.  */
-# define rCHR	r4	/* Char to set in each byte.  */
-# define rLEN	r5	/* Length of region to set.  */
-# define rMEMP	r6	/* Address at which we are storing.  */
-#endif
+#define rMEMP0	r3	/* Original value of 1st arg.  */
+#define rCHR	r4	/* Char to set in each byte.  */
+#define rLEN	r5	/* Length of region to set.  */
+#define rMEMP	r6	/* Address at which we are storing.  */
 #define rALIGN	r7	/* Number of bytes we are setting now (when aligning). */
 #define rMEMP2	r8
 #define rMEMP3	r9	/* Alt mem pointer.  */
 L(_memset):
-#if __BOUNDED_POINTERS__
-	cmpldi	cr1, rRTN, 0
-	CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
-	beq	cr1, L(b0)
-	STORE_RETURN_VALUE (rMEMP0)
-	STORE_RETURN_BOUNDS (rTMP, rTMP2)
-L(b0):
-#endif
 /* Take care of case for size <= 4.  */
 	cmpldi	cr1, rLEN, 8
 	andi.	rALIGN, rMEMP0, 7
@@ -393,25 +376,16 @@ L(medium_27f):
 L(medium_28t):
 	std	rCHR, -8(rMEMP)
 	blr
-END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+END_GEN_TB (memset,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
 
 /* Copied from bzero.S to prevent the linker from inserting a stub
    between bzero and memset.  */
-ENTRY (BP_SYM (__bzero))
+ENTRY (__bzero)
 	CALL_MCOUNT 3
-#if __BOUNDED_POINTERS__
-	mr	r6,r4
-	li	r5,0
-	mr	r4,r3
-	/* Tell memset that we don't want a return value.  */
-	li	r3,0
-	b	L(_memset)
-#else
 	mr	r5,r4
 	li	r4,0
 	b	L(_memset)
-#endif
-END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+END_GEN_TB (__bzero,TB_TOCLESS)
 
-weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
+weak_alias (__bzero, bzero)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
index 7b71a19e6..3416897f5 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5])  */
 	.machine  power7
-ENTRY (BP_SYM (__memchr))
+ENTRY (__memchr)
 	CALL_MCOUNT 2
 	dcbt	0,r3
 	clrrdi  r8,r3,3
@@ -202,6 +200,6 @@ L(loop_small):                /* loop_small has been unrolled.  */
 	blr
 
 
-END (BP_SYM (__memchr))
-weak_alias (BP_SYM (__memchr), BP_SYM(memchr))
+END (__memchr)
+weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
index a7caa4894..f190c6461 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
@@ -17,15 +17,13 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] memcmp (const char *s1 [r3],
 		    const char *s2 [r4],
 		    size_t size [r5])  */
 
 	.machine power7
-EALIGN (BP_SYM(memcmp),4,0)
+EALIGN (memcmp,4,0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
@@ -33,9 +31,6 @@ EALIGN (BP_SYM(memcmp),4,0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rWORD3	r8	/* next word in s1 */
@@ -978,6 +973,6 @@ L(duzeroLength):
 	li	rRTN,0
 	blr
 
-END (BP_SYM (memcmp))
+END (memcmp)
 libc_hidden_builtin_def (memcmp)
 weak_alias (memcmp,bcmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
index aa0db8e15..800a9f1bb 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -18,15 +18,13 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
    Returns 'dst'.  */
 
 	.machine power7
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
 	CALL_MCOUNT 3
 
 	cmpldi  cr1,5,31
@@ -502,5 +500,5 @@ L(end_unaligned_loop):
 	ld	3,-16(1)
 	blr
 
-END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+END_GEN_TB (memcpy,TB_TOCLESS)
 libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
index 9993040ac..f20be938d 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
@@ -18,15 +18,13 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 
 /* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
     Returns 'dst' + 'len'.  */
 
 	.machine  power7
-EALIGN (BP_SYM (__mempcpy), 5, 0)
+EALIGN (__mempcpy, 5, 0)
 	CALL_MCOUNT 3
 
 	cmpldi	cr1,5,31
@@ -451,7 +449,7 @@ L(end_unaligned_loop):
 	add	3,3,5
 	blr
 
-END_GEN_TB (BP_SYM (__mempcpy),TB_TOCLESS)
-libc_hidden_def (BP_SYM (__mempcpy))
-weak_alias (BP_SYM (__mempcpy), BP_SYM (mempcpy))
+END_GEN_TB (__mempcpy,TB_TOCLESS)
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
 libc_hidden_builtin_def (mempcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
index d3ffe4c08..d24fbbb1b 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5])  */
 	.machine  power7
-ENTRY (BP_SYM (__memrchr))
+ENTRY (__memrchr)
 	CALL_MCOUNT
 	dcbt	0,r3
 	mr	r7,r3
@@ -174,6 +172,6 @@ L(loop_small):
 	ble	L(null)
 	b	L(loop_small)
 
-END (BP_SYM (__memrchr))
-weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr))
+END (__memrchr)
+weak_alias (__memrchr, memrchr)
 libc_hidden_builtin_def (memrchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memset.S b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
index abb2d3528..b24cfa163 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -18,14 +18,12 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.  */
 
 	.machine power7
-EALIGN (BP_SYM (memset), 5, 0)
+EALIGN (memset, 5, 0)
 	CALL_MCOUNT 3
 
 L(_memset):
@@ -382,16 +380,16 @@ L(small):
 	stw	4,4(10)
 	blr
 
-END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+END_GEN_TB (memset,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
 
 /* Copied from bzero.S to prevent the linker from inserting a stub
    between bzero and memset.  */
-ENTRY (BP_SYM (__bzero))
+ENTRY (__bzero)
 	CALL_MCOUNT 3
 	mr	r5,r4
 	li	r4,0
 	b	L(_memset)
-END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+END_GEN_TB (__bzero,TB_TOCLESS)
 
-weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
+weak_alias (__bzero, bzero)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
index 5fc284de8..50a33d8fa 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] rawmemchr (void *s [r3], int c [r4])  */
 	.machine  power7
-ENTRY (BP_SYM(__rawmemchr))
+ENTRY (__rawmemchr)
 	CALL_MCOUNT 2
 	dcbt	0,r3
 	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
@@ -97,6 +95,6 @@ L(done):
 	srdi	r0,r0,3	      /* Convert leading zeroes to bytes.  */
 	add	r3,r8,r0      /* Return address of the matching char.  */
 	blr
-END (BP_SYM (__rawmemchr))
+END (__rawmemchr)
 weak_alias (__rawmemchr,rawmemchr)
 libc_hidden_builtin_def (__rawmemchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
index 6323154ea..9eee38469 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 #include <locale-defines.h>
 
 /* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
@@ -33,7 +31,7 @@
 # define STRCMP   strcasecmp
 #endif
 
-ENTRY (BP_SYM (__STRCMP))
+ENTRY (__STRCMP)
 	CALL_MCOUNT 2
 
 #define rRTN	r3	/* Return value */
@@ -118,7 +116,7 @@ L(done):
 	subf	r0, rLWR2, rLWR1
 	extsw	rRTN, r0
 	blr
-END (BP_SYM (__STRCMP))
+END (__STRCMP)
 
-weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP))
+weak_alias (__STRCMP, STRCMP)
 libc_hidden_builtin_def (__STRCMP)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
index 04b7d4f5e..3ffe7a188 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] strchr (char *s [r3], int c [r4])  */
 	.machine  power7
-ENTRY (BP_SYM(strchr))
+ENTRY (strchr)
 	CALL_MCOUNT 2
 	dcbt	0,r3
 	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
@@ -198,6 +196,6 @@ L(done_null):
 	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
 	add	r3,r8,r0      /* Return address of the matching null byte.  */
 	blr
-END (BP_SYM (strchr))
-weak_alias (BP_SYM (strchr), BP_SYM (index))
+END (strchr)
+weak_alias (strchr, index)
 libc_hidden_builtin_def (strchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
index 2b1e1c002..9dbc51b0d 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] strchrnul (char *s [r3], int c [r4])  */
 	.machine  power7
-ENTRY (BP_SYM(__strchrnul))
+ENTRY (__strchrnul)
 	CALL_MCOUNT 2
 	dcbt	0,r3
 	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
@@ -112,6 +110,6 @@ L(done):
 	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
 	add	r3,r8,r0      /* Return address of matching c/null byte.  */
 	blr
-END (BP_SYM (__strchrnul))
+END (__strchrnul)
 weak_alias (__strchrnul,strchrnul)
 libc_hidden_builtin_def (__strchrnul)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
index a36aa7d97..343216952 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] strlen (char *s [r3])  */
 	.machine  power7
-ENTRY (BP_SYM (strlen))
+ENTRY (strlen)
 	CALL_MCOUNT 1
 	dcbt	0,r3
 	clrrdi	r4,r3,3	      /* Align the address to doubleword boundary.  */
@@ -94,5 +92,5 @@ L(done):
 	srdi	r0,r0,3	      /* Convert leading zeroes to bytes.  */
 	add	r3,r5,r0      /* Compute final length.  */
 	blr
-END (BP_SYM (strlen))
+END (strlen)
 libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
index 25a6baf47..77ecad5ab 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
@@ -26,7 +24,7 @@
 		     const char *s2 [r4],
 		     size_t size [r5])  */
 
-EALIGN (BP_SYM(strncmp),5,0)
+EALIGN (strncmp,5,0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
@@ -34,9 +32,6 @@ EALIGN (BP_SYM(strncmp),5,0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rWORD3  r10
@@ -179,5 +174,5 @@ L(u4):	sub	rRTN,rWORD1,rWORD2
 L(ux):
 	li	rRTN,0
 	blr
-END (BP_SYM (strncmp))
+END (strncmp)
 libc_hidden_builtin_def (strncmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
index 23e0a355c..37c7dbfe8 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
@@ -18,12 +18,10 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* int [r3] strnlen (char *s [r3], int size [r4])  */
 	.machine  power7
-ENTRY (BP_SYM (__strnlen))
+ENTRY (__strnlen)
 	CALL_MCOUNT 2
 	dcbt	0,r3
 	clrrdi  r8,r3,3
@@ -167,6 +165,6 @@ L(loop_small):
 	cmpld	r9,r7
 	bge	L(end_max)
 	b	L(loop_small)
-END (BP_SYM (__strnlen))
-weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen))
+END (__strnlen)
+weak_alias (__strnlen, strnlen)
 libc_hidden_builtin_def (strnlen)
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
index 8586c2d4e..58ec61062 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -23,8 +23,6 @@
 #else
 #include <jmpbuf-offsets.h>
 #endif
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 #ifndef __NO_VMX__
 	.section	".toc","aw"
@@ -55,24 +53,23 @@ END (setjmp)
    that saves r2 since the call won't go via a plt call stub.  See
    bugz #269.  __GI__setjmp is used in csu/libc-start.c when
    HAVE_CLEANUP_JMP_BUF is defined.  */
-ENTRY (BP_SYM (__GI__setjmp))
+ENTRY (__GI__setjmp)
 	std r2,40(r1)		/* Save the callers TOC in the save area.  */
 	cfi_endproc
-END_2 (BP_SYM (__GI__setjmp))
+END_2 (__GI__setjmp)
 /* Fall thru. */
 #endif
 
-ENTRY (BP_SYM (_setjmp))
+ENTRY (_setjmp)
 	CALL_MCOUNT 1
 	li r4,0			/* Set second argument to 0.  */
 	b JUMPTARGET (GLUE(__sigsetjmp,_ent))
-END (BP_SYM (_setjmp))
+END (_setjmp)
 libc_hidden_def (_setjmp)
 
-ENTRY (BP_SYM (__sigsetjmp))
+ENTRY (__sigsetjmp)
 	CALL_MCOUNT 2
 JUMPTARGET(GLUE(__sigsetjmp,_ent)):
-	CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
 #ifdef PTR_MANGLE
 	mr   r5, r1
 	PTR_MANGLE (r5, r6)
@@ -219,18 +216,18 @@ L(no_vmx):
 	li	r3,0
 	blr
 #elif defined SHARED
-	b	JUMPTARGET (BP_SYM (__sigjmp_save))
+	b	JUMPTARGET (__sigjmp_save)
 #else
 	mflr	r0
 	std	r0,16(r1)
 	stdu	r1,-112(r1)
 	cfi_adjust_cfa_offset(112)
 	cfi_offset(lr,16)
-	bl	JUMPTARGET (BP_SYM (__sigjmp_save))
+	bl	JUMPTARGET (__sigjmp_save)
 	nop
 	ld	r0,112+16(r1)
 	addi	r1,r1,112
 	mtlr	r0
 	blr
 #endif
-END (BP_SYM (__sigsetjmp))
+END (__sigsetjmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/start.S b/libc/sysdeps/powerpc/powerpc64/start.S
index 210779c84..ec0fd30e7 100644
--- a/libc/sysdeps/powerpc/powerpc64/start.S
+++ b/libc/sysdeps/powerpc/powerpc64/start.S
@@ -34,7 +34,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include "bp-sym.h"
 
  /* These are the various addresses we require.  */
 #ifdef PIC
@@ -46,7 +45,7 @@
 L(start_addresses):
 	.quad	0 /* was _SDA_BASE_  but not in 64-bit ABI*/
 /*     function descriptors so don't need JUMPTARGET */
-	.quad	BP_SYM(main)
+	.quad	main
 	.quad 	__libc_csu_init
 	.quad 	__libc_csu_fini
 
@@ -71,7 +70,7 @@ ENTRY(_start)
 	ld	r8,.L01(r2)
 
  /* and continue in libc-start, in glibc.  */
-	b	JUMPTARGET(BP_SYM(__libc_start_main))
+	b	JUMPTARGET(__libc_start_main)
 /* The linker needs this nop to recognize that it's OK to call via a
    TOC adjusting stub.  */
 	nop
diff --git a/libc/sysdeps/powerpc/powerpc64/stpcpy.S b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
index d9cffe9ad..070cd4662 100644
--- a/libc/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -17,37 +17,24 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
 /* char * [r3] stpcpy (char *dest [r3], const char *src [r4])  */
 
-EALIGN (BP_SYM (__stpcpy), 4, 0)
+EALIGN (__stpcpy, 4, 0)
 	CALL_MCOUNT 2
 
 #define rTMP	r0
 #define rRTN	r3
-#if __BOUNDED_POINTERS__
-# define rDEST	r4		/* pointer to previous word in dest */
-# define rSRC	r5		/* pointer to previous word in src */
-# define rLOW	r11
-# define rHIGH	r12
-#else
-# define rDEST	r3		/* pointer to previous word in dest */
-# define rSRC	r4		/* pointer to previous word in src */
-#endif
+#define rDEST	r3		/* pointer to previous word in dest */
+#define rSRC	r4		/* pointer to previous word in src */
 #define rWORD	r6		/* current word from src */
 #define rFEFE	r7		/* 0xfefefeff */
 #define r7F7F	r8		/* 0x7f7f7f7f */
 #define rNEG	r9		/* ~(word in src | 0x7f7f7f7f) */
 #define rALT	r10		/* alternate word from src */
 
-	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
-	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
-	STORE_RETURN_BOUNDS (rLOW, rHIGH)
-
 	or	rTMP, rSRC, rDEST
 	clrldi.	rTMP, rTMP, 62
 	addi	rDEST, rDEST, -4
@@ -85,8 +72,6 @@ L(g1):	rlwinm.	rTMP, rALT, 8, 24, 31
 	stbu	rTMP, 1(rDEST)
 	beqlr-
 	stbu	rALT, 1(rDEST)
-	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
-	STORE_RETURN_VALUE (rDEST)
 	blr
 
 /* Oh well.  In this case, we just do a byte-by-byte copy.  */
@@ -108,15 +93,11 @@ L(u0):	lbzu	rALT, 1(rSRC)
 	cmpwi	rWORD, 0
 	bne+	L(u0)
 L(u2):	stbu	rWORD, 1(rDEST)
-	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
-	STORE_RETURN_VALUE (rDEST)
 	blr
 L(u1):	stbu	rALT, 1(rDEST)
-	CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
-	STORE_RETURN_VALUE (rDEST)
 	blr
-END (BP_SYM (__stpcpy))
+END (__stpcpy)
 
-weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy))
+weak_alias (__stpcpy, stpcpy)
 libc_hidden_def (__stpcpy)
 libc_hidden_builtin_def (stpcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/strchr.S b/libc/sysdeps/powerpc/powerpc64/strchr.S
index 3bd392949..d2d8cd361 100644
--- a/libc/sysdeps/powerpc/powerpc64/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/strchr.S
@@ -17,32 +17,19 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how this works.  */
 
 /* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
 
-ENTRY (BP_SYM (strchr))
+ENTRY (strchr)
 	CALL_MCOUNT 2
 
 #define rTMP1	r0
 #define rRTN	r3	/* outgoing result */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
-   These artifacts are left in the code as a reminder in case we need
-   bounded pointer support in the future.  */
-#if __BOUNDED_POINTERS__
-# define rSTR	r4
-# define rCHR	r5	/* byte we're looking for, spread over the whole word */
-# define rWORD	r8	/* the current word */
-#else
-# define rSTR	r8	/* current word pointer */
-# define rCHR	r4	/* byte we're looking for, spread over the whole word */
-# define rWORD	r5	/* the current word */
-#endif
+#define rSTR	r8	/* current word pointer */
+#define rCHR	r4	/* byte we're looking for, spread over the whole word */
+#define rWORD	r5	/* the current word */
 #define rCLZB	rCHR	/* leading zero byte count */
 #define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
 #define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
@@ -51,9 +38,6 @@ ENTRY (BP_SYM (strchr))
 #define rMASK	r11	/* mask with the bits to ignore set to 0 */
 #define rTMP3	r12
 
-	CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
-	STORE_RETURN_BOUNDS (rTMP1, rTMP2)
-
 	dcbt	0,rRTN
 	rlwimi	rCHR, rCHR, 8, 16, 23
 	li	rMASK, -1
@@ -101,7 +85,6 @@ L(loopentry):
 L(missed):
 	and.	rTMP1, rTMP1, rTMP2
 	li	rRTN, 0
-	STORE_RETURN_VALUE (rSTR)
 	beqlr
 /* It did happen. Decide which one was first...
    I'm not sure if this is actually faster than a sequence of
@@ -119,8 +102,6 @@ L(missed):
 	cntlzd	rCLZB, rTMP2
 	srdi	rCLZB, rCLZB, 3
 	add	rRTN, rSTR, rCLZB
-	CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
-	STORE_RETURN_VALUE (rSTR)
 	blr
 
 L(foundit):
@@ -132,10 +113,8 @@ L(foundit):
 	subi	rSTR, rSTR, 8
 	srdi	rCLZB, rCLZB, 3
 	add	rRTN, rSTR, rCLZB
-	CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
-	STORE_RETURN_VALUE (rSTR)
 	blr
-END (BP_SYM (strchr))
+END (strchr)
 
-weak_alias (BP_SYM (strchr), BP_SYM (index))
+weak_alias (strchr, index)
 libc_hidden_builtin_def (strchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/strcmp.S b/libc/sysdeps/powerpc/powerpc64/strcmp.S
index 46600d5d0..c9d6dac12 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcmp.S
@@ -17,29 +17,18 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
 /* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])  */
 
-EALIGN (BP_SYM(strcmp), 4, 0)
+EALIGN (strcmp, 4, 0)
 	CALL_MCOUNT 2
 
 #define rTMP	r0
 #define rRTN	r3
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
-   These artifacts are left in the code as a reminder in case we need
-   bounded pointer support in the future.  */
-#if __BOUNDED_POINTERS__
-# define rHIGH1	r11
-# define rHIGH2 r12
-#endif
 #define rWORD1	r5	/* current word in s1 */
 #define rWORD2	r6	/* current word in s2 */
 #define rFEFE	r7	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
@@ -47,9 +36,6 @@ EALIGN (BP_SYM(strcmp), 4, 0)
 #define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
 #define rBITDIF	r10	/* bits that differ in s1 & s2 words */
 
-	CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1)
-	CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2)
-
 	dcbt	0,rSTR1
 	or	rTMP, rSTR2, rSTR1
 	dcbt	0,rSTR2
@@ -98,7 +84,6 @@ L(endstring):
 	blr
 L(equal):
 	li	rRTN, 0
-	/* GKM FIXME: check high bounds.  */
 	blr
 
 L(different):
@@ -113,7 +98,6 @@ L(highbit):
 	srdi	rWORD2, rWORD2, 56
 	srdi	rWORD1, rWORD1, 56
 	sub	rRTN, rWORD1, rWORD2
-	/* GKM FIXME: check high bounds.  */
 	blr
 
 
@@ -137,11 +121,9 @@ L(u1):	cmpwi	cr1, rWORD1, 0
 	cmpd	rWORD1, rWORD2
 	bne+	cr1, L(u0)
 L(u3):	sub	rRTN, rWORD1, rWORD2
-	/* GKM FIXME: check high bounds.  */
 	blr
 L(u4):	lbz	rWORD1, -1(rSTR1)
 	sub	rRTN, rWORD1, rWORD2
-	/* GKM FIXME: check high bounds.  */
 	blr
-END (BP_SYM (strcmp))
+END (strcmp)
 libc_hidden_builtin_def (strcmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/strcpy.S b/libc/sysdeps/powerpc/powerpc64/strcpy.S
index 56845cf8f..4c6fd3f9d 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcpy.S
@@ -17,50 +17,28 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
 /* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
 
-EALIGN (BP_SYM (strcpy), 4, 0)
+EALIGN (strcpy, 4, 0)
 	CALL_MCOUNT 2
 
 #define rTMP	r0
 #define rRTN	r3	/* incoming DEST arg preserved as result */
-/* Note.  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
-   These artifacts are left in the code as a reminder in case we need
-   bounded pointer support in the future.  */
-#if __BOUNDED_POINTERS__
-# define rDEST	r4	/* pointer to previous word in dest */
-# define rSRC	r5	/* pointer to previous word in src */
-# define rLOW	r11
-# define rHIGH	r12
-#else
-# define rSRC	r4	/* pointer to previous word in src */
-# define rDEST	r5	/* pointer to previous word in dest */
-#endif
+#define rSRC	r4	/* pointer to previous word in src */
+#define rDEST	r5	/* pointer to previous word in dest */
 #define rWORD	r6	/* current word from src */
 #define rFEFE	r7	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
 #define r7F7F	r8	/* constant 0x7f7f7f7f7f7f7f7f */
 #define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
 #define rALT	r10	/* alternate word from src */
 
-	CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH)
-	CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH)
-	STORE_RETURN_BOUNDS (rLOW, rHIGH)
-
 	dcbt	0,rSRC
 	or	rTMP, rSRC, rRTN
 	clrldi.	rTMP, rTMP, 61
-#if __BOUNDED_POINTERS__
-	addi	rDEST, rDEST, -8
-#else
 	addi	rDEST, rRTN, -8
-#endif
 	dcbtst	0,rRTN
 	bne	L(unaligned)
 
@@ -112,7 +90,6 @@ L(g1):
 	stb	rTMP, 14(rDEST)
 	beqlr-
 	stb	rALT, 15(rDEST)
-	/* GKM FIXME: check high bound.  */
 	blr
 
 /* Oh well.  In this case, we just do a byte-by-byte copy.  */
@@ -134,11 +111,9 @@ L(u0):	lbzu	rALT, 1(rSRC)
 	cmpwi	rWORD, 0
 	bne+	L(u0)
 L(u2):	stb	rWORD, 1(rDEST)
-	/* GKM FIXME: check high bound.  */
 	blr
 L(u1):	stb	rALT, 1(rDEST)
-	/* GKM FIXME: check high bound.  */
 	blr
 
-END (BP_SYM (strcpy))
+END (strcpy)
 libc_hidden_builtin_def (strcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/strlen.S b/libc/sysdeps/powerpc/powerpc64/strlen.S
index 3ef4cc88e..dafd03387 100644
--- a/libc/sysdeps/powerpc/powerpc64/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/strlen.S
@@ -17,8 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* The algorithm here uses the following techniques:
 
@@ -77,7 +75,7 @@
 
 /* int [r3] strlen (char *s [r3])  */
 
-ENTRY (BP_SYM (strlen))
+ENTRY (strlen)
 	CALL_MCOUNT 1
 
 #define rTMP1	r0
@@ -94,13 +92,6 @@ ENTRY (BP_SYM (strlen))
 #define rTMP3	r11
 #define rTMP4	r12
 
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
-   These artifacts are left in the code as a reminder in case we need
-   bounded pointer support in the future.  */
-	CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
-
 	dcbt	0,rRTN
 	clrrdi	rSTR, rRTN, 3
 	lis	r7F7F, 0x7f7f
@@ -168,7 +159,6 @@ L(done0):
 	subf	rTMP1, rRTN, rSTR
 	srdi	rTMP3, rTMP3, 3
 	add	rRTN, rTMP1, rTMP3
-	/* GKM FIXME: check high bound.  */
 	blr
-END (BP_SYM (strlen))
+END (strlen)
 libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc64/strncmp.S b/libc/sysdeps/powerpc/powerpc64/strncmp.S
index 89a3246fd..e2726883f 100644
--- a/libc/sysdeps/powerpc/powerpc64/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strncmp.S
@@ -17,14 +17,12 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
 /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
 
-EALIGN (BP_SYM(strncmp), 4, 0)
+EALIGN (strncmp, 4, 0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
@@ -32,9 +30,6 @@ EALIGN (BP_SYM(strncmp), 4, 0)
 #define rSTR1	r3	/* first string arg */
 #define rSTR2	r4	/* second string arg */
 #define rN	r5	/* max string length */
-/* Note:  The Bounded pointer support in this code is broken.  This code
-   was inherited from PPC32 and that support was never completed.
-   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
 #define rWORD1	r6	/* current word in s1 */
 #define rWORD2	r7	/* current word in s2 */
 #define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
@@ -160,5 +155,5 @@ L(u1):
 L(u2):	lbzu	rWORD1, -1(rSTR1)	
 L(u3):	sub	rRTN, rWORD1, rWORD2
 	blr
-END (BP_SYM (strncmp))
+END (strncmp)
 libc_hidden_builtin_def (strncmp)
diff --git a/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c b/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c
index fcbd15e26..d57a907df 100644
--- a/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c
+++ b/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c
@@ -34,9 +34,9 @@
 									\
   /* Make the declarations of the optimized functions hidden in order
      to prevent GOT slots being generated for them. */			\
-  extern __attribute__((visibility("hidden"))) void *FUNC##_z196;	\
-  extern __attribute__((visibility("hidden"))) void *FUNC##_z10;	\
-  extern __attribute__((visibility("hidden"))) void *FUNC##_g5;		\
+  extern void *FUNC##_z196 attribute_hidden;				\
+  extern void *FUNC##_z10 attribute_hidden;				\
+  extern void *FUNC##_g5 attribute_hidden;				\
 									\
   void *resolve_##FUNC (unsigned long int dl_hwcap)			\
   {									\
diff --git a/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c b/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c
index 256179b26..14d9c13eb 100644
--- a/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c
+++ b/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c
@@ -34,9 +34,9 @@
 									\
   /* Make the declarations of the optimized functions hidden in order
      to prevent GOT slots being generated for them. */			\
-  extern __attribute__((visibility("hidden"))) void *FUNC##_z196;	\
-  extern __attribute__((visibility("hidden"))) void *FUNC##_z10;	\
-  extern __attribute__((visibility("hidden"))) void *FUNC##_z900;	\
+  extern void *FUNC##_z196 attribute_hidden;				\
+  extern void *FUNC##_z10 attribute_hidden;				\
+  extern void *FUNC##_z900 attribute_hidden;				\
 									\
   void *resolve_##FUNC (unsigned long int dl_hwcap)			\
   {									\
diff --git a/libc/sysdeps/sparc/fpu/libm-test-ulps b/libc/sysdeps/sparc/fpu/libm-test-ulps
index 6eee78843..bacac6e00 100644
--- a/libc/sysdeps/sparc/fpu/libm-test-ulps
+++ b/libc/sysdeps/sparc/fpu/libm-test-ulps
@@ -2637,6 +2637,9 @@ float: 2
 ifloat: 2
 ildouble: 1
 ldouble: 1
+Test "j0 (0x1p16383) == 9.5859502826270374691362975419147645151233e-2467":
+ildouble: 2
+ldouble: 2
 Test "j0 (10.0) == -0.245935764451348335197760862485328754":
 double: 2
 float: 1
@@ -2677,6 +2680,9 @@ double: 1
 idouble: 1
 ildouble: 1
 ldouble: 1
+Test "j1 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467":
+ildouble: 1
+ldouble: 1
 Test "j1 (1.0) == 0.440050585744933515959682203718914913":
 ildouble: 1
 ldouble: 1
@@ -3277,6 +3283,9 @@ double: 1
 float: 1
 idouble: 1
 ifloat: 1
+Test "y0 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467":
+ildouble: 1
+ldouble: 1
 Test "y0 (1.0) == 0.0882569642156769579829267660235151628":
 double: 2
 float: 1
@@ -3321,6 +3330,9 @@ ldouble: 1
 Test "y1 (0x1p-30) == -6.8356527557643159612937462812258975438856e+08":
 ildouble: 1
 ldouble: 1
+Test "y1 (0x1p16383) == -9.5859502826270374691362975419147645151233e-2467":
+ildouble: 2
+ldouble: 2
 Test "y1 (1.5) == -0.412308626973911295952829820633445323":
 float: 1
 ifloat: 1
@@ -4133,8 +4145,8 @@ double: 3
 float: 2
 idouble: 3
 ifloat: 2
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
 
 Function: "yn":
 double: 3
diff --git a/libc/sysdeps/unix/sysv/linux/Makefile b/libc/sysdeps/unix/sysv/linux/Makefile
index ecd9c2c97..f82c94982 100644
--- a/libc/sysdeps/unix/sysv/linux/Makefile
+++ b/libc/sysdeps/unix/sysv/linux/Makefile
@@ -35,7 +35,8 @@ sysdep_headers += sys/mount.h sys/acct.h sys/sysctl.h \
 		  bits/a.out.h sys/inotify.h sys/signalfd.h sys/eventfd.h \
 		  sys/timerfd.h sys/fanotify.h bits/eventfd.h bits/inotify.h \
 		  bits/signalfd.h bits/timerfd.h bits/epoll.h \
-		  bits/socket_type.h bits/syscall.h bits/sysctl.h
+		  bits/socket_type.h bits/syscall.h bits/sysctl.h \
+		  bits/mman-linux.h
 
 tests += tst-clone
 
diff --git a/libc/sysdeps/unix/sysv/linux/bits/mman-linux.h b/libc/sysdeps/unix/sysv/linux/bits/mman-linux.h
new file mode 100644
index 000000000..05d2d9237
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/bits/mman-linux.h
@@ -0,0 +1,108 @@
+/* Definitions for POSIX memory map interface.  Linux generic version.
+   Copyright (C) 2001-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_MMAN_H
+# error "Never use <bits/mman-linux.h> directly; include <sys/mman.h> instead."
+#endif
+
+/* The following definitions basically come from the kernel headers.
+   But the kernel header is not namespace clean.  */
+
+
+/* Protections are chosen from these bits, OR'd together.  The
+   implementation does not necessarily support PROT_EXEC or PROT_WRITE
+   without PROT_READ.  The only guarantees are that no writing will be
+   allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */
+
+#define PROT_READ	0x1		/* Page can be read.  */
+#define PROT_WRITE	0x2		/* Page can be written.  */
+#define PROT_EXEC	0x4		/* Page can be executed.  */
+#define PROT_NONE	0x0		/* Page can not be accessed.  */
+#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
+					   growsdown vma (mprotect only).  */
+#define PROT_GROWSUP	0x02000000	/* Extend change to start of
+					   growsup vma (mprotect only).  */
+
+/* Sharing types (must choose one and only one of these).  */
+#define MAP_SHARED	0x01		/* Share changes.  */
+#define MAP_PRIVATE	0x02		/* Changes are private.  */
+#ifdef __USE_MISC
+# define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
+#endif
+
+/* Other flags.  */
+#define MAP_FIXED	0x10		/* Interpret addr exactly.  */
+#ifdef __USE_MISC
+# define MAP_FILE	0
+# ifdef __MAP_ANONYMOUS
+#  define MAP_ANONYMOUS	__MAP_ANONYMOUS	/* Don't use a file.  */
+# else
+#  define MAP_ANONYMOUS	0x20		/* Don't use a file.  */
+# endif
+# define MAP_ANON	MAP_ANONYMOUS
+/* When MAP_HUGETLB is set, bits [26:31] encode the log2 of the huge page size.  */
+# define MAP_HUGE_SHIFT	26
+# define MAP_HUGE_MASK	0x3f
+#endif
+
+/* Flags to `msync'.  */
+#define MS_ASYNC	1		/* Sync memory asynchronously.  */
+#define MS_SYNC		4		/* Synchronous memory sync.  */
+#define MS_INVALIDATE	2		/* Invalidate the caches.  */
+
+/* Flags for `mremap'.  */
+#ifdef __USE_GNU
+# define MREMAP_MAYMOVE	1
+# define MREMAP_FIXED	2
+#endif
+
+/* Advice to `madvise'.  */
+#ifdef __USE_BSD
+# define MADV_NORMAL	  0	/* No further special treatment.  */
+# define MADV_RANDOM	  1	/* Expect random page references.  */
+# define MADV_SEQUENTIAL  2	/* Expect sequential page references.  */
+# define MADV_WILLNEED	  3	/* Will need these pages.  */
+# define MADV_DONTNEED	  4	/* Don't need these pages.  */
+# define MADV_REMOVE	  9	/* Remove these pages and resources.  */
+# define MADV_DONTFORK	  10	/* Do not inherit across fork.  */
+# define MADV_DOFORK	  11	/* Do inherit across fork.  */
+# define MADV_MERGEABLE	  12	/* KSM may merge identical pages.  */
+# define MADV_UNMERGEABLE 13	/* KSM may not merge identical pages.  */
+# define MADV_HUGEPAGE	  14	/* Worth backing with hugepages.  */
+# define MADV_NOHUGEPAGE  15	/* Not worth backing with hugepages.  */
+# define MADV_DONTDUMP	  16    /* Explicitly exclude from the core dump,
+                                   overrides the coredump filter bits.  */
+# define MADV_DODUMP	  17	/* Clear the MADV_DONTDUMP flag.  */
+# define MADV_HWPOISON	  100	/* Poison a page for testing.  */
+#endif
+
+/* The POSIX people had to invent similar names for the same things.  */
+#ifdef __USE_XOPEN2K
+# define POSIX_MADV_NORMAL	0 /* No further special treatment.  */
+# define POSIX_MADV_RANDOM	1 /* Expect random page references.  */
+# define POSIX_MADV_SEQUENTIAL	2 /* Expect sequential page references.  */
+# define POSIX_MADV_WILLNEED	3 /* Will need these pages.  */
+# define POSIX_MADV_DONTNEED	4 /* Don't need these pages.  */
+#endif
+
+/* Flags for `mlockall'.  */
+#ifndef MCL_CURRENT
+# define MCL_CURRENT	1		/* Lock all currently mapped pages.  */
+# define MCL_FUTURE	2		/* Lock all additions to address
+					   space.  */
+#endif
diff --git a/libc/sysdeps/unix/sysv/linux/bits/msq.h b/libc/sysdeps/unix/sysv/linux/bits/msq.h
index bd005fb10..8f6eb8a7d 100644
--- a/libc/sysdeps/unix/sysv/linux/bits/msq.h
+++ b/libc/sysdeps/unix/sysv/linux/bits/msq.h
@@ -25,6 +25,7 @@
 #define MSG_NOERROR	010000	/* no error if message is too big */
 #ifdef __USE_GNU
 # define MSG_EXCEPT	020000	/* recv any msg except of specified type */
+# define MSG_COPY	040000	/* copy (not remove) all queue messages */
 #endif
 
 /* Types used in the structure definition.  */
diff --git a/libc/sysdeps/unix/sysv/linux/fpathconf.c b/libc/sysdeps/unix/sysv/linux/fpathconf.c
index c97164468..e8c4dc972 100644
--- a/libc/sysdeps/unix/sysv/linux/fpathconf.c
+++ b/libc/sysdeps/unix/sysv/linux/fpathconf.c
@@ -33,7 +33,6 @@ __fpathconf (fd, name)
      int name;
 {
   struct statfs fsbuf;
-  int r;
 
   switch (name)
     {
@@ -49,12 +48,6 @@ __fpathconf (fd, name)
     case _PC_CHOWN_RESTRICTED:
       return __statfs_chown_restricted (__fstatfs (fd, &fsbuf), &fsbuf);
 
-    case _PC_PIPE_BUF:
-      r = __fcntl (fd, F_GETPIPE_SZ);
-      if (r > 0)
-	return r;
-      /* FALLTHROUGH */
-
     default:
       return posix_fpathconf (fd, name);
     }
diff --git a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c
index 4cd4f042c..45a66b83d 100644
--- a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c
+++ b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c
@@ -117,6 +117,12 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st)
     case LUSTRE_SUPER_MAGIC:
       fsname = "lustre";
       break;
+    case F2FS_SUPER_MAGIC:
+      fsname = "f2fs";
+      break;
+    case EFIVARFS_MAGIC:
+      fsname = "efivarfs";
+      break;
     }
 
   FILE *mtab = __setmntent ("/proc/mounts", "r");
diff --git a/libc/sysdeps/unix/sysv/linux/ldsodefs.h b/libc/sysdeps/unix/sysv/linux/ldsodefs.h
index 081fa01f8..18ff8528c 100644
--- a/libc/sysdeps/unix/sysv/linux/ldsodefs.h
+++ b/libc/sysdeps/unix/sysv/linux/ldsodefs.h
@@ -29,12 +29,6 @@
 /* We have the auxiliary vector.  */
 #define HAVE_AUX_VECTOR
 
-/* Used by static binaries to check the auxiliary vector.  */
-extern void _dl_aux_init (ElfW(auxv_t) *av) internal_function;
-
-/* Initialization which is normally done by the dynamic linker.  */
-extern void _dl_non_dynamic_init (void) internal_function;
-
 /* We can assume that the kernel always provides the AT_UID, AT_EUID,
    AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on.  */
 #define HAVE_AUX_XID
diff --git a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h b/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h
index 1bcd9e2b2..2312b4702 100644
--- a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h
+++ b/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h
@@ -61,9 +61,15 @@
 #define EFS_SUPER_MAGIC		0x414a53
 #define EFS_MAGIC		0x072959
 
+/* Constant that identifies the `efivarfs' filesystem.  */
+#define EFIVARFS_MAGIC		0xde5e81e4
+
 /* Constant that identifies the `ext2' and `ext3' filesystems.  */
 #define EXT2_SUPER_MAGIC	0xef53
 
+/* Constant that identifies the `f2fs' filesystem.  */
+#define F2FS_SUPER_MAGIC	0xf2f52010
+
 /* Constant that identifies the `hpfs' filesystem.  */
 #define HPFS_SUPER_MAGIC	0xf995e849
 
@@ -153,6 +159,7 @@
 #define COH_LINK_MAX		10000
 #define EXT2_LINK_MAX		32000
 #define EXT4_LINK_MAX		65000
+#define F2FS_LINK_MAX		32000
 #define LUSTRE_LINK_MAX		EXT4_LINK_MAX
 #define MINIX2_LINK_MAX		65530
 #define MINIX_LINK_MAX		250
diff --git a/libc/sysdeps/unix/sysv/linux/pathconf.c b/libc/sysdeps/unix/sysv/linux/pathconf.c
index e86925f7d..de91a4541 100644
--- a/libc/sysdeps/unix/sysv/linux/pathconf.c
+++ b/libc/sysdeps/unix/sysv/linux/pathconf.c
@@ -39,8 +39,6 @@ long int
 __pathconf (const char *file, int name)
 {
   struct statfs fsbuf;
-  int fd;
-  int flags;
 
   switch (name)
     {
@@ -56,21 +54,6 @@ __pathconf (const char *file, int name)
     case _PC_CHOWN_RESTRICTED:
       return __statfs_chown_restricted (__statfs (file, &fsbuf), &fsbuf);
 
-    case _PC_PIPE_BUF:
-      flags = O_RDONLY|O_NONBLOCK|O_NOCTTY;
-#ifdef O_CLOEXEC
-      flags |= O_CLOEXEC;
-#endif
-      fd = open_not_cancel_2 (file, flags);
-      if (fd >= 0)
-	{
-	  long int r = __fcntl (fd, F_GETPIPE_SZ);
-	  close_not_cancel_no_status (fd);
-	  if (r > 0)
-	    return r;
-	}
-      /* FALLTHROUGH */
-
     default:
       return posix_pathconf (file, name);
     }
@@ -168,6 +151,9 @@ __statfs_link_max (int result, const struct statfs *fsbuf, const char *file,
 	 the hard way.  */
       return distinguish_extX (fsbuf, file, fd);
 
+    case F2FS_SUPER_MAGIC:
+      return F2FS_LINK_MAX;
+
     case MINIX_SUPER_MAGIC:
     case MINIX_SUPER_MAGIC2:
       return MINIX_LINK_MAX;
@@ -221,6 +207,9 @@ __statfs_filesize_max (int result, const struct statfs *fsbuf)
 
   switch (fsbuf->f_type)
     {
+    case F2FS_SUPER_MAGIC:
+      return 256;
+
     case BTRFS_SUPER_MAGIC:
       return 255;
 
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/Implies
deleted file mode 100644
index ff27cdb56..000000000
--- a/libc/sysdeps/unix/sysv/linux/powerpc/Implies
+++ /dev/null
@@ -1,4 +0,0 @@
-# Make sure these routines come before ldbl-opt.
-ieee754/ldbl-128ibm
-# These supply the ABI compatibility for when long double was double.
-ieee754/ldbl-opt
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h
index 545fda462..5f5fc1eb3 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h
@@ -32,6 +32,16 @@ extern void *__vdso_get_tbfreq;
 
 extern void *__vdso_getcpu;
 
+/* This macro is needed for PPC64 to return a skeleton OPD entry of a vDSO
+   symbol.  This works because _dl_vdso_vsym always returns the function
+   address, and no vDSO symbols use the TOC or chain pointers from the OPD
+   so we can allow them to be garbage.  */
+#if defined(__PPC64__) || defined(__powerpc64__)
+#define VDSO_IFUNC_RET(value)  &value
+#else
+#define VDSO_IFUNC_RET(value)  value
+#endif
+
 #endif
 
 #endif /* _LIBC_VDSO_H */
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
index a27018965..3f72c0335 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h
@@ -17,42 +17,13 @@
    <http://www.gnu.org/licenses/>.  */
 
 #ifndef _SYS_MMAN_H
-# error "Never use <bits/mman.h> directly; iclude <sys/mman.h> instead."
+# error "Never use <bits/mman.h> directly; include <sys/mman.h> instead."
 #endif
 
 /* The following definitions basically come from the kernel headers.
    But the kernel header is not namespace clean.  */
 
-
-/* Protections are chosen from these bits, OR'd together.  The
-   implementation does not necessarily support PROT_EXEC or PROT_WRITE
-   without PROT_READ.  The only guarantees are that no writing will be
-   allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */
-
-#define PROT_READ	0x1		/* Page can be read.  */
-#define PROT_WRITE	0x2		/* Page can be written.  */
-#define PROT_EXEC	0x4		/* Page can be executed.  */
-#define PROT_NONE	0x0		/* Page can not be accessed.  */
 #define PROT_SAO	0x10		/* Strong Access Ordering.  */
-#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
-					   growsdown vma (mprotect only).  */
-#define PROT_GROWSUP	0x02000000	/* Extend change to start of
-					   growsup vma (mprotect only).  */
-
-/* Sharing types (must choose one and only one of these).  */
-#define MAP_SHARED	0x001		/* Share changes.  */
-#define MAP_PRIVATE	0x002		/* Changes are private.  */
-#ifdef __USE_MISC
-# define MAP_TYPE	0x00f		/* Mask for type of mapping.  */
-#endif
-
-/* Other flags.  */
-#define MAP_FIXED	0x010		/* Interpret addr exactly.  */
-#ifdef __USE_MISC
-# define MAP_FILE	0x000
-# define MAP_ANONYMOUS	0x020		/* Don't use a file.  */
-# define MAP_ANON	MAP_ANONYMOUS
-#endif
 
 /* These are Linux-specific.  */
 #ifdef __USE_MISC
@@ -67,48 +38,10 @@
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
 #endif
 
-/* Flags to `msync'.  */
-#define MS_ASYNC	1		/* Sync memory asynchronously.  */
-#define MS_SYNC		4		/* Synchronous memory sync.  */
-#define MS_INVALIDATE	2		/* Invalidate the caches.  */
-
 /* Flags for `mlockall'.  */
 #define MCL_CURRENT	0x2000		/* Lock all currently mapped pages.  */
 #define MCL_FUTURE	0x4000		/* Lock all additions to address
 					   space.  */
 
-
-/* Flags for `mremap'.  */
-#ifdef __USE_GNU
-# define MREMAP_MAYMOVE	1
-# define MREMAP_FIXED	2
-#endif
-
-/* Advice to `madvise'.  */
-#ifdef __USE_BSD
-# define MADV_NORMAL	  0	/* No further special treatment.  */
-# define MADV_RANDOM	  1	/* Expect random page references.  */
-# define MADV_SEQUENTIAL  2	/* Expect sequential page references.  */
-# define MADV_WILLNEED	  3	/* Will need these pages.  */
-# define MADV_DONTNEED	  4	/* Don't need these pages.  */
-# define MADV_REMOVE	  9	/* Remove these pages and resources.  */
-# define MADV_DONTFORK	  10	/* Do not inherit across fork.  */
-# define MADV_DOFORK	  11	/* Do inherit across fork.  */
-# define MADV_MERGEABLE	  12	/* KSM may merge identical pages.  */
-# define MADV_UNMERGEABLE 13	/* KSM may not merge identical pages.  */
-# define MADV_HUGEPAGE	  14	/* Worth backing with hugepages.  */
-# define MADV_NOHUGEPAGE  15	/* Not worth backing with hugepages.  */
-# define MADV_DONTDUMP	  16    /* Explicity exclude from the core dump,
-                                   overrides the coredump filter bits.  */
-# define MADV_DODUMP	  17	/* Clear the MADV_DONTDUMP flag.  */
-# define MADV_HWPOISON	  100	/* Poison a page for testing.  */
-#endif
-
-/* The POSIX people had to invent similar names for the same things.  */
-#ifdef __USE_XOPEN2K
-# define POSIX_MADV_NORMAL	0 /* No further special treatment.  */
-# define POSIX_MADV_RANDOM	1 /* Expect random page references.  */
-# define POSIX_MADV_SEQUENTIAL	2 /* Expect sequential page references.  */
-# define POSIX_MADV_WILLNEED	3 /* Will need these pages.  */
-# define POSIX_MADV_DONTNEED	4 /* Don't need these pages.  */
-#endif
+/* Include generic Linux declarations.  */
+#include <bits/mman-linux.h>
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h
index b9811c656..59147c268 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h
@@ -25,6 +25,7 @@
 #define MSG_NOERROR    010000  /* no error if message is too big */
 #ifdef __USE_GNU
 # define MSG_EXCEPT    020000  /* recv any msg except of specified type */
+# define MSG_COPY	040000	/* copy (not remove) all queue messages */
 #endif
 
 /* Types used in the structure definition.  */
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c b/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c
index f60748507..6506d75e6 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c
@@ -15,25 +15,49 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <stddef.h>
+
 #include <sys/time.h>
-#include <time.h>
-#include <hp-timing.h>
 
-#include <bits/libc-vdso.h>
+#ifdef SHARED
+
+# include <dl-vdso.h>
+# include <bits/libc-vdso.h>
+
+void *gettimeofday_ifunc (void) __asm__ ("__gettimeofday");
+
+static int
+__gettimeofday_syscall (struct timeval *tv, struct timezone *tz)
+{
+  return INLINE_SYSCALL (gettimeofday, 2, tv, tz);
+}
+
+void *
+gettimeofday_ifunc (void)
+{
+  /* If the vDSO is not available we fall back to the syscall.  */
+  return (__vdso_gettimeofday ? VDSO_IFUNC_RET (__vdso_gettimeofday)
+	  : __gettimeofday_syscall);
+}
+asm (".type __gettimeofday, %gnu_indirect_function");
+
+/* This is doing "libc_hidden_def (__gettimeofday)" but the compiler won't
+   let us do it in C because it doesn't know we're defining __gettimeofday
+   here in this file.  */
+asm (".globl __GI___gettimeofday\n"
+     "__GI___gettimeofday = __gettimeofday");
+
+#else
 
-/* Get the current time of day and timezone information,
-   putting it into *TV and *TZ.  If TZ is NULL, *TZ is not filled.
-   Returns 0 on success, -1 on errors.  */
+# include <sysdep.h>
+# include <errno.h>
 
 int
-__gettimeofday (tv, tz)
-     struct timeval *tv;
-     struct timezone *tz;
+__gettimeofday (struct timeval *tv, struct timezone *tz)
 {
-  return INLINE_VSYSCALL (gettimeofday, 2, tv, tz);
+  return INLINE_SYSCALL (gettimeofday, 2, tv, tz);
 }
 libc_hidden_def (__gettimeofday)
+
+#endif
 weak_alias (__gettimeofday, gettimeofday)
 libc_hidden_weak (gettimeofday)
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S
index 06596ce58..348aeb5ba 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S
@@ -19,17 +19,14 @@
 #include <sysdep.h>
 #define _ERRNO_H	1
 #include <bits/errno.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 	.comm	__curbrk,8,8
 	.section	".toc","aw"
 .LC__curbrk:
 	.tc __curbrk[TC],__curbrk
 	.section ".text"
-ENTRY (BP_SYM (__brk))
+ENTRY (__brk)
 	CALL_MCOUNT 1
-	DISCARD_BOUNDS (r3)	/* the bounds are meaningless, so toss 'em.  */
 
 	std	r3,48(r1)
 	DO_CALL(SYS_ify(brk))
@@ -41,6 +38,6 @@ ENTRY (BP_SYM (__brk))
 	blelr+
 	li      r3,ENOMEM
 	TAIL_CALL_SYSCALL_ERROR
-END (BP_SYM (__brk))
+END (__brk)
 
-weak_alias (BP_SYM (__brk), BP_SYM (brk))
+weak_alias (__brk, brk)
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
index f74dcae90..cf46856e1 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
@@ -19,8 +19,6 @@
 #include <sysdep.h>
 #define _ERRNO_H	1
 #include <bits/errno.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 #define CLONE_VM	0x00000100
 #define CLONE_THREAD	0x00010000
@@ -33,11 +31,8 @@
                   int flags [r5], void *arg [r6], void *parent_tid [r7],
                   void *tls [r8], void *child_tid [r9]); */
 
-ENTRY (BP_SYM (__clone))
+ENTRY (__clone)
 	CALL_MCOUNT 7
-	/* GKM FIXME: add bounds checks, where sensible.  */
-	DISCARD_BOUNDS (r4)
-	DISCARD_BOUNDS (r6)
 
 	/* Check for child_stack == NULL || fn == NULL.  */
 	cmpdi	cr0,r4,0
@@ -144,6 +139,6 @@ L(parent):
 	cfi_restore(r31)
 	PSEUDO_RET
 
-END (BP_SYM (__clone))
+END (__clone)
 
-weak_alias (BP_SYM (__clone), BP_SYM (clone))
+weak_alias (__clone, clone)
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
index dfda1c889..e6e916b0f 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
@@ -136,7 +136,8 @@ enum __ptrace_setoptions
   PTRACE_O_TRACEVFORKDONE = 0x00000020,
   PTRACE_O_TRACEEXIT	= 0x00000040,
   PTRACE_O_TRACESECCOMP = 0x00000080,
-  PTRACE_O_MASK		= 0x000000ff
+  PTRACE_O_EXITKILL	= 0x00100000,
+  PTRACE_O_MASK		= 0x001000ff
 };
 
 /* Wait extended result codes for the above trace options.  */
diff --git a/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h b/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h
index 3e7bf92f4..b788fa50d 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h
@@ -24,39 +24,9 @@
    But the kernel header is not namespace clean.  */
 
 
-/* Protections are chosen from these bits, OR'd together.  The
-   implementation does not necessarily support PROT_EXEC or PROT_WRITE
-   without PROT_READ.  The only guarantees are that no writing will be
-   allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */
-
-#define PROT_READ	0x1		/* Page can be read.  */
-#define PROT_WRITE	0x2		/* Page can be written.  */
-#define PROT_EXEC	0x4		/* Page can be executed.  */
-#define PROT_NONE	0x0		/* Page can not be accessed.  */
-#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
-					   growsdown vma (mprotect only).  */
-#define PROT_GROWSUP	0x02000000	/* Extend change to start of
-					   growsup vma (mprotect only).  */
-
-/* Sharing types (must choose one and only one of these).  */
-#define MAP_SHARED	0x01		/* Share changes.  */
-#define MAP_PRIVATE	0x02		/* Changes are private.  */
-#ifdef __USE_MISC
-# define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
-#endif
-
-/* Other flags.  */
-#define MAP_FIXED	0x10		/* Interpret addr exactly.  */
-#ifdef __USE_MISC
-# define MAP_FILE	0
-# define MAP_ANONYMOUS	0x20		/* Don't use a file.  */
-# define MAP_ANON	MAP_ANONYMOUS
-#endif
-
 /* These are Linux-specific.  */
 #ifdef __USE_MISC
 # define MAP_GROWSDOWN	0x00100		/* Stack-like segment.  */
-# define MAP_GROWSUP	0x00200		/* Register stack-like segment */
 # define MAP_DENYWRITE	0x00800		/* ETXTBSY */
 # define MAP_EXECUTABLE	0x01000		/* Mark it as an executable.  */
 # define MAP_LOCKED	0x02000		/* Lock the mapping.  */
@@ -67,47 +37,5 @@
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
 #endif
 
-/* Flags to `msync'.  */
-#define MS_ASYNC	1		/* Sync memory asynchronously.  */
-#define MS_SYNC		4		/* Synchronous memory sync.  */
-#define MS_INVALIDATE	2		/* Invalidate the caches.  */
-
-/* Flags for `mlockall'.  */
-#define MCL_CURRENT	1		/* Lock all currently mapped pages.  */
-#define MCL_FUTURE	2		/* Lock all additions to address
-					   space.  */
-
-/* Flags for `mremap'.  */
-#ifdef __USE_GNU
-# define MREMAP_MAYMOVE	1
-# define MREMAP_FIXED	2
-#endif
-
-/* Advice to `madvise'.  */
-#ifdef __USE_BSD
-# define MADV_NORMAL	  0	/* No further special treatment.  */
-# define MADV_RANDOM	  1	/* Expect random page references.  */
-# define MADV_SEQUENTIAL  2	/* Expect sequential page references.  */
-# define MADV_WILLNEED	  3	/* Will need these pages.  */
-# define MADV_DONTNEED	  4	/* Don't need these pages.  */
-# define MADV_REMOVE	  9	/* Remove these pages and resources.  */
-# define MADV_DONTFORK	  10	/* Do not inherit across fork.  */
-# define MADV_DOFORK	  11	/* Do inherit across fork.  */
-# define MADV_MERGEABLE	  12	/* KSM may merge identical pages.  */
-# define MADV_UNMERGEABLE 13	/* KSM may not merge identical pages.  */
-# define MADV_HUGEPAGE	  14	/* Worth backing with hugepages.  */
-# define MADV_NOHUGEPAGE  15	/* Not worth backing with hugepages.  */
-# define MADV_DONTDUMP	  16    /* Explicity exclude from the core dump,
-                                   overrides the coredump filter bits.  */
-# define MADV_DODUMP	  17	/* Clear the MADV_DONTDUMP flag.  */
-# define MADV_HWPOISON	  100	/* Poison a page for testing.  */
-#endif
-
-/* The POSIX people had to invent similar names for the same things.  */
-#ifdef __USE_XOPEN2K
-# define POSIX_MADV_NORMAL	0 /* No further special treatment.  */
-# define POSIX_MADV_RANDOM	1 /* Expect random page references.  */
-# define POSIX_MADV_SEQUENTIAL	2 /* Expect sequential page references.  */
-# define POSIX_MADV_WILLNEED	3 /* Will need these pages.  */
-# define POSIX_MADV_DONTNEED	4 /* Don't need these pages.  */
-#endif
+/* Include generic Linux declarations.  */
+#include <bits/mman-linux.h>
diff --git a/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h b/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h
index 5a1f6b29e..a5eaf89dd 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h
+++ b/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h
@@ -26,6 +26,7 @@
 #define MSG_NOERROR	010000	/* no error if message is too big */
 #ifdef __USE_GNU
 # define MSG_EXCEPT	020000	/* recv any msg except of specified type */
+# define MSG_COPY	040000	/* copy (not remove) all queue messages */
 #endif
 
 /* Types used in the structure definition.  */
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S
index 1a3712d8c..0a2e63e78 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S
@@ -31,41 +31,42 @@
   other than the PRESERVED state.  */
 
 ENTRY(__getcontext)
-	lr      %r5,%r2
+	lr      %r1,%r2
 
 	/* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask).  */
 	la      %r2,SIG_BLOCK
 	slr	%r3,%r3
-	la	%r4,SC_MASK(%r5)
+	la	%r4,SC_MASK(%r1)
+	lhi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Store fpu context.  */
-	stfpc   SC_FPC(%r5)
-	std     %f0,SC_FPRS(%r5)
-	std     %f1,SC_FPRS+8(%r5)
-	std     %f2,SC_FPRS+16(%r5)
-	std     %f3,SC_FPRS+24(%r5)
-	std     %f4,SC_FPRS+32(%r5)
-	std     %f5,SC_FPRS+40(%r5)
-	std     %f6,SC_FPRS+48(%r5)
-	std     %f7,SC_FPRS+56(%r5)
-	std     %f8,SC_FPRS+64(%r5)
-	std     %f9,SC_FPRS+72(%r5)
-	std     %f10,SC_FPRS+80(%r5)
-	std     %f11,SC_FPRS+88(%r5)
-	std     %f12,SC_FPRS+96(%r5)
-	std     %f13,SC_FPRS+104(%r5)
-	std     %f14,SC_FPRS+112(%r5)
-	std     %f15,SC_FPRS+120(%r5)
+	stfpc   SC_FPC(%r1)
+	std     %f0,SC_FPRS(%r1)
+	std     %f1,SC_FPRS+8(%r1)
+	std     %f2,SC_FPRS+16(%r1)
+	std     %f3,SC_FPRS+24(%r1)
+	std     %f4,SC_FPRS+32(%r1)
+	std     %f5,SC_FPRS+40(%r1)
+	std     %f6,SC_FPRS+48(%r1)
+	std     %f7,SC_FPRS+56(%r1)
+	std     %f8,SC_FPRS+64(%r1)
+	std     %f9,SC_FPRS+72(%r1)
+	std     %f10,SC_FPRS+80(%r1)
+	std     %f11,SC_FPRS+88(%r1)
+	std     %f12,SC_FPRS+96(%r1)
+	std     %f13,SC_FPRS+104(%r1)
+	std     %f14,SC_FPRS+112(%r1)
+	std     %f15,SC_FPRS+120(%r1)
 
 	/* Set __getcontext return value to 0.  */
 	slr     %r2,%r2
 
 	/* Store access registers.  */
-	stam    %a0,%a15,SC_ACRS(%r5)
+	stam    %a0,%a15,SC_ACRS(%r1)
 
 	/* Store general purpose registers.  */
-	stm     %r0,%r15,SC_GPRS(%r5)
+	stm     %r0,%r15,SC_GPRS(%r1)
 
 	/* Return.  */
 	br	%r14
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S
index fe56c24aa..ac25bea50 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S
@@ -31,38 +31,39 @@
   other than the PRESERVED state.  */
 
 ENTRY(__setcontext)
-	lr	%r5,%r2
+	lr	%r1,%r2
 
 	/* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL).  */
 	la      %r2,SIG_BLOCK
-	la	%r3,SC_MASK(%r5)
+	la	%r3,SC_MASK(%r1)
 	slr	%r4,%r4
+	lhi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Load fpu context.  */
-	lfpc	SC_FPC(%r5)
-	ld	%f0,SC_FPRS(%r5)
-	ld      %f1,SC_FPRS+8(%r5)
-	ld      %f2,SC_FPRS+16(%r5)
-	ld      %f3,SC_FPRS+24(%r5)
-	ld      %f4,SC_FPRS+32(%r5)
-	ld      %f5,SC_FPRS+40(%r5)
-	ld      %f6,SC_FPRS+48(%r5)
-	ld      %f7,SC_FPRS+56(%r5)
-	ld      %f8,SC_FPRS+64(%r5)
-	ld      %f9,SC_FPRS+72(%r5)
-	ld      %f10,SC_FPRS+80(%r5)
-	ld      %f11,SC_FPRS+88(%r5)
-	ld      %f12,SC_FPRS+96(%r5)
-	ld      %f13,SC_FPRS+104(%r5)
-	ld      %f14,SC_FPRS+112(%r5)
-	ld      %f15,SC_FPRS+120(%r5)
+	lfpc	SC_FPC(%r1)
+	ld	%f0,SC_FPRS(%r1)
+	ld      %f1,SC_FPRS+8(%r1)
+	ld      %f2,SC_FPRS+16(%r1)
+	ld      %f3,SC_FPRS+24(%r1)
+	ld      %f4,SC_FPRS+32(%r1)
+	ld      %f5,SC_FPRS+40(%r1)
+	ld      %f6,SC_FPRS+48(%r1)
+	ld      %f7,SC_FPRS+56(%r1)
+	ld      %f8,SC_FPRS+64(%r1)
+	ld      %f9,SC_FPRS+72(%r1)
+	ld      %f10,SC_FPRS+80(%r1)
+	ld      %f11,SC_FPRS+88(%r1)
+	ld      %f12,SC_FPRS+96(%r1)
+	ld      %f13,SC_FPRS+104(%r1)
+	ld      %f14,SC_FPRS+112(%r1)
+	ld      %f15,SC_FPRS+120(%r1)
  
 	/* Don't touch %a0, used for thread purposes.  */
-	lam	%a1,%a15,SC_ACRS+4(%r5)
+	lam	%a1,%a15,SC_ACRS+4(%r1)
 
 	/* Load general purpose registers.  */
-	lm	%r0,%r15,SC_GPRS(%r5)
+	lm	%r0,%r15,SC_GPRS(%r1)
 
 	/* Return.  */
 	br	%r14
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S
index 9a4b2b987..ecb0b3f80 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S
@@ -34,12 +34,13 @@
 
 ENTRY(__swapcontext)
 	lr	%r1,%r2
-	lr      %r5,%r3
+	lr      %r0,%r3
 
 	/* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask).  */
 	la      %r2,SIG_BLOCK
 	slr	%r3,%r3
 	la	%r4,SC_MASK(%r1)
+	lhi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Store fpu context.  */
@@ -72,11 +73,14 @@ ENTRY(__swapcontext)
 	
 	/* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL).  */
 	la      %r2,SIG_BLOCK
+	lr	%r5,%r0
 	la	%r3,SC_MASK(%r5)
 	slr	%r4,%r4
+	lhi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Load fpu context.  */
+	lr	%r5,%r0
 	lfpc	SC_FPC(%r5)
 	ld	%f0,SC_FPRS(%r5)
 	ld      %f1,SC_FPRS+8(%r5)
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S
index 68e89102a..7c406cb23 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S
@@ -31,41 +31,42 @@
   other than the PRESERVED state.  */
 
 ENTRY(__getcontext)
-	lgr     %r5,%r2
+	lgr     %r1,%r2
 
 	/* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask).  */
 	la      %r2,SIG_BLOCK
 	slgr	%r3,%r3
-	la	%r4,SC_MASK(%r5)
+	la	%r4,SC_MASK(%r1)
+	lghi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Store fpu context.  */
-	stfpc   SC_FPC(%r5)
-	std     %f0,SC_FPRS(%r5)
-	std     %f1,SC_FPRS+8(%r5)
-	std     %f2,SC_FPRS+16(%r5)
-	std     %f3,SC_FPRS+24(%r5)
-	std     %f4,SC_FPRS+32(%r5)
-	std     %f5,SC_FPRS+40(%r5)
-	std     %f6,SC_FPRS+48(%r5)
-	std     %f7,SC_FPRS+56(%r5)
-	std     %f8,SC_FPRS+64(%r5)
-	std     %f9,SC_FPRS+72(%r5)
-	std     %f10,SC_FPRS+80(%r5)
-	std     %f11,SC_FPRS+88(%r5)
-	std     %f12,SC_FPRS+96(%r5)
-	std     %f13,SC_FPRS+104(%r5)
-	std     %f14,SC_FPRS+112(%r5)
-	std     %f15,SC_FPRS+120(%r5)
+	stfpc   SC_FPC(%r1)
+	std     %f0,SC_FPRS(%r1)
+	std     %f1,SC_FPRS+8(%r1)
+	std     %f2,SC_FPRS+16(%r1)
+	std     %f3,SC_FPRS+24(%r1)
+	std     %f4,SC_FPRS+32(%r1)
+	std     %f5,SC_FPRS+40(%r1)
+	std     %f6,SC_FPRS+48(%r1)
+	std     %f7,SC_FPRS+56(%r1)
+	std     %f8,SC_FPRS+64(%r1)
+	std     %f9,SC_FPRS+72(%r1)
+	std     %f10,SC_FPRS+80(%r1)
+	std     %f11,SC_FPRS+88(%r1)
+	std     %f12,SC_FPRS+96(%r1)
+	std     %f13,SC_FPRS+104(%r1)
+	std     %f14,SC_FPRS+112(%r1)
+	std     %f15,SC_FPRS+120(%r1)
 
 	/* Set __getcontext return value to 0.  */
 	slgr    %r2,%r2
 
 	/* Store access registers.  */
-	stam    %a0,%a15,SC_ACRS(%r5)
+	stam    %a0,%a15,SC_ACRS(%r1)
 
 	/* Store general purpose registers.  */
-	stmg    %r0,%r15,SC_GPRS(%r5)
+	stmg    %r0,%r15,SC_GPRS(%r1)
 
 	/* Return.  */
 	br	%r14
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S
index 7415bd938..8157327bf 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S
@@ -31,38 +31,39 @@
   other than the PRESERVED state.  */
 
 ENTRY(__setcontext)
-	lgr	%r5,%r2
+	lgr	%r1,%r2
 
 	/* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL).  */
 	la      %r2,SIG_BLOCK
-	la	%r3,SC_MASK(%r5)
+	la	%r3,SC_MASK(%r1)
 	slgr	%r4,%r4
+	lghi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Load fpu context.  */
-	lfpc	SC_FPC(%r5)
-	ld	%f0,SC_FPRS(%r5)
-	ld      %f1,SC_FPRS+8(%r5)
-	ld      %f2,SC_FPRS+16(%r5)
-	ld      %f3,SC_FPRS+24(%r5)
-	ld      %f4,SC_FPRS+32(%r5)
-	ld      %f5,SC_FPRS+40(%r5)
-	ld      %f6,SC_FPRS+48(%r5)
-	ld      %f7,SC_FPRS+56(%r5)
-	ld      %f8,SC_FPRS+64(%r5)
-	ld      %f9,SC_FPRS+72(%r5)
-	ld      %f10,SC_FPRS+80(%r5)
-	ld      %f11,SC_FPRS+88(%r5)
-	ld      %f12,SC_FPRS+96(%r5)
-	ld      %f13,SC_FPRS+104(%r5)
-	ld      %f14,SC_FPRS+112(%r5)
-	ld      %f15,SC_FPRS+120(%r5)
+	lfpc	SC_FPC(%r1)
+	ld	%f0,SC_FPRS(%r1)
+	ld      %f1,SC_FPRS+8(%r1)
+	ld      %f2,SC_FPRS+16(%r1)
+	ld      %f3,SC_FPRS+24(%r1)
+	ld      %f4,SC_FPRS+32(%r1)
+	ld      %f5,SC_FPRS+40(%r1)
+	ld      %f6,SC_FPRS+48(%r1)
+	ld      %f7,SC_FPRS+56(%r1)
+	ld      %f8,SC_FPRS+64(%r1)
+	ld      %f9,SC_FPRS+72(%r1)
+	ld      %f10,SC_FPRS+80(%r1)
+	ld      %f11,SC_FPRS+88(%r1)
+	ld      %f12,SC_FPRS+96(%r1)
+	ld      %f13,SC_FPRS+104(%r1)
+	ld      %f14,SC_FPRS+112(%r1)
+	ld      %f15,SC_FPRS+120(%r1)
 
 	/* Don't touch %a0 and %a1, used for thread purposes.  */
-	lam     %a2,%a15,SC_ACRS+8(%r5)
+	lam     %a2,%a15,SC_ACRS+8(%r1)
  
 	/* Load general purpose registers.  */
-	lmg	%r0,%r15,SC_GPRS(%r5)
+	lmg	%r0,%r15,SC_GPRS(%r1)
 
 	/* Return.  */
 	br	%r14
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S
index 2d8f0d50e..a08e68cdd 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S
@@ -34,12 +34,13 @@
 
 ENTRY(__swapcontext)
 	lgr	%r1,%r2
-	lgr      %r5,%r3
+	lgr     %r0,%r3
 
 	/* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask).  */
 	la      %r2,SIG_BLOCK
 	slgr	%r3,%r3
 	la	%r4,SC_MASK(%r1)
+	lghi	%r5,_NSIG8
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Store fpu context.  */
@@ -72,11 +73,14 @@ ENTRY(__swapcontext)
 
 	/* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL).  */
 	la      %r2,SIG_BLOCK
+	lgr	%r5,%r0
 	la	%r3,SC_MASK(%r5)
+	lghi	%r5,_NSIG8
 	slgr	%r4,%r4
 	svc	SYS_ify(rt_sigprocmask)
 
 	/* Load fpu context.  */
+	lgr	%r5,%r0
 	lfpc	SC_FPC(%r5)
 	ld	%f0,SC_FPRS(%r5)
 	ld      %f1,SC_FPRS+8(%r5)
diff --git a/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
index b9062dc1a..ca2ebb959 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
+++ b/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
@@ -175,7 +175,8 @@ enum __ptrace_setoptions
   PTRACE_O_TRACEVFORKDONE = 0x00000020,
   PTRACE_O_TRACEEXIT	= 0x00000040,
   PTRACE_O_TRACESECCOMP = 0x00000080,
-  PTRACE_O_MASK		= 0x000000ff
+  PTRACE_O_EXITKILL	= 0x00100000,
+  PTRACE_O_MASK		= 0x001000ff
 };
 
 /* Wait extended result codes for the above trace options.  */
diff --git a/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym b/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym
index 525b54300..6cc9f1962 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym
+++ b/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym
@@ -8,6 +8,8 @@ SIG_BLOCK
 SIG_UNBLOCK
 SIG_SETMASK
 
+_NSIG8          (_NSIG / 8)
+
 #define ucontext(member)	offsetof (ucontext_t, member)
 #define mcontext(member)	ucontext (uc_mcontext.member)
 
diff --git a/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h b/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h
index 40da97e2f..396a9b918 100644
--- a/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h
@@ -23,36 +23,6 @@
 /* The following definitions basically come from the kernel headers.
    But the kernel header is not namespace clean.  */
 
-
-/* Protections are chosen from these bits, OR'd together.  The
-   implementation does not necessarily support PROT_EXEC or PROT_WRITE
-   without PROT_READ.  The only guarantees are that no writing will be
-   allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */
-
-#define PROT_READ	0x1		/* Page can be read.  */
-#define PROT_WRITE	0x2		/* Page can be written.  */
-#define PROT_EXEC	0x4		/* Page can be executed.  */
-#define PROT_NONE	0x0		/* Page can not be accessed.  */
-#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
-					   growsdown vma (mprotect only).  */
-#define PROT_GROWSUP	0x02000000	/* Extend change to start of
-					   growsup vma (mprotect only).  */
-
-/* Sharing types (must choose one and only one of these).  */
-#define MAP_SHARED	0x01		/* Share changes.  */
-#define MAP_PRIVATE	0x02		/* Changes are private.  */
-#ifdef __USE_MISC
-# define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
-#endif
-
-/* Other flags.  */
-#define MAP_FIXED	0x10		/* Interpret addr exactly.  */
-#ifdef __USE_MISC
-# define MAP_FILE	0
-# define MAP_ANONYMOUS	0x20		/* Don't use a file.  */
-# define MAP_ANON	MAP_ANONYMOUS
-#endif
-
 /* These are Linux-specific.  */
 #ifdef __USE_MISC
 # define MAP_GROWSDOWN	0x0100		/* Stack-like segment.  */
@@ -66,47 +36,5 @@
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
 #endif
 
-/* Flags to `msync'.  */
-#define MS_ASYNC	1		/* Sync memory asynchronously.  */
-#define MS_SYNC		4		/* Synchronous memory sync.  */
-#define MS_INVALIDATE	2		/* Invalidate the caches.  */
-
-/* Flags for `mlockall'.  */
-#define MCL_CURRENT	1		/* Lock all currently mapped pages.  */
-#define MCL_FUTURE	2		/* Lock all additions to address
-					   space.  */
-
-/* Flags for `mremap'.  */
-#ifdef __USE_GNU
-# define MREMAP_MAYMOVE	1
-# define MREMAP_FIXED	2
-#endif
-
-/* Advice to `madvise'.  */
-#ifdef __USE_BSD
-# define MADV_NORMAL	  0	/* No further special treatment.  */
-# define MADV_RANDOM	  1	/* Expect random page references.  */
-# define MADV_SEQUENTIAL  2	/* Expect sequential page references.  */
-# define MADV_WILLNEED	  3	/* Will need these pages.  */
-# define MADV_DONTNEED	  4	/* Don't need these pages.  */
-# define MADV_REMOVE	  9	/* Remove these pages and resources.  */
-# define MADV_DONTFORK	  10	/* Do not inherit across fork.  */
-# define MADV_DOFORK	  11	/* Do inherit across fork.  */
-# define MADV_MERGEABLE	  12	/* KSM may merge identical pages.  */
-# define MADV_UNMERGEABLE 13	/* KSM may not merge identical pages.  */
-# define MADV_HUGEPAGE	  14	/* Worth backing with hugepages.  */
-# define MADV_NOHUGEPAGE  15	/* Not worth backing with hugepages.  */
-# define MADV_DONTDUMP	  16    /* Explicity exclude from the core dump,
-                                   overrides the coredump filter bits.  */
-# define MADV_DODUMP	  17	/* Clear the MADV_DONTDUMP flag.  */
-# define MADV_HWPOISON	  100	/* Poison a page for testing.  */
-#endif
-
-/* The POSIX people had to invent similar names for the same things.  */
-#ifdef __USE_XOPEN2K
-# define POSIX_MADV_NORMAL	0 /* No further special treatment.  */
-# define POSIX_MADV_RANDOM	1 /* Expect random page references.  */
-# define POSIX_MADV_SEQUENTIAL	2 /* Expect sequential page references.  */
-# define POSIX_MADV_WILLNEED	3 /* Will need these pages.  */
-# define POSIX_MADV_DONTNEED	4 /* Don't need these pages.  */
-#endif
+/* Include generic Linux declarations.  */
+#include <bits/mman-linux.h>
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h
index 616e24333..ad0389ca3 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h
@@ -24,36 +24,6 @@
    But the kernel header is not namespace clean.  */
 
 
-/* Protections are chosen from these bits, OR'd together.  The
-   implementation does not necessarily support PROT_EXEC or PROT_WRITE
-   without PROT_READ.  The only guarantees are that no writing will be
-   allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */
-
-#define PROT_READ	0x1		/* Page can be read.  */
-#define PROT_WRITE	0x2		/* Page can be written.  */
-#define PROT_EXEC	0x4		/* Page can be executed.  */
-#define PROT_NONE	0x0		/* Page can not be accessed.  */
-#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
-					   growsdown vma (mprotect only).  */
-#define PROT_GROWSUP	0x02000000	/* Extend change to start of
-					   growsup vma (mprotect only).  */
-
-/* Sharing types (must choose one and only one of these).  */
-#define MAP_SHARED	0x01		/* Share changes.  */
-#define MAP_PRIVATE	0x02		/* Changes are private.  */
-#ifdef __USE_MISC
-# define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
-#endif
-
-/* Other flags.  */
-#define MAP_FIXED	0x10		/* Interpret addr exactly.  */
-#ifdef __USE_MISC
-# define MAP_FILE	0x00
-# define MAP_ANONYMOUS	0x20		/* Don't use a file.  */
-# define MAP_ANON	MAP_ANONYMOUS
-# define MAP_RENAME	MAP_ANONYMOUS
-#endif
-
 /* These are Linux-specific.  */
 #ifdef __USE_MISC
 # define MAP_GROWSDOWN	0x0200		/* Stack-like segment.  */
@@ -68,48 +38,14 @@
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
 #endif
 
-/* Flags to `msync'.  */
-#define MS_ASYNC	1		/* Sync memory asynchronously.  */
-#define MS_SYNC		4		/* Synchronous memory sync.  */
-#define MS_INVALIDATE	2		/* Invalidate the caches.  */
-
 /* Flags for `mlockall'.  */
 #define MCL_CURRENT	0x2000		/* Lock all currently mapped pages.  */
 #define MCL_FUTURE	0x4000		/* Lock all additions to address
 					   space.  */
+/* Include generic Linux declarations.  */
+#include <bits/mman-linux.h>
 
-/* Flags for `mremap'.  */
-#ifdef __USE_GNU
-# define MREMAP_MAYMOVE	1
-# define MREMAP_FIXED	2
-#endif
-
-/* Advice to `madvise'.  */
-#ifdef __USE_BSD
-# define MADV_NORMAL	  0	/* No further special treatment.  */
-# define MADV_RANDOM	  1	/* Expect random page references.  */
-# define MADV_SEQUENTIAL  2	/* Expect sequential page references.  */
-# define MADV_WILLNEED	  3	/* Will need these pages.  */
-# define MADV_DONTNEED	  4	/* Don't need these pages.  */
-# define MADV_FREE	  5	/* Content can be freed (Solaris).  */
-# define MADV_REMOVE	  9	/* Remove these pages and resources.  */
-# define MADV_DONTFORK	  10	/* Do not inherit across fork.  */
-# define MADV_DOFORK	  11	/* Do inherit across fork.  */
-# define MADV_MERGEABLE	  12	/* KSM may merge identical pages.  */
-# define MADV_UNMERGEABLE 13	/* KSM may not merge identical pages.  */
-# define MADV_HUGEPAGE	  14	/* Worth backing with hugepages.  */
-# define MADV_NOHUGEPAGE  15	/* Not worth backing with hugepages.  */
-# define MADV_DONTDUMP	  16    /* Explicity exclude from the core dump,
-                                   overrides the coredump filter bits.  */
-# define MADV_DODUMP	  17	/* Clear the MADV_DONTDUMP flag.  */
-# define MADV_HWPOISON	  100	/* Poison a page for testing.  */
-#endif
-
-/* The POSIX people had to invent similar names for the same things.  */
-#ifdef __USE_XOPEN2K
-# define POSIX_MADV_NORMAL	0 /* No further special treatment.  */
-# define POSIX_MADV_RANDOM	1 /* Expect random page references.  */
-# define POSIX_MADV_SEQUENTIAL	2 /* Expect sequential page references.  */
-# define POSIX_MADV_WILLNEED	3 /* Will need these pages.  */
-# define POSIX_MADV_DONTNEED	4 /* Don't need these pages.  */
+/* Other flags.  */
+#ifdef __USE_MISC
+# define MAP_RENAME	MAP_ANONYMOUS
 #endif
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h
index 84c4b858b..0a0192732 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h
+++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h
@@ -26,6 +26,7 @@
 #define MSG_NOERROR	010000	/* no error if message is too big */
 #ifdef __USE_GNU
 # define MSG_EXCEPT	020000	/* recv any msg except of specified type */
+# define MSG_COPY	040000	/* copy (not remove) all queue messages */
 #endif
 
 /* Types used in the structure definition.  */
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
index bd6fd536d..7ba8f5f25 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
+++ b/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
@@ -219,7 +219,8 @@ enum __ptrace_setoptions
   PTRACE_O_TRACEVFORKDONE = 0x00000020,
   PTRACE_O_TRACEEXIT	= 0x00000040,
   PTRACE_O_TRACESECCOMP = 0x00000080,
-  PTRACE_O_MASK		= 0x000000ff
+  PTRACE_O_EXITKILL	= 0x00100000,
+  PTRACE_O_MASK		= 0x001000ff
 };
 
 /* Wait extended result codes for the above trace options.  */
diff --git a/libc/sysdeps/unix/sysv/linux/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/sys/ptrace.h
index d04fab599..08709bf64 100644
--- a/libc/sysdeps/unix/sysv/linux/sys/ptrace.h
+++ b/libc/sysdeps/unix/sysv/linux/sys/ptrace.h
@@ -166,7 +166,8 @@ enum __ptrace_setoptions
   PTRACE_O_TRACEVFORKDONE = 0x00000020,
   PTRACE_O_TRACEEXIT	= 0x00000040,
   PTRACE_O_TRACESECCOMP = 0x00000080,
-  PTRACE_O_MASK		= 0x000000ff
+  PTRACE_O_EXITKILL	= 0x00100000,
+  PTRACE_O_MASK		= 0x001000ff
 };
 
 /* Wait extended result codes for the above trace options.  */
diff --git a/libc/sysdeps/unix/sysv/linux/times.c b/libc/sysdeps/unix/sysv/linux/times.c
index f3b5f014e..2a5caf2cd 100644
--- a/libc/sysdeps/unix/sysv/linux/times.c
+++ b/libc/sysdeps/unix/sysv/linux/times.c
@@ -26,13 +26,14 @@ __times (struct tms *buf)
   INTERNAL_SYSCALL_DECL (err);
   clock_t ret = INTERNAL_SYSCALL (times, err, 1, buf);
   if (INTERNAL_SYSCALL_ERROR_P (ret, err)
-      && __builtin_expect (INTERNAL_SYSCALL_ERRNO (ret, err) == EFAULT, 0))
+      && __builtin_expect (INTERNAL_SYSCALL_ERRNO (ret, err) == EFAULT, 0)
+      && buf)
     {
       /* This might be an error or not.  For architectures which have
 	 no separate return value and error indicators we cannot
 	 distinguish a return value of -1 from an error.  Do it the
-	 hard way.  We crash applications which pass in an invalid BUF
-	 pointer.  */
+	 hard way.  We crash applications which pass in an invalid
+	 non-NULL BUF pointer.  Linux allows BUF to be NULL.  */
 #define touch(v) \
       do {								      \
 	clock_t temp = v;						      \
@@ -44,7 +45,8 @@ __times (struct tms *buf)
       touch (buf->tms_cutime);
       touch (buf->tms_cstime);
 
-      /* If we come here the memory is valid and the kernel did not
+      /* If we come here the memory is valid (or BUF is NULL, which is
+	 a valid condition for the kernel syscall) and the kernel did not
 	 return an EFAULT error.  Return the value given by the kernel.  */
     }
 
diff --git a/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h b/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h
index 591df139d..a2fa80879 100644
--- a/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h
+++ b/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h
@@ -23,34 +23,8 @@
 /* The following definitions basically come from the kernel headers.
    But the kernel header is not namespace clean.  */
 
-
-/* Protections are chosen from these bits, OR'd together.  The
-   implementation does not necessarily support PROT_EXEC or PROT_WRITE
-   without PROT_READ.  The only guarantees are that no writing will be
-   allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */
-
-#define PROT_READ	0x1		/* Page can be read.  */
-#define PROT_WRITE	0x2		/* Page can be written.  */
-#define PROT_EXEC	0x4		/* Page can be executed.  */
-#define PROT_NONE	0x0		/* Page can not be accessed.  */
-#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
-					   growsdown vma (mprotect only).  */
-#define PROT_GROWSUP	0x02000000	/* Extend change to start of
-					   growsup vma (mprotect only).  */
-
-/* Sharing types (must choose one and only one of these).  */
-#define MAP_SHARED	0x01		/* Share changes.  */
-#define MAP_PRIVATE	0x02		/* Changes are private.  */
-#ifdef __USE_MISC
-# define MAP_TYPE	0x0f		/* Mask for type of mapping.  */
-#endif
-
 /* Other flags.  */
-#define MAP_FIXED	0x10		/* Interpret addr exactly.  */
 #ifdef __USE_MISC
-# define MAP_FILE	0
-# define MAP_ANONYMOUS	0x20		/* Don't use a file.  */
-# define MAP_ANON	MAP_ANONYMOUS
 # define MAP_32BIT	0x40		/* Only give out 32-bit addresses.  */
 #endif
 
@@ -67,47 +41,5 @@
 # define MAP_HUGETLB	0x40000		/* Create huge page mapping.  */
 #endif
 
-/* Flags to `msync'.  */
-#define MS_ASYNC	1		/* Sync memory asynchronously.  */
-#define MS_SYNC		4		/* Synchronous memory sync.  */
-#define MS_INVALIDATE	2		/* Invalidate the caches.  */
-
-/* Flags for `mlockall'.  */
-#define MCL_CURRENT	1		/* Lock all currently mapped pages.  */
-#define MCL_FUTURE	2		/* Lock all additions to address
-					   space.  */
-
-/* Flags for `mremap'.  */
-#ifdef __USE_GNU
-# define MREMAP_MAYMOVE	1
-# define MREMAP_FIXED	2
-#endif
-
-/* Advice to `madvise'.  */
-#ifdef __USE_BSD
-# define MADV_NORMAL	  0	/* No further special treatment.  */
-# define MADV_RANDOM	  1	/* Expect random page references.  */
-# define MADV_SEQUENTIAL  2	/* Expect sequential page references.  */
-# define MADV_WILLNEED	  3	/* Will need these pages.  */
-# define MADV_DONTNEED	  4	/* Don't need these pages.  */
-# define MADV_REMOVE	  9	/* Remove these pages and resources.  */
-# define MADV_DONTFORK	  10	/* Do not inherit across fork.  */
-# define MADV_DOFORK	  11	/* Do inherit across fork.  */
-# define MADV_MERGEABLE	  12	/* KSM may merge identical pages.  */
-# define MADV_UNMERGEABLE 13	/* KSM may not merge identical pages.  */
-# define MADV_HUGEPAGE	  14	/* Worth backing with hugepages.  */
-# define MADV_NOHUGEPAGE  15	/* Not worth backing with hugepages.  */
-# define MADV_DONTDUMP	  16    /* Explicity exclude from the core dump,
-                                   overrides the coredump filter bits.  */
-# define MADV_DODUMP	  17	/* Clear the MADV_DONTDUMP flag.  */
-# define MADV_HWPOISON	  100	/* Poison a page for testing.  */
-#endif
-
-/* The POSIX people had to invent similar names for the same things.  */
-#ifdef __USE_XOPEN2K
-# define POSIX_MADV_NORMAL	0 /* No further special treatment.  */
-# define POSIX_MADV_RANDOM	1 /* Expect random page references.  */
-# define POSIX_MADV_SEQUENTIAL	2 /* Expect sequential page references.  */
-# define POSIX_MADV_WILLNEED	3 /* Will need these pages.  */
-# define POSIX_MADV_DONTNEED	4 /* Don't need these pages.  */
-#endif
+/* Include generic Linux declarations.  */
+#include <bits/mman-linux.h>
diff --git a/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h b/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h
index ef5cc3868..9355e465d 100644
--- a/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h
+++ b/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h
@@ -25,6 +25,7 @@
 #define MSG_NOERROR	010000	/* no error if message is too big */
 #ifdef __USE_GNU
 # define MSG_EXCEPT	020000	/* recv any msg except of specified type */
+# define MSG_COPY	040000	/* copy (not remove) all queue messages */
 #endif
 
 /* Types used in the structure definition.  */
diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist b/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist
index ee6993291..b07d16f78 100644
--- a/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist
+++ b/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist
@@ -73,7 +73,6 @@ GLIBC_2.16
  pause F
  pread F
  pread64 F
- pthread_atfork F
  pthread_attr_destroy F
  pthread_attr_getaffinity_np F
  pthread_attr_getdetachstate F
diff --git a/libc/sysdeps/x86_64/fpu/libm-test-ulps b/libc/sysdeps/x86_64/fpu/libm-test-ulps
index b828774c7..f190ed881 100644
--- a/libc/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/libc/sysdeps/x86_64/fpu/libm-test-ulps
@@ -2390,6 +2390,9 @@ ifloat: 1
 Test "j0 (0x1.d7ce3ap+107) == 2.775523647291230802651040996274861694514e-17":
 float: 2
 ifloat: 2
+Test "j0 (0x1p16382) == -1.2193782500509000574176799046642541129387e-2466":
+ildouble: 1
+ldouble: 1
 Test "j0 (10.0) == -0.245935764451348335197760862485328754":
 double: 2
 float: 1
@@ -2420,6 +2423,9 @@ ldouble: 1
 Test "j1 (0x1.ff00000000002p+840) == 1.846591691699331493194965158699937660696e-127":
 double: 1
 idouble: 1
+Test "j1 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467":
+ildouble: 1
+ldouble: 1
 Test "j1 (10.0) == 0.0434727461688614366697487680258592883":
 float: 2
 ifloat: 2
@@ -3073,6 +3079,9 @@ double: 1
 float: 1
 idouble: 1
 ifloat: 1
+Test "y0 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467":
+ildouble: 1
+ldouble: 1
 Test "y0 (1.0) == 0.0882569642156769579829267660235151628":
 double: 2
 float: 1
@@ -3117,6 +3126,9 @@ ldouble: 1
 Test "y1 (0x1p-10) == -6.5190099301063115047395187618929589514382e+02":
 double: 1
 idouble: 1
+Test "y1 (0x1p16382) == 1.2193782500509000574176799046642541129387e-2466":
+ildouble: 1
+ldouble: 1
 Test "y1 (1.5) == -0.412308626973911295952829820633445323":
 float: 1
 ifloat: 1
diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S
index f3a4d448d..b393efe44 100644
--- a/libc/sysdeps/x86_64/memset.S
+++ b/libc/sysdeps/x86_64/memset.S
@@ -23,7 +23,7 @@
 #define __STOS_UPPER_BOUNDARY	$65536
 
 	.text
-#if !defined NOT_IN_libc && !defined USE_MULTIARCH
+#if !defined NOT_IN_libc
 ENTRY(__bzero)
 	mov	%rsi,%rdx	/* Adjust parameter.  */
 	xorl	%esi,%esi	/* Fill with 0s.  */
diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile
index dd6c27d0b..86787ee6e 100644
--- a/libc/sysdeps/x86_64/multiarch/Makefile
+++ b/libc/sysdeps/x86_64/multiarch/Makefile
@@ -10,14 +10,12 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
-		   strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \
+		   strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
 		   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
 		   strcpy-sse2-unaligned strncpy-sse2-unaligned \
 		   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
 		   strcat-sse2-unaligned strncat-sse2-unaligned \
-		   strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
-		   strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
-		   memcmp-ssse3
+		   strrchr-sse2-no-bsf strchr-sse2-no-bsf memcmp-ssse3
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
diff --git a/libc/sysdeps/x86_64/multiarch/bzero.S b/libc/sysdeps/x86_64/multiarch/bzero.S
deleted file mode 100644
index 88e96ea8e..000000000
--- a/libc/sysdeps/x86_64/multiarch/bzero.S
+++ /dev/null
@@ -1,28 +0,0 @@
-/* bzero.  x86-64 version.
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY(__bzero)
-	mov	%rsi,%rdx	/* Adjust parameter.  */
-	xorl	%esi,%esi	/* Fill with 0s.  */
-	jmp	__libc_memset	/* Branch to IFUNC memset.  */
-END(__bzero)
-weak_alias (__bzero, bzero)
diff --git a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 643cb2dd0..05315fdd7 100644
--- a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -61,17 +61,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/memset_chk.S.  */
-  IFUNC_IMPL (i, name, __memset_chk,
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
-			      __memset_chk_x86_64))
-
-  /* Support sysdeps/x86_64/multiarch/memset.S.  */
-  IFUNC_IMPL (i, name, memset,
-	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
-	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_x86_64))
-
   /* Support sysdeps/x86_64/multiarch/rawmemchr.S.  */
   IFUNC_IMPL (i, name, rawmemchr,
 	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE4_2,
@@ -187,11 +176,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strnlen.S.  */
-  IFUNC_IMPL (i, name, strnlen,
-	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2_no_bsf)
-	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
-
   /* Support sysdeps/x86_64/multiarch/strpbrk.S.  */
   IFUNC_IMPL (i, name, strpbrk,
 	      IFUNC_IMPL_ADD (array, i, strpbrk, HAS_SSE4_2,
@@ -262,14 +246,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strlen.S.  */
-  IFUNC_IMPL (i, name, strlen,
-	      IFUNC_IMPL_ADD (array, i, strlen, HAS_SSE4_2, __strlen_sse42)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2_pminub)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2_no_bsf)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
-
   /* Support sysdeps/x86_64/multiarch/strncmp.S.  */
   IFUNC_IMPL (i, name, strncmp,
 	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSE4_2,
diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.c b/libc/sysdeps/x86_64/multiarch/init-arch.c
index 992cbfb75..7daaf4609 100644
--- a/libc/sysdeps/x86_64/multiarch/init-arch.c
+++ b/libc/sysdeps/x86_64/multiarch/init-arch.c
@@ -58,11 +58,6 @@ __init_cpu_features (void)
 
       get_common_indeces (&family, &model);
 
-      /* Intel processors prefer SSE instruction for memory/string
-	 routines if they are available.  */
-      __cpu_features.feature[index_Prefer_SSE_for_memop]
-	|= bit_Prefer_SSE_for_memop;
-
       unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
       unsigned int extended_family = (eax >> 20) & 0xff;
       unsigned int extended_model = (eax >> 12) & 0xf0;
@@ -125,12 +120,6 @@ __init_cpu_features (void)
 
       ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
 
-      /* AMD processors prefer SSE instructions for memory/string routines
-	 if they are available, otherwise they prefer integer instructions.  */
-      if ((ecx & 0x200))
-	__cpu_features.feature[index_Prefer_SSE_for_memop]
-	  |= bit_Prefer_SSE_for_memop;
-
       unsigned int eax;
       __cpuid (0x80000000, eax, ebx, ecx, edx);
       if (eax >= 0x80000001)
diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.h b/libc/sysdeps/x86_64/multiarch/init-arch.h
index 0aece18de..28edbf7d0 100644
--- a/libc/sysdeps/x86_64/multiarch/init-arch.h
+++ b/libc/sysdeps/x86_64/multiarch/init-arch.h
@@ -18,7 +18,6 @@
 #define bit_Fast_Rep_String		(1 << 0)
 #define bit_Fast_Copy_Backward		(1 << 1)
 #define bit_Slow_BSF			(1 << 2)
-#define bit_Prefer_SSE_for_memop	(1 << 3)
 #define bit_Fast_Unaligned_Load		(1 << 4)
 #define bit_Prefer_PMINUB_for_stringop	(1 << 5)
 #define bit_AVX_Usable			(1 << 6)
@@ -58,7 +57,6 @@
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
 # define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
@@ -157,7 +155,6 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
-# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
 # define index_AVX_Usable		FEATURE_INDEX_1
 # define index_FMA_Usable		FEATURE_INDEX_1
@@ -169,7 +166,6 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_FAST_REP_STRING		HAS_ARCH_FEATURE (Fast_Rep_String)
 # define HAS_FAST_COPY_BACKWARD		HAS_ARCH_FEATURE (Fast_Copy_Backward)
 # define HAS_SLOW_BSF			HAS_ARCH_FEATURE (Slow_BSF)
-# define HAS_PREFER_SSE_FOR_MEMOP	HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
 # define HAS_FAST_UNALIGNED_LOAD	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 # define HAS_AVX			HAS_ARCH_FEATURE (AVX_Usable)
 # define HAS_FMA			HAS_ARCH_FEATURE (FMA_Usable)
diff --git a/libc/sysdeps/x86_64/multiarch/memset-x86-64.S b/libc/sysdeps/x86_64/multiarch/memset-x86-64.S
deleted file mode 100644
index 551d105d2..000000000
--- a/libc/sysdeps/x86_64/multiarch/memset-x86-64.S
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <sysdep.h>
-
-#ifndef NOT_IN_libc
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __memset_chk_x86_64, @function; \
-	.globl __memset_chk_x86_64; \
-	.p2align 4; \
-	__memset_chk_x86_64: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)
-# define memset __memset_x86_64
-# include "../memset.S"
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/memset.S b/libc/sysdeps/x86_64/multiarch/memset.S
deleted file mode 100644
index 7f673faa7..000000000
--- a/libc/sysdeps/x86_64/multiarch/memset.S
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Multiple versions of memset
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#ifndef NOT_IN_libc
-ENTRY(memset)
-	.type	memset, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__memset_x86_64(%rip), %rax
-	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
-	jz	2f
-	leaq	__memset_sse2(%rip), %rax
-2:	ret
-END(memset)
-
-/* Define internal IFUNC memset for bzero.  */
-	.globl __libc_memset
-	.hidden __libc_memset
-	__libc_memset = memset
-
-# define USE_SSE2 1
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __memset_sse2, @function; \
-	.globl __memset_sse2; \
-	.p2align 4; \
-	__memset_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memset_sse2, .-__memset_sse2
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __memset_chk_sse2, @function; \
-	.globl __memset_chk_sse2; \
-	.p2align 4; \
-	__memset_chk_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memset calls through a PLT.
-   The speedup we get from using GPR instruction is likely eaten away
-   by the indirect call in the PLT.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memset; __GI_memset = __memset_sse2
-# endif
-
-# undef strong_alias
-# define strong_alias(original, alias)
-#endif
-
-#include "../memset.S"
diff --git a/libc/sysdeps/x86_64/multiarch/memset_chk.S b/libc/sysdeps/x86_64/multiarch/memset_chk.S
deleted file mode 100644
index 55e263542..000000000
--- a/libc/sysdeps/x86_64/multiarch/memset_chk.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Multiple versions of __memset_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#ifndef NOT_IN_libc
-# ifdef SHARED
-ENTRY(__memset_chk)
-	.type	__memset_chk, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__memset_chk_x86_64(%rip), %rax
-	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
-	jz	2f
-	leaq	__memset_chk_sse2(%rip), %rax
-2:	ret
-END(__memset_chk)
-
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
-	.section .gnu.warning.__memset_zero_constant_len_parameter
-	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
-# else
-#  include "../memset_chk.S"
-# endif
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 72bb60994..028c6d3d7 100644
--- a/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -34,10 +34,236 @@ ENTRY (STRCAT)
 	mov	%rdx, %r8
 # endif
 
-# define RETURN  jmp L(StartStrcpyPart)
-# include "strlen-sse2-pminub.S"
-# undef RETURN
+/* Inline corresponding strlen file, temporary until new strcpy
+   implementation gets merged.  */
 
+	xor	%rax, %rax
+	mov	%edi, %ecx
+	and	$0x3f, %ecx
+	pxor	%xmm0, %xmm0
+	cmp	$0x30, %ecx
+	ja	L(next)
+	movdqu	(%rdi), %xmm1
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit_less16)
+	mov	%rdi, %rax
+	and	$-16, %rax
+	jmp	L(align16_start)
+L(next):
+	mov	%rdi, %rax
+	and	$-16, %rax
+	pcmpeqb	(%rax), %xmm0
+	mov	$-1, %r10d
+	sub	%rax, %rcx
+	shl	%cl, %r10d
+	pmovmskb %xmm0, %edx
+	and	%r10d, %edx
+	jnz	L(exit)
+
+L(align16_start):
+	pxor	%xmm0, %xmm0
+	pxor	%xmm1, %xmm1
+	pxor	%xmm2, %xmm2
+	pxor	%xmm3, %xmm3
+	pcmpeqb	16(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	jnz	L(exit64)
+
+	pcmpeqb	80(%rax), %xmm0
+	add	$64, %rax
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	jnz	L(exit64)
+
+	pcmpeqb	80(%rax), %xmm0
+	add	$64, %rax
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	jnz	L(exit64)
+
+	pcmpeqb	80(%rax), %xmm0
+	add	$64, %rax
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	jnz	L(exit64)
+
+	test	$0x3f, %rax
+	jz	L(align64_loop)
+
+	pcmpeqb	80(%rax), %xmm0
+	add	$80, %rax
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	test	$0x3f, %rax
+	jz	L(align64_loop)
+
+	pcmpeqb	16(%rax), %xmm1
+	add	$16, %rax
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	test	$0x3f, %rax
+	jz	L(align64_loop)
+
+	pcmpeqb	16(%rax), %xmm2
+	add	$16, %rax
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	test	$0x3f, %rax
+	jz	L(align64_loop)
+
+	pcmpeqb	16(%rax), %xmm3
+	add	$16, %rax
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	add	$16, %rax
+	.p2align 4
+	L(align64_loop):
+	movaps	(%rax),	%xmm4
+	pminub	16(%rax),	%xmm4
+	movaps	32(%rax),	%xmm5
+	pminub	48(%rax),	%xmm5
+	add	$64,	%rax
+	pminub	%xmm4,	%xmm5
+	pcmpeqb	%xmm0,	%xmm5
+	pmovmskb %xmm5,	%edx
+	test	%edx,	%edx
+	jz	L(align64_loop)
+
+	pcmpeqb	-64(%rax), %xmm0
+	sub	$80,	%rax
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$64, %rax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit):
+	sub	%rdi, %rax
+L(exit_less16):
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit16):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$16, %rax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit32):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$32, %rax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit48):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$48, %rax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit64):
+	sub	%rdi, %rax
+	bsf	%rdx, %rdx
+	add	%rdx, %rax
+	add	$64, %rax
+
+	.p2align 4
 L(StartStrcpyPart):
 	lea	(%r9, %rax), %rdi
 	mov	%rsi, %rcx
diff --git a/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S b/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S
index fea9d11b4..8101b91e5 100644
--- a/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -33,11 +33,321 @@ ENTRY (STRCAT)
 	mov	%rdx, %r8
 # endif
 
-# define RETURN  jmp L(StartStrcpyPart)
-# include "strlen-sse2-no-bsf.S"
 
-# undef RETURN
+/* Inlined copy of the strlen code from strlen-sse2-no-bsf.S; temporary
+   until the new strcpy implementation gets merged.  */
+
+	xor	%eax, %eax
+	cmpb	$0, (%rdi)
+	jz	L(exit_tail0)
+	cmpb	$0, 1(%rdi)
+	jz	L(exit_tail1)
+	cmpb	$0, 2(%rdi)
+	jz	L(exit_tail2)
+	cmpb	$0, 3(%rdi)
+	jz	L(exit_tail3)
+
+	cmpb	$0, 4(%rdi)
+	jz	L(exit_tail4)
+	cmpb	$0, 5(%rdi)
+	jz	L(exit_tail5)
+	cmpb	$0, 6(%rdi)
+	jz	L(exit_tail6)
+	cmpb	$0, 7(%rdi)
+	jz	L(exit_tail7)
+
+	cmpb	$0, 8(%rdi)
+	jz	L(exit_tail8)
+	cmpb	$0, 9(%rdi)
+	jz	L(exit_tail9)
+	cmpb	$0, 10(%rdi)
+	jz	L(exit_tail10)
+	cmpb	$0, 11(%rdi)
+	jz	L(exit_tail11)
+
+	cmpb	$0, 12(%rdi)
+	jz	L(exit_tail12)
+	cmpb	$0, 13(%rdi)
+	jz	L(exit_tail13)
+	cmpb	$0, 14(%rdi)
+	jz	L(exit_tail14)
+	cmpb	$0, 15(%rdi)
+	jz	L(exit_tail15)
+	pxor	%xmm0, %xmm0
+	lea	16(%rdi), %rcx
+	lea	16(%rdi), %rax
+	and	$-16, %rax
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	pcmpeqb	(%rax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	16(%rax), %rax
+	jnz	L(exit)
+
+	and	$-0x40, %rax
 
+	.p2align 4
+L(aligned_64):
+	pcmpeqb	(%rax), %xmm0
+	pcmpeqb	16(%rax), %xmm1
+	pcmpeqb	32(%rax), %xmm2
+	pcmpeqb	48(%rax), %xmm3
+	pmovmskb %xmm0, %edx
+	pmovmskb %xmm1, %r11d
+	pmovmskb %xmm2, %r10d
+	pmovmskb %xmm3, %r9d
+	or	%edx, %r9d
+	or	%r11d, %r9d
+	or	%r10d, %r9d
+	lea	64(%rax), %rax
+	jz	L(aligned_64)
+
+	test	%edx, %edx
+	jnz	L(aligned_64_exit_16)
+	test	%r11d, %r11d
+	jnz	L(aligned_64_exit_32)
+	test	%r10d, %r10d
+	jnz	L(aligned_64_exit_48)
+
+L(aligned_64_exit_64):
+	pmovmskb %xmm3, %edx
+	jmp	L(exit)
+
+L(aligned_64_exit_48):
+	lea	-16(%rax), %rax
+	mov	%r10d, %edx
+	jmp	L(exit)
+
+L(aligned_64_exit_32):
+	lea	-32(%rax), %rax
+	mov	%r11d, %edx
+	jmp	L(exit)
+
+L(aligned_64_exit_16):
+	lea	-48(%rax), %rax
+
+L(exit):
+	sub	%rcx, %rax
+	test	%dl, %dl
+	jz	L(exit_high)
+	test	$0x01, %dl
+	jnz	L(exit_tail0)
+
+	test	$0x02, %dl
+	jnz	L(exit_tail1)
+
+	test	$0x04, %dl
+	jnz	L(exit_tail2)
+
+	test	$0x08, %dl
+	jnz	L(exit_tail3)
+
+	test	$0x10, %dl
+	jnz	L(exit_tail4)
+
+	test	$0x20, %dl
+	jnz	L(exit_tail5)
+
+	test	$0x40, %dl
+	jnz	L(exit_tail6)
+	add	$7, %eax
+L(exit_tail0):
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_high):
+	add	$8, %eax
+	test	$0x01, %dh
+	jnz	L(exit_tail0)
+
+	test	$0x02, %dh
+	jnz	L(exit_tail1)
+
+	test	$0x04, %dh
+	jnz	L(exit_tail2)
+
+	test	$0x08, %dh
+	jnz	L(exit_tail3)
+
+	test	$0x10, %dh
+	jnz	L(exit_tail4)
+
+	test	$0x20, %dh
+	jnz	L(exit_tail5)
+
+	test	$0x40, %dh
+	jnz	L(exit_tail6)
+	add	$7, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail1):
+	add	$1, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail2):
+	add	$2, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail3):
+	add	$3, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail4):
+	add	$4, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail5):
+	add	$5, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail6):
+	add	$6, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail7):
+	add	$7, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail8):
+	add	$8, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail9):
+	add	$9, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail10):
+	add	$10, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail11):
+	add	$11, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail12):
+	add	$12, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail13):
+	add	$13, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail14):
+	add	$14, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_tail15):
+	add	$15, %eax
+
+	.p2align 4
 L(StartStrcpyPart):
 	mov	%rsi, %rcx
 	lea	(%rdi, %rax), %rdx
diff --git a/libc/sysdeps/x86_64/multiarch/strlen-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strlen-sse2-no-bsf.S
deleted file mode 100644
index ff2ab7004..000000000
--- a/libc/sysdeps/x86_64/multiarch/strlen-sse2-no-bsf.S
+++ /dev/null
@@ -1,685 +0,0 @@
-/* strlen SSE2 without bsf
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* only for strlen case we don't use optimized version for STATIC build just for SHARED */
-
-#if (defined SHARED || defined USE_AS_STRCAT || defined USE_AS_STRNLEN) && !defined NOT_IN_libc
-
-# ifndef USE_AS_STRCAT
-
-#  include <sysdep.h>
-
-#  define RETURN	ret
-
-#  ifndef STRLEN
-#   define STRLEN	__strlen_sse2_no_bsf
-#  endif
-
-	atom_text_section
-ENTRY (STRLEN)
-# endif
-	xor	%eax, %eax
-#  ifdef USE_AS_STRNLEN
-	mov	%rsi, %r8
-	sub	$4, %rsi
-	jbe	L(len_less4_prolog)
-#  endif
-	cmpb	$0, (%rdi)
-	jz	L(exit_tail0)
-	cmpb	$0, 1(%rdi)
-	jz	L(exit_tail1)
-	cmpb	$0, 2(%rdi)
-	jz	L(exit_tail2)
-	cmpb	$0, 3(%rdi)
-	jz	L(exit_tail3)
-
-# ifdef USE_AS_STRNLEN
-	sub	$4, %rsi
-	jbe	L(len_less8_prolog)
-# endif
-
-	cmpb	$0, 4(%rdi)
-	jz	L(exit_tail4)
-	cmpb	$0, 5(%rdi)
-	jz	L(exit_tail5)
-	cmpb	$0, 6(%rdi)
-	jz	L(exit_tail6)
-	cmpb	$0, 7(%rdi)
-	jz	L(exit_tail7)
-
-# ifdef USE_AS_STRNLEN
-	sub	$4, %rsi
-	jbe	L(len_less12_prolog)
-# endif
-
-	cmpb	$0, 8(%rdi)
-	jz	L(exit_tail8)
-	cmpb	$0, 9(%rdi)
-	jz	L(exit_tail9)
-	cmpb	$0, 10(%rdi)
-	jz	L(exit_tail10)
-	cmpb	$0, 11(%rdi)
-	jz	L(exit_tail11)
-
-# ifdef USE_AS_STRNLEN
-	sub	$4, %rsi
-	jbe	L(len_less16_prolog)
-# endif
-
-	cmpb	$0, 12(%rdi)
-	jz	L(exit_tail12)
-	cmpb	$0, 13(%rdi)
-	jz	L(exit_tail13)
-	cmpb	$0, 14(%rdi)
-	jz	L(exit_tail14)
-	cmpb	$0, 15(%rdi)
-	jz	L(exit_tail15)
-	pxor	%xmm0, %xmm0
-	lea	16(%rdi), %rcx
-	lea	16(%rdi), %rax
-	and	$-16, %rax
-
-# ifdef USE_AS_STRNLEN
-	and	$15, %rdi
-	add	%rdi, %rsi
-	sub	$64, %rsi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	sub	$64, %rsi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	sub	$64, %rsi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	sub	$64, %rsi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-	pcmpeqb	(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%rax), %rax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	mov	%rax, %rdx
-	and	$63, %rdx
-	add	%rdx, %rsi
-# endif
-
-	and	$-0x40, %rax
-
-	.p2align 4
-L(aligned_64):
-# ifdef USE_AS_STRNLEN
-	sub	$64, %rsi
-	jbe	L(len_less64)
-# endif
-	pcmpeqb	(%rax), %xmm0
-	pcmpeqb	16(%rax), %xmm1
-	pcmpeqb	32(%rax), %xmm2
-	pcmpeqb	48(%rax), %xmm3
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %r11d
-	pmovmskb %xmm2, %r10d
-	pmovmskb %xmm3, %r9d
-	or	%edx, %r9d
-	or	%r11d, %r9d
-	or	%r10d, %r9d
-	lea	64(%rax), %rax
-	jz	L(aligned_64)
-
-	test	%edx, %edx
-	jnz	L(aligned_64_exit_16)
-	test	%r11d, %r11d
-	jnz	L(aligned_64_exit_32)
-	test	%r10d, %r10d
-	jnz	L(aligned_64_exit_48)
-L(aligned_64_exit_64):
-	pmovmskb %xmm3, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_48):
-	lea	-16(%rax), %rax
-	mov	%r10d, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_32):
-	lea	-32(%rax), %rax
-	mov	%r11d, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_16):
-	lea	-48(%rax), %rax
-L(aligned_64_exit):
-L(exit):
-	sub	%rcx, %rax
-	test	%dl, %dl
-	jz	L(exit_high)
-	test	$0x01, %dl
-	jnz	L(exit_tail0)
-
-	test	$0x02, %dl
-	jnz	L(exit_tail1)
-
-	test	$0x04, %dl
-	jnz	L(exit_tail2)
-
-	test	$0x08, %dl
-	jnz	L(exit_tail3)
-
-	test	$0x10, %dl
-	jnz	L(exit_tail4)
-
-	test	$0x20, %dl
-	jnz	L(exit_tail5)
-
-	test	$0x40, %dl
-	jnz	L(exit_tail6)
-	add	$7, %eax
-L(exit_tail0):
-	RETURN
-
-L(exit_high):
-	add	$8, %eax
-	test	$0x01, %dh
-	jnz	L(exit_tail0)
-
-	test	$0x02, %dh
-	jnz	L(exit_tail1)
-
-	test	$0x04, %dh
-	jnz	L(exit_tail2)
-
-	test	$0x08, %dh
-	jnz	L(exit_tail3)
-
-	test	$0x10, %dh
-	jnz	L(exit_tail4)
-
-	test	$0x20, %dh
-	jnz	L(exit_tail5)
-
-	test	$0x40, %dh
-	jnz	L(exit_tail6)
-	add	$7, %eax
-	RETURN
-
-# ifdef USE_AS_STRNLEN
-
-	.p2align 4
-L(len_less64):
-	pxor	%xmm0, %xmm0
-	add	$64, %rsi
-
-	pcmpeqb	(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%rax), %rax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	sub	$16, %rsi
-	jbe	L(return_start_len)
-
-	pcmpeqb	(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%rax), %rax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	sub	$16, %rsi
-	jbe	L(return_start_len)
-
-	pcmpeqb	(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%rax), %rax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	sub	$16, %rsi
-	jbe	L(return_start_len)
-
-	pcmpeqb	(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%rax), %rax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	mov	%r8, %rax
-	ret
-
-	.p2align 4
-L(strnlen_exit):
-	sub	%rcx, %rax
-
-	test	%dl, %dl
-	jz	L(strnlen_exit_high)
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(strnlen_exit_8)
-	test	$0x01, %dl
-	jnz	L(exit_tail0)
-	test	$0x02, %dl
-	jnz	L(strnlen_exit_tail1)
-	test	$0x04, %dl
-	jnz	L(strnlen_exit_tail2)
-	sub	$4, %rsi
-	jb	L(return_start_len)
-	lea	3(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_8):
-	test	$0x10, %dl
-	jnz	L(strnlen_exit_tail4)
-	test	$0x20, %dl
-	jnz	L(strnlen_exit_tail5)
-	test	$0x40, %dl
-	jnz	L(strnlen_exit_tail6)
-	sub	$8, %rsi
-	jb	L(return_start_len)
-	lea	7(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_high):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(strnlen_exit_high_8)
-	test	$0x01, %dh
-	jnz	L(strnlen_exit_tail8)
-	test	$0x02, %dh
-	jnz	L(strnlen_exit_tail9)
-	test	$0x04, %dh
-	jnz	L(strnlen_exit_tail10)
-	sub	$12, %rsi
-	jb	L(return_start_len)
-	lea	11(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_high_8):
-	test	$0x10, %dh
-	jnz	L(strnlen_exit_tail12)
-	test	$0x20, %dh
-	jnz	L(strnlen_exit_tail13)
-	test	$0x40, %dh
-	jnz	L(strnlen_exit_tail14)
-	sub	$16, %rsi
-	jb	L(return_start_len)
-	lea	15(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail1):
-	sub	$2, %rsi
-	jb	L(return_start_len)
-	lea	1(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail2):
-	sub	$3, %rsi
-	jb	L(return_start_len)
-	lea	2(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail4):
-	sub	$5, %rsi
-	jb	L(return_start_len)
-	lea	4(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail5):
-	sub	$6, %rsi
-	jb	L(return_start_len)
-	lea	5(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail6):
-	sub	$7, %rsi
-	jb	L(return_start_len)
-	lea	6(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail8):
-	sub	$9, %rsi
-	jb	L(return_start_len)
-	lea	8(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail9):
-	sub	$10, %rsi
-	jb	L(return_start_len)
-	lea	9(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail10):
-	sub	$11, %rsi
-	jb	L(return_start_len)
-	lea	10(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail12):
-	sub	$13, %rsi
-	jb	L(return_start_len)
-	lea	12(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail13):
-	sub	$14, %rsi
-	jb	L(return_start_len)
-	lea	13(%eax), %eax
-	ret
-
-	.p2align 4
-L(strnlen_exit_tail14):
-	sub	$15, %rsi
-	jb	L(return_start_len)
-	lea	14(%eax), %eax
-	ret
-
-	.p2align 4
-L(return_start_len):
-	mov	%r8, %rax
-	ret
-
-/* for prolog only */
-
-	.p2align 4
-L(len_less4_prolog):
-	add	$4, %rsi
-	jz	L(exit_tail0)
-
-	cmpb	$0, (%rdi)
-	jz	L(exit_tail0)
-	cmp	$1, %esi
-	je	L(exit_tail1)
-
-	cmpb	$0, 1(%rdi)
-	jz	L(exit_tail1)
-	cmp	$2, %esi
-	je	L(exit_tail2)
-
-	cmpb	$0, 2(%rdi)
-	jz	L(exit_tail2)
-	cmp	$3, %esi
-	je	L(exit_tail3)
-
-	cmpb	$0, 3(%rdi)
-	jz	L(exit_tail3)
-	mov	$4, %eax
-	ret
-
-	.p2align 4
-L(len_less8_prolog):
-	add	$4, %rsi
-
-	cmpb	$0, 4(%rdi)
-	jz	L(exit_tail4)
-	cmp	$1, %esi
-	je	L(exit_tail5)
-
-	cmpb	$0, 5(%rdi)
-	jz	L(exit_tail5)
-	cmp	$2, %esi
-	je	L(exit_tail6)
-
-	cmpb	$0, 6(%rdi)
-	jz	L(exit_tail6)
-	cmp	$3, %esi
-	je	L(exit_tail7)
-
-	cmpb	$0, 7(%rdi)
-	jz	L(exit_tail7)
-	mov	$8, %eax
-	ret
-
-	.p2align 4
-L(len_less12_prolog):
-	add	$4, %rsi
-
-	cmpb	$0, 8(%rdi)
-	jz	L(exit_tail8)
-	cmp	$1, %esi
-	je	L(exit_tail9)
-
-	cmpb	$0, 9(%rdi)
-	jz	L(exit_tail9)
-	cmp	$2, %esi
-	je	L(exit_tail10)
-
-	cmpb	$0, 10(%rdi)
-	jz	L(exit_tail10)
-	cmp	$3, %esi
-	je	L(exit_tail11)
-
-	cmpb	$0, 11(%rdi)
-	jz	L(exit_tail11)
-	mov	$12, %eax
-	ret
-
-	.p2align 4
-L(len_less16_prolog):
-	add	$4, %rsi
-
-	cmpb	$0, 12(%rdi)
-	jz	L(exit_tail12)
-	cmp	$1, %esi
-	je	L(exit_tail13)
-
-	cmpb	$0, 13(%rdi)
-	jz	L(exit_tail13)
-	cmp	$2, %esi
-	je	L(exit_tail14)
-
-	cmpb	$0, 14(%rdi)
-	jz	L(exit_tail14)
-	cmp	$3, %esi
-	je	L(exit_tail15)
-
-	cmpb	$0, 15(%rdi)
-	jz	L(exit_tail15)
-	mov	$16, %eax
-	ret
-# endif
-
-	.p2align 4
-L(exit_tail1):
-	add	$1, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail2):
-	add	$2, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail3):
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail4):
-	add	$4, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail5):
-	add	$5, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail6):
-	add	$6, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail7):
-	add	$7, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail8):
-	add	$8, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail9):
-	add	$9, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail10):
-	add	$10, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail11):
-	add	$11, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail12):
-	add	$12, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail13):
-	add	$13, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail14):
-	add	$14, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail15):
-	add	$15, %eax
-# ifndef USE_AS_STRCAT
-	RETURN
-END (STRLEN)
-# endif
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S b/libc/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
deleted file mode 100644
index cc4bb57e9..000000000
--- a/libc/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
+++ /dev/null
@@ -1,259 +0,0 @@
-/* strlen SSE2
-   Copyright (C) 2011-2013 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined NOT_IN_libc && (defined SHARED || defined USE_AS_STRCAT)
-
-# ifndef USE_AS_STRCAT
-
-#  include <sysdep.h>
-
-#  define RETURN ret
-
-	.section .text.sse2,"ax",@progbits
-ENTRY (__strlen_sse2_pminub)
-
-# endif
-	xor	%rax, %rax
-	mov	%edi, %ecx
-	and	$0x3f, %ecx
-	pxor	%xmm0, %xmm0
-	cmp	$0x30, %ecx
-	ja	L(next)
-	movdqu	(%rdi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit_less16)
-	mov	%rdi, %rax
-	and	$-16, %rax
-	jmp	L(align16_start)
-L(next):
-	mov	%rdi, %rax
-	and	$-16, %rax
-	pcmpeqb	(%rax), %xmm0
-	mov	$-1, %r10d
-	sub	%rax, %rcx
-	shl	%cl, %r10d
-	pmovmskb %xmm0, %edx
-	and	%r10d, %edx
-	jnz	L(exit)
-L(align16_start):
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	pcmpeqb	16(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	jnz	L(exit64)
-
-	pcmpeqb	80(%rax), %xmm0
-	add	$64, %rax
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	jnz	L(exit64)
-
-	pcmpeqb	80(%rax), %xmm0
-	add	$64, %rax
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	jnz	L(exit64)
-
-	pcmpeqb	80(%rax), %xmm0
-	add	$64, %rax
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	jnz	L(exit64)
-
-
-	test	$0x3f, %rax
-	jz	L(align64_loop)
-
-	pcmpeqb	80(%rax), %xmm0
-	add	$80, %rax
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	test	$0x3f, %rax
-	jz	L(align64_loop)
-
-	pcmpeqb	16(%rax), %xmm1
-	add	$16, %rax
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	test	$0x3f, %rax
-	jz	L(align64_loop)
-
-	pcmpeqb	16(%rax), %xmm2
-	add	$16, %rax
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	test	$0x3f, %rax
-	jz	L(align64_loop)
-
-	pcmpeqb	16(%rax), %xmm3
-	add	$16, %rax
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	add	$16, %rax
-	.p2align 4
-	L(align64_loop):
-	movaps	(%rax),	%xmm4
-	pminub	16(%rax),	%xmm4
-	movaps	32(%rax),	%xmm5
-	pminub	48(%rax),	%xmm5
-	add	$64,	%rax
-	pminub	%xmm4,	%xmm5
-	pcmpeqb	%xmm0,	%xmm5
-	pmovmskb %xmm5,	%edx
-	test	%edx,	%edx
-	jz	L(align64_loop)
-
-
-	pcmpeqb	-64(%rax), %xmm0
-	sub	$80,	%rax
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$64, %rax
-	RETURN
-
-	.p2align 4
-L(exit):
-	sub	%rdi, %rax
-L(exit_less16):
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	RETURN
-	.p2align 4
-L(exit16):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$16, %rax
-	RETURN
-	.p2align 4
-L(exit32):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$32, %rax
-	RETURN
-	.p2align 4
-L(exit48):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$48, %rax
-	RETURN
-	.p2align 4
-L(exit64):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
-	add	$64, %rax
-# ifndef USE_AS_STRCAT
-	RETURN
-
-END (__strlen_sse2_pminub)
-# endif
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strlen-sse4.S b/libc/sysdeps/x86_64/multiarch/strlen-sse4.S
deleted file mode 100644
index 8d685df0c..000000000
--- a/libc/sysdeps/x86_64/multiarch/strlen-sse4.S
+++ /dev/null
@@ -1,84 +0,0 @@
-/* strlen with SSE4
-   Copyright (C) 2009-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if defined SHARED && !defined NOT_IN_libc
-
-#include <sysdep.h>
-
-	.section .text.sse4.2,"ax",@progbits
-ENTRY (__strlen_sse42)
-	pxor	%xmm1, %xmm1
-	movl	%edi, %ecx
-	movq	%rdi, %r8
-	andq	$~15, %rdi
-	xor	%edi, %ecx
-	pcmpeqb	(%rdi), %xmm1
-	pmovmskb %xmm1, %edx
-	shrl	%cl, %edx
-	shll	%cl, %edx
-	andl	%edx, %edx
-	jnz	L(less16bytes)
-	pxor	%xmm1, %xmm1
-
-	.p2align 4
-L(more64bytes_loop):
-	pcmpistri $0x08, 16(%rdi), %xmm1
-	jz	L(more32bytes)
-
-	pcmpistri $0x08, 32(%rdi), %xmm1
-	jz	L(more48bytes)
-
-	pcmpistri $0x08, 48(%rdi), %xmm1
-	jz	L(more64bytes)
-
-	add	$64, %rdi
-	pcmpistri $0x08, (%rdi), %xmm1
-	jnz	L(more64bytes_loop)
-	leaq	(%rdi,%rcx), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(more32bytes):
-	leaq	16(%rdi,%rcx, 1), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(more48bytes):
-	leaq	32(%rdi,%rcx, 1), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(more64bytes):
-	leaq	48(%rdi,%rcx, 1), %rax
-	subq	%r8, %rax
-	ret
-
-	.p2align 4
-L(less16bytes):
-	subq	%r8, %rdi
-	bsfl	%edx, %eax
-	addq	%rdi, %rax
-	ret
-
-END (__strlen_sse42)
-
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strlen.S b/libc/sysdeps/x86_64/multiarch/strlen.S
deleted file mode 100644
index ab29ceff2..000000000
--- a/libc/sysdeps/x86_64/multiarch/strlen.S
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Multiple versions of strlen(str) -- determine the length of the string STR.
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc and for
-   the DSO.  In static binaries we need strlen before the initialization
-   happened.  */
-#if defined SHARED && !defined NOT_IN_libc
-	.text
-ENTRY(strlen)
-	.type	strlen, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__strlen_sse2_pminub(%rip), %rax
-	testl	$bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
-	jnz	2f
-	leaq	__strlen_sse2(%rip), %rax
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jz	2f
-	leaq	__strlen_sse42(%rip), %rax
-	ret
-2:	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
-	jz	3f
-	leaq    __strlen_sse2_no_bsf(%rip), %rax
-3:	ret
-END(strlen)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strlen_sse2, @function; \
-	.align 16; \
-	.globl __strlen_sse2; \
-	.hidden __strlen_sse2; \
-	__strlen_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strlen_sse2, .-__strlen_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strlen calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strlen; __GI_strlen = __strlen_sse2
-#endif
-
-#include "../strlen.S"
diff --git a/libc/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S
deleted file mode 100644
index 248328d99..000000000
--- a/libc/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNLEN
-#define STRLEN __strnlen_sse2_no_bsf
-#include "strlen-sse2-no-bsf.S"
diff --git a/libc/sysdeps/x86_64/multiarch/strnlen.S b/libc/sysdeps/x86_64/multiarch/strnlen.S
deleted file mode 100644
index 124f8458a..000000000
--- a/libc/sysdeps/x86_64/multiarch/strnlen.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* multiple version of strnlen
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc.  */
-#ifndef NOT_IN_libc
-
-	.text
-ENTRY(__strnlen)
-	.type	__strnlen, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__strnlen_sse2(%rip), %rax
-	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
-	jz	2f
-	leaq	__strnlen_sse2_no_bsf(%rip), %rax
-2:	ret
-END(__strnlen)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strnlen_sse2, @function; \
-	.align 16; \
-	.globl __strnlen_sse2; \
-	.hidden __strnlen_sse2; \
-	__strnlen_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strnlen_sse2, .-__strnlen_sse2
-
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI_strnlen; __GI_strnlen = __strnlen_sse2
-#endif
-
-#include "../strnlen.S"
diff --git a/libc/sysdeps/x86_64/preconfigure b/libc/sysdeps/x86_64/preconfigure
index ca9de7584..d5abba882 100644
--- a/libc/sysdeps/x86_64/preconfigure
+++ b/libc/sysdeps/x86_64/preconfigure
@@ -1,123 +1,3 @@
-
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
-  return $1
-} # as_fn_set_status
-
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
-  set +e
-  as_fn_set_status $1
-  exit $1
-} # as_fn_exit
-if expr a : '\(a\)' >/dev/null 2>&1 &&
-   test "X`expr 00001 : '.*\(...\)'`" = X001; then
-  as_expr=expr
-else
-  as_expr=false
-fi
-
-if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
-  as_basename=basename
-else
-  as_basename=false
-fi
-
-as_me=`$as_basename -- "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
-	 X"$0" : 'X\(//\)$' \| \
-	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X/"$0" |
-    sed '/^.*\/\([^/][^/]*\)\/*$/{
-	    s//\1/
-	    q
-	  }
-	  /^X\/\(\/\/\)$/{
-	    s//\1/
-	    q
-	  }
-	  /^X\/\(\/\).*/{
-	    s//\1/
-	    q
-	  }
-	  s/.*/./; q'`
-
-
-  as_lineno_1=$LINENO as_lineno_1a=$LINENO
-  as_lineno_2=$LINENO as_lineno_2a=$LINENO
-  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
-  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
-  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
-  sed -n '
-    p
-    /[$]LINENO/=
-  ' <$as_myself |
-    sed '
-      s/[$]LINENO.*/&-/
-      t lineno
-      b
-      :lineno
-      N
-      :loop
-      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
-      t loop
-      s/-\n.*//
-    ' >$as_me.lineno &&
-  chmod +x "$as_me.lineno" ||
-    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
-
-  # Don't try to exec as it changes $[0], causing all sort of problems
-  # (the dirname of $[0] is not the place where we might find the
-  # original and so on.  Autoconf is especially sensitive to this).
-  . "./$as_me.lineno"
-  # Exit status is that of the last command.
-  exit
-}
-
-
-# ac_fn_c_try_compile LINENO
-# --------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_compile ()
-{
-  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  rm -f conftest.$ac_objext
-  if { { ac_try="$ac_compile"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
-  (eval "$ac_compile") 2>conftest.err
-  ac_status=$?
-  if test -s conftest.err; then
-    grep -v '^ *+' conftest.err >conftest.er1
-    cat conftest.er1 >&5
-    mv -f conftest.er1 conftest.err
-  fi
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } && {
-	 test -z "$ac_c_werror_flag" ||
-	 test ! -s conftest.err
-       } && test -s conftest.$ac_objext; then :
-  ac_retval=0
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-	ac_retval=1
-fi
-  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-  as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_compile
 # This file is generated from configure.in by Autoconf.  DO NOT EDIT!
  # Local preconfigure fragment for sysdeps/x86_64
 
diff --git a/libc/sysdeps/x86_64/strcat.S b/libc/sysdeps/x86_64/strcat.S
index 287ffd24c..8bea6fb5d 100644
--- a/libc/sysdeps/x86_64/strcat.S
+++ b/libc/sysdeps/x86_64/strcat.S
@@ -21,6 +21,7 @@
 #include <sysdep.h>
 #include "asm-syntax.h"
 
+/* Will be removed when new strcpy implementation gets merged.  */
 
 	.text
 ENTRY (strcat)
diff --git a/libc/sysdeps/x86_64/strlen.S b/libc/sysdeps/x86_64/strlen.S
index 4bdca0a45..eeb109221 100644
--- a/libc/sysdeps/x86_64/strlen.S
+++ b/libc/sysdeps/x86_64/strlen.S
@@ -1,6 +1,5 @@
-/* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2009-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
+/* SSE2 version of strlen.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,83 +18,222 @@
 
 #include <sysdep.h>
 
+/* Long-lived registers in strlen(s), strnlen(s, n) are:
 
-	.text
+	%xmm11 - zero
+	%rdi   - s
+	%r10   - (s+n) & (~(64-1))  (strnlen only)
+	%r11   - s+n  (strnlen only)
+*/
+
+
+.text
 ENTRY(strlen)
+
+/* Test 64 bytes from %rax for zero bytes.  Save result as bitmask in %rdx.  */
+#define FIND_ZERO	\
+	pcmpeqb	(%rax), %xmm8;	\
+	pcmpeqb	16(%rax), %xmm9;	\
+	pcmpeqb	32(%rax), %xmm10;	\
+	pcmpeqb	48(%rax), %xmm11;	\
+	pmovmskb	%xmm8, %esi;	\
+	pmovmskb	%xmm9, %edx;	\
+	pmovmskb	%xmm10, %r8d;	\
+	pmovmskb	%xmm11, %ecx;	\
+	salq	$16, %rdx;	\
+	salq	$16, %rcx;	\
+	orq	%rsi, %rdx;	\
+	orq	%r8, %rcx;	\
+	salq	$32, %rcx;	\
+	orq	%rcx, %rdx;
+
+#ifdef AS_STRNLEN
+/* Do not read anything when n==0.  */
+	test	%rsi, %rsi
+	jne	L(n_nonzero)
 	xor	%rax, %rax
-	mov	%edi, %ecx
-	and	$0x3f, %ecx
-	pxor	%xmm0, %xmm0
-	cmp	$0x30, %ecx
-	ja	L(next)
-	movdqu	(%rdi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit_less16)
-	mov	%rdi, %rax
-	and	$-16, %rax
-	jmp	L(align16_start)
-L(next):
-	mov	%rdi, %rax
-	and	$-16, %rax
-	pcmpeqb	(%rax), %xmm0
-	mov	$-1, %esi
-	sub	%rax, %rcx
-	shl	%cl, %esi
-	pmovmskb %xmm0, %edx
-	and	%esi, %edx
-	jnz	L(exit)
-L(align16_start):
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	.p2align 4
-L(align16_loop):
-	pcmpeqb	16(%rax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
+	ret
+L(n_nonzero):
 
-	pcmpeqb	32(%rax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
+/* Initialize long lived registers.  */
 
-	pcmpeqb	48(%rax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
+	add	%rdi, %rsi
+	mov	%rsi, %r10
+	and	$-64, %r10
+	mov	%rsi, %r11
+#endif
 
-	pcmpeqb	64(%rax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%rax), %rax
+	pxor	%xmm8, %xmm8
+	pxor	%xmm9, %xmm9
+	pxor	%xmm10, %xmm10
+	pxor	%xmm11, %xmm11
+	movq	%rdi, %rax
+	movq	%rdi, %rcx
+	andq	$4095, %rcx
+/* Offsets 4032-4047 round down to 4032, so 64 bytes fit in the page.  */
+	cmpq	$4047, %rcx
+/* We cannot unify this branching as it would be ~6 cycles slower.  */
+	ja	L(cross_page)
+
+#ifdef AS_STRNLEN
+/* Test if end is among first 64 bytes.  */
+# define STRNLEN_PROLOG	\
+	mov	%r11, %rsi;	\
+	subq	%rax, %rsi;	\
+	andq	$-64, %rax;	\
+	testq	$-64, %rsi;	\
+	je	L(strnlen_ret)
+#else
+# define STRNLEN_PROLOG  andq $-64, %rax;
+#endif
+
+/* Ignore bits in mask that come before start of string.  */
+#define PROLOG(lab)	\
+	movq	%rdi, %rcx;	\
+	xorq	%rax, %rcx;	\
+	STRNLEN_PROLOG;	\
+	sarq	%cl, %rdx;	\
+	test	%rdx, %rdx;	\
+	je	L(lab);	\
+	bsfq	%rdx, %rax;	\
+	ret
+
+#ifdef AS_STRNLEN
+	andq	$-16, %rax
+	FIND_ZERO
+#else
+	/* Test first 16 bytes unaligned.  */
+	movdqu	(%rax), %xmm12
+	pcmpeqb	%xmm8, %xmm12
+	pmovmskb	%xmm12, %edx
 	test	%edx, %edx
-	jz	L(align16_loop)
-L(exit):
-	sub	%rdi, %rax
-L(exit_less16):
-	bsf	%rdx, %rdx
-	add	%rdx, %rax
+	je 	L(next48_bytes)
+	bsf	%edx, %eax /* If eax is zeroed 16bit bsf can be used.  */
+	ret
+
+L(next48_bytes):
+/* Same as FIND_ZERO except we do not check first 16 bytes.  */
+	andq	$-16, %rax
+	pcmpeqb 16(%rax), %xmm9
+	pcmpeqb 32(%rax), %xmm10
+	pcmpeqb 48(%rax), %xmm11
+	pmovmskb	%xmm9, %edx
+	pmovmskb	%xmm10, %r8d
+	pmovmskb	%xmm11, %ecx
+	salq	$16, %rdx
+	salq	$16, %rcx
+	orq	%r8, %rcx
+	salq	$32, %rcx
+	orq	%rcx, %rdx
+#endif
+
+	/* When no zero byte is found xmm9-11 are zero so we do not have to
+	   zero them.  */
+	PROLOG(loop)
+
+	.p2align 4
+L(cross_page):
+	andq	$-64, %rax
+	FIND_ZERO
+	PROLOG(loop_init)
+
+#ifdef AS_STRNLEN
+/* We must do this check to correctly handle strnlen (s, -1).  */
+L(strnlen_ret):
+	bts	%rsi, %rdx
+	sarq	%cl, %rdx
+	test	%rdx, %rdx
+	je	L(loop_init)
+	bsfq	%rdx, %rax
 	ret
+#endif
+	.p2align 4
+L(loop_init):
+	pxor	%xmm9, %xmm9
+	pxor	%xmm10, %xmm10
+	pxor	%xmm11, %xmm11
+#ifdef AS_STRNLEN
+	.p2align 4
+L(loop):
+
+	addq	$64, %rax
+	cmpq	%rax, %r10
+	je	L(exit_end)
+
+	movdqa	(%rax), %xmm8
+	pminub	16(%rax), %xmm8
+	pminub	32(%rax), %xmm8
+	pminub	48(%rax), %xmm8
+	pcmpeqb	%xmm11, %xmm8
+	pmovmskb	%xmm8, %edx
+	testl	%edx, %edx
+	jne	L(exit)
+	jmp	L(loop)
+
 	.p2align 4
-L(exit16):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	lea	16(%rdx,%rax), %rax
+L(exit_end):
+	cmp	%rax, %r11
+	je	L(first) /* Do not read when end is at page boundary.  */
+	pxor	%xmm8, %xmm8
+	FIND_ZERO
+
+L(first):
+	bts	%r11, %rdx
+	bsfq	%rdx, %rdx
+	addq	%rdx, %rax
+	subq	%rdi, %rax
 	ret
+
 	.p2align 4
-L(exit32):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	lea	32(%rdx,%rax), %rax
+L(exit):
+	pxor	%xmm8, %xmm8
+	FIND_ZERO
+
+	bsfq	%rdx, %rdx
+	addq	%rdx, %rax
+	subq	%rdi, %rax
 	ret
+
+#else
+
+	/* Main loop.  Unrolled twice to improve L2 cache performance on core2.  */
+	.p2align 4
+L(loop):
+
+	movdqa	64(%rax), %xmm8
+	pminub	80(%rax), %xmm8
+	pminub	96(%rax), %xmm8
+	pminub	112(%rax), %xmm8
+	pcmpeqb	%xmm11, %xmm8
+	pmovmskb	%xmm8, %edx
+	testl	%edx, %edx
+	jne	L(exit64)
+
+	subq	$-128, %rax
+
+	movdqa	(%rax), %xmm8
+	pminub	16(%rax), %xmm8
+	pminub	32(%rax), %xmm8
+	pminub	48(%rax), %xmm8
+	pcmpeqb	%xmm11, %xmm8
+	pmovmskb	%xmm8, %edx
+	testl	%edx, %edx
+	jne	L(exit0)
+	jmp	L(loop)
+
 	.p2align 4
-L(exit48):
-	sub	%rdi, %rax
-	bsf	%rdx, %rdx
-	lea	48(%rdx,%rax), %rax
+L(exit64):
+	addq	$64, %rax
+L(exit0):
+	pxor	%xmm8, %xmm8
+	FIND_ZERO
+
+	bsfq	%rdx, %rdx
+	addq	%rdx, %rax
+	subq	%rdi, %rax
 	ret
+
+#endif
+
 END(strlen)
 libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/x86_64/strnlen.S b/libc/sysdeps/x86_64/strnlen.S
index 6e5350306..d3c43ac48 100644
--- a/libc/sysdeps/x86_64/strnlen.S
+++ b/libc/sysdeps/x86_64/strnlen.S
@@ -1,63 +1,6 @@
-/* strnlen(str,maxlen) -- determine the length of the string STR up to MAXLEN.
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@redhat.com>.
-   This file is part of the GNU C Library.
+#define AS_STRNLEN
+#define strlen __strnlen
+#include "strlen.S"
 
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-
-	.text
-ENTRY(__strnlen)
-	movq	%rsi, %rax
-	testq	%rsi, %rsi
-	jz	3f
-	pxor	%xmm2, %xmm2
-	movq	%rdi, %rcx
-	movq	%rdi, %r8
-	movq	$16, %r9
-	andq	$~15, %rdi
-	movdqa	%xmm2, %xmm1
-	pcmpeqb	(%rdi), %xmm2
-	orl	$0xffffffff, %r10d
-	subq	%rdi, %rcx
-	shll	%cl, %r10d
-	subq	%rcx, %r9
-	pmovmskb %xmm2, %edx
-	andl	%r10d, %edx
-	jnz	1f
-	subq	%r9, %rsi
-	jbe	3f
-
-2:	movdqa	16(%rdi), %xmm0
-	leaq	16(%rdi), %rdi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	testl	%edx, %edx
-	jnz	1f
-	subq	$16, %rsi
-	jnbe	2b
-3:	ret
-
-1:	subq	%r8, %rdi
-	bsfl	%edx, %edx
-	addq	%rdi, %rdx
-	cmpq	%rdx, %rax
-	cmovnbq	%rdx, %rax
-	ret
-END(__strnlen)
-weak_alias (__strnlen, strnlen)
-libc_hidden_def (strnlen)
+weak_alias (__strnlen, strnlen);
+libc_hidden_builtin_def (strnlen)